summaryrefslogtreecommitdiff
path: root/net/ipv4/netfilter
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/netfilter')
-rw-r--r--net/ipv4/netfilter/Kconfig696
-rw-r--r--net/ipv4/netfilter/Makefile89
-rw-r--r--net/ipv4/netfilter/arp_tables.c1333
-rw-r--r--net/ipv4/netfilter/arpt_mangle.c104
-rw-r--r--net/ipv4/netfilter/arptable_filter.c214
-rw-r--r--net/ipv4/netfilter/ip_conntrack_amanda.c167
-rw-r--r--net/ipv4/netfilter/ip_conntrack_core.c1247
-rw-r--r--net/ipv4/netfilter/ip_conntrack_ftp.c501
-rw-r--r--net/ipv4/netfilter/ip_conntrack_irc.c313
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_generic.c75
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_icmp.c279
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_sctp.c649
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_tcp.c1098
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_udp.c146
-rw-r--r--net/ipv4/netfilter/ip_conntrack_standalone.c961
-rw-r--r--net/ipv4/netfilter/ip_conntrack_tftp.c159
-rw-r--r--net/ipv4/netfilter/ip_nat_amanda.c88
-rw-r--r--net/ipv4/netfilter/ip_nat_core.c556
-rw-r--r--net/ipv4/netfilter/ip_nat_ftp.c183
-rw-r--r--net/ipv4/netfilter/ip_nat_helper.c430
-rw-r--r--net/ipv4/netfilter/ip_nat_irc.c125
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_icmp.c115
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_tcp.c178
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_udp.c165
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_unknown.c70
-rw-r--r--net/ipv4/netfilter/ip_nat_rule.c319
-rw-r--r--net/ipv4/netfilter/ip_nat_snmp_basic.c1347
-rw-r--r--net/ipv4/netfilter/ip_nat_standalone.c349
-rw-r--r--net/ipv4/netfilter/ip_nat_tftp.c70
-rw-r--r--net/ipv4/netfilter/ip_queue.c741
-rw-r--r--net/ipv4/netfilter/ip_tables.c1964
-rw-r--r--net/ipv4/netfilter/ipt_CLASSIFY.c92
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c761
-rw-r--r--net/ipv4/netfilter/ipt_CONNMARK.c118
-rw-r--r--net/ipv4/netfilter/ipt_DSCP.c106
-rw-r--r--net/ipv4/netfilter/ipt_ECN.c175
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c485
-rw-r--r--net/ipv4/netfilter/ipt_MARK.c162
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c207
-rw-r--r--net/ipv4/netfilter/ipt_NETMAP.c117
-rw-r--r--net/ipv4/netfilter/ipt_NOTRACK.c76
-rw-r--r--net/ipv4/netfilter/ipt_REDIRECT.c129
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c335
-rw-r--r--net/ipv4/netfilter/ipt_SAME.c211
-rw-r--r--net/ipv4/netfilter/ipt_TCPMSS.c262
-rw-r--r--net/ipv4/netfilter/ipt_TOS.c105
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c419
-rw-r--r--net/ipv4/netfilter/ipt_addrtype.c77
-rw-r--r--net/ipv4/netfilter/ipt_ah.c117
-rw-r--r--net/ipv4/netfilter/ipt_comment.c59
-rw-r--r--net/ipv4/netfilter/ipt_connmark.c81
-rw-r--r--net/ipv4/netfilter/ipt_conntrack.c136
-rw-r--r--net/ipv4/netfilter/ipt_dscp.c63
-rw-r--r--net/ipv4/netfilter/ipt_ecn.c131
-rw-r--r--net/ipv4/netfilter/ipt_esp.c118
-rw-r--r--net/ipv4/netfilter/ipt_hashlimit.c731
-rw-r--r--net/ipv4/netfilter/ipt_helper.c113
-rw-r--r--net/ipv4/netfilter/ipt_iprange.c99
-rw-r--r--net/ipv4/netfilter/ipt_length.c64
-rw-r--r--net/ipv4/netfilter/ipt_limit.c157
-rw-r--r--net/ipv4/netfilter/ipt_mac.c79
-rw-r--r--net/ipv4/netfilter/ipt_mark.c64
-rw-r--r--net/ipv4/netfilter/ipt_multiport.c212
-rw-r--r--net/ipv4/netfilter/ipt_owner.c217
-rw-r--r--net/ipv4/netfilter/ipt_physdev.c134
-rw-r--r--net/ipv4/netfilter/ipt_pkttype.c70
-rw-r--r--net/ipv4/netfilter/ipt_realm.c76
-rw-r--r--net/ipv4/netfilter/ipt_recent.c1002
-rw-r--r--net/ipv4/netfilter/ipt_sctp.c203
-rw-r--r--net/ipv4/netfilter/ipt_state.c74
-rw-r--r--net/ipv4/netfilter/ipt_tcpmss.c127
-rw-r--r--net/ipv4/netfilter/ipt_tos.c64
-rw-r--r--net/ipv4/netfilter/ipt_ttl.c79
-rw-r--r--net/ipv4/netfilter/iptable_filter.c194
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c260
-rw-r--r--net/ipv4/netfilter/iptable_raw.c156
76 files changed, 23448 insertions, 0 deletions
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
new file mode 100644
index 00000000000..46d4cb1c06f
--- /dev/null
+++ b/net/ipv4/netfilter/Kconfig
@@ -0,0 +1,696 @@
+#
+# IP netfilter configuration
+#
+
+menu "IP: Netfilter Configuration"
+ depends on INET && NETFILTER
+
+# connection tracking, helpers and protocols
+config IP_NF_CONNTRACK
+ tristate "Connection tracking (required for masq/NAT)"
+ ---help---
+ Connection tracking keeps a record of what packets have passed
+ through your machine, in order to figure out how they are related
+ into connections.
+
+ This is required to do Masquerading or other kinds of Network
+ Address Translation (except for Fast NAT). It can also be used to
+ enhance packet filtering (see `Connection state match support'
+ below).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_CT_ACCT
+ bool "Connection tracking flow accounting"
+ depends on IP_NF_CONNTRACK
+ help
+ If this option is enabled, the connection tracking code will
+ keep per-flow packet and byte counters.
+
+ Those counters can be used for flow-based accounting or the
+ `connbytes' match.
+
+ If unsure, say `N'.
+
+config IP_NF_CONNTRACK_MARK
+ bool 'Connection mark tracking support'
+ help
+ This option enables support for connection marks, used by the
+ `CONNMARK' target and `connmark' match. Similar to the mark value
+ of packets, but this mark value is kept in the conntrack session
+ instead of the individual packets.
+
+config IP_NF_CT_PROTO_SCTP
+ tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
+ depends on IP_NF_CONNTRACK && EXPERIMENTAL
+ help
+ With this option enabled, the connection tracking code will
+ be able to do state tracking on SCTP connections.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/modules.txt>. If unsure, say `N'.
+
+config IP_NF_FTP
+ tristate "FTP protocol support"
+ depends on IP_NF_CONNTRACK
+ help
+ Tracking FTP connections is problematic: special helpers are
+ required for tracking them, and doing masquerading and other forms
+ of Network Address Translation on them.
+
+ To compile it as a module, choose M here. If unsure, say Y.
+
+config IP_NF_IRC
+ tristate "IRC protocol support"
+ depends on IP_NF_CONNTRACK
+ ---help---
+ There is a commonly-used extension to IRC called
+ Direct Client-to-Client Protocol (DCC). This enables users to send
+ files to each other, and also chat to each other without the need
+ of a server. DCC Sending is used anywhere you send files over IRC,
+ and DCC Chat is most commonly used by Eggdrop bots. If you are
+ using NAT, this extension will enable you to send files and initiate
+ chats. Note that you do NOT need this extension to get files or
+ have others initiate chats, or everything else in IRC.
+
+ To compile it as a module, choose M here. If unsure, say Y.
+
+config IP_NF_TFTP
+ tristate "TFTP protocol support"
+ depends on IP_NF_CONNTRACK
+ help
+ TFTP connection tracking helper, this is required depending
+ on how restrictive your ruleset is.
+ If you are using a tftp client behind -j SNAT or -j MASQUERADING
+ you will need this.
+
+ To compile it as a module, choose M here. If unsure, say Y.
+
+config IP_NF_AMANDA
+ tristate "Amanda backup protocol support"
+ depends on IP_NF_CONNTRACK
+ help
+ If you are running the Amanda backup package <http://www.amanda.org/>
+ on this machine or machines that will be MASQUERADED through this
+ machine, then you may want to enable this feature. This allows the
+ connection tracking and natting code to allow the sub-channels that
+ Amanda requires for communication of the backup data, messages and
+ index.
+
+ To compile it as a module, choose M here. If unsure, say Y.
+
+config IP_NF_QUEUE
+ tristate "Userspace queueing via NETLINK"
+ help
+ Netfilter has the ability to queue packets to user space: the
+ netlink device can be used to access them using this driver.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_IPTABLES
+ tristate "IP tables support (required for filtering/masq/NAT)"
+ help
+ iptables is a general, extensible packet identification framework.
+ The packet filtering and full NAT (masquerading, port forwarding,
+ etc) subsystems now use this: say `Y' or `M' here if you want to use
+ either of those.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+# The matches.
+config IP_NF_MATCH_LIMIT
+ tristate "limit match support"
+ depends on IP_NF_IPTABLES
+ help
+ limit matching allows you to control the rate at which a rule can be
+ matched: mainly useful in combination with the LOG target ("LOG
+ target support", below) and to avoid some Denial of Service attacks.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_MATCH_IPRANGE
+ tristate "IP range match support"
+ depends on IP_NF_IPTABLES
+ help
+ This option makes possible to match IP addresses against IP address
+ ranges.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_MATCH_MAC
+ tristate "MAC address match support"
+ depends on IP_NF_IPTABLES
+ help
+ MAC matching allows you to match packets based on the source
+ Ethernet address of the packet.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_MATCH_PKTTYPE
+ tristate "Packet type match support"
+ depends on IP_NF_IPTABLES
+ help
+ Packet type matching allows you to match a packet by
+ its "class", eg. BROADCAST, MULTICAST, ...
+
+ Typical usage:
+ iptables -A INPUT -m pkttype --pkt-type broadcast -j LOG
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_MATCH_MARK
+ tristate "netfilter MARK match support"
+ depends on IP_NF_IPTABLES
+ help
+ Netfilter mark matching allows you to match packets based on the
+ `nfmark' value in the packet. This can be set by the MARK target
+ (see below).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_MATCH_MULTIPORT
+ tristate "Multiple port match support"
+ depends on IP_NF_IPTABLES
+ help
+ Multiport matching allows you to match TCP or UDP packets based on
+ a series of source or destination ports: normally a rule can only
+ match a single range of ports.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_MATCH_TOS
+ tristate "TOS match support"
+ depends on IP_NF_IPTABLES
+ help
+ TOS matching allows you to match packets based on the Type Of
+ Service fields of the IP packet.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_MATCH_RECENT
+ tristate "recent match support"
+ depends on IP_NF_IPTABLES
+ help
+ This match is used for creating one or many lists of recently
+ used addresses and then matching against that/those list(s).
+
+ Short options are available by using 'iptables -m recent -h'
+ Official Website: <http://snowman.net/projects/ipt_recent/>
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_MATCH_ECN
+ tristate "ECN match support"
+ depends on IP_NF_IPTABLES
+ help
+ This option adds a `ECN' match, which allows you to match against
+ the IPv4 and TCP header ECN fields.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_MATCH_DSCP
+ tristate "DSCP match support"
+ depends on IP_NF_IPTABLES
+ help
+ This option adds a `DSCP' match, which allows you to match against
+ the IPv4 header DSCP field (DSCP codepoint).
+
+ The DSCP codepoint can have any value between 0x0 and 0x4f.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_MATCH_AH_ESP
+ tristate "AH/ESP match support"
+ depends on IP_NF_IPTABLES
+ help
+ These two match extensions (`ah' and `esp') allow you to match a
+ range of SPIs inside AH or ESP headers of IPSec packets.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_MATCH_LENGTH
+ tristate "LENGTH match support"
+ depends on IP_NF_IPTABLES
+ help
+ This option allows you to match the length of a packet against a
+ specific value or range of values.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_MATCH_TTL
+ tristate "TTL match support"
+ depends on IP_NF_IPTABLES
+ help
+ This adds CONFIG_IP_NF_MATCH_TTL option, which enabled the user
+ to match packets by their TTL value.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_MATCH_TCPMSS
+ tristate "tcpmss match support"
+ depends on IP_NF_IPTABLES
+ help
+ This option adds a `tcpmss' match, which allows you to examine the
+ MSS value of TCP SYN packets, which control the maximum packet size
+ for that connection.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_MATCH_HELPER
+ tristate "Helper match support"
+ depends on IP_NF_CONNTRACK && IP_NF_IPTABLES
+ help
+ Helper matching allows you to match packets in dynamic connections
+ tracked by a conntrack-helper, ie. ip_conntrack_ftp
+
+ To compile it as a module, choose M here. If unsure, say Y.
+
+config IP_NF_MATCH_STATE
+ tristate "Connection state match support"
+ depends on IP_NF_CONNTRACK && IP_NF_IPTABLES
+ help
+ Connection state matching allows you to match packets based on their
+ relationship to a tracked connection (ie. previous packets). This
+ is a powerful tool for packet classification.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_MATCH_CONNTRACK
+ tristate "Connection tracking match support"
+ depends on IP_NF_CONNTRACK && IP_NF_IPTABLES
+ help
+ This is a general conntrack match module, a superset of the state match.
+
+ It allows matching on additional conntrack information, which is
+ useful in complex configurations, such as NAT gateways with multiple
+ internet links or tunnels.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_MATCH_OWNER
+ tristate "Owner match support"
+ depends on IP_NF_IPTABLES
+ help
+ Packet owner matching allows you to match locally-generated packets
+ based on who created them: the user, group, process or session.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_MATCH_PHYSDEV
+ tristate "Physdev match support"
+ depends on IP_NF_IPTABLES && BRIDGE_NETFILTER
+ help
+ Physdev packet matching matches against the physical bridge ports
+ the IP packet arrived on or will leave by.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_MATCH_ADDRTYPE
+ tristate 'address type match support'
+ depends on IP_NF_IPTABLES
+ help
+ This option allows you to match what routing thinks of an address,
+ eg. UNICAST, LOCAL, BROADCAST, ...
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/modules.txt>. If unsure, say `N'.
+
+config IP_NF_MATCH_REALM
+ tristate 'realm match support'
+ depends on IP_NF_IPTABLES
+ select NET_CLS_ROUTE
+ help
+ This option adds a `realm' match, which allows you to use the realm
+ key from the routing subsystem inside iptables.
+
+ This match pretty much resembles the CONFIG_NET_CLS_ROUTE4 option
+ in tc world.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/modules.txt>. If unsure, say `N'.
+
+config IP_NF_MATCH_SCTP
+ tristate 'SCTP protocol match support'
+ depends on IP_NF_IPTABLES
+ help
+ With this option enabled, you will be able to use the iptables
+ `sctp' match in order to match on SCTP source/destination ports
+ and SCTP chunk types.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/modules.txt>. If unsure, say `N'.
+
+config IP_NF_MATCH_COMMENT
+ tristate 'comment match support'
+ depends on IP_NF_IPTABLES
+ help
+ This option adds a `comment' dummy-match, which allows you to put
+ comments in your iptables ruleset.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/modules.txt>. If unsure, say `N'.
+
+config IP_NF_MATCH_CONNMARK
+ tristate 'Connection mark match support'
+ depends on IP_NF_CONNTRACK_MARK && IP_NF_IPTABLES
+ help
+ This option adds a `connmark' match, which allows you to match the
+ connection mark value previously set for the session by `CONNMARK'.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/modules.txt>. The module will be called
+ ipt_connmark.o. If unsure, say `N'.
+
+config IP_NF_MATCH_HASHLIMIT
+ tristate 'hashlimit match support'
+ depends on IP_NF_IPTABLES
+ help
+ This option adds a new iptables `hashlimit' match.
+
+ As opposed to `limit', this match dynamically crates a hash table
+ of limit buckets, based on your selection of source/destination
+ ip addresses and/or ports.
+
+ It enables you to express policies like `10kpps for any given
+ destination IP' or `500pps from any given source IP' with a single
+ IPtables rule.
+
+# `filter', generic and specific targets
+config IP_NF_FILTER
+ tristate "Packet filtering"
+ depends on IP_NF_IPTABLES
+ help
+ Packet filtering defines a table `filter', which has a series of
+ rules for simple packet filtering at local input, forwarding and
+ local output. See the man page for iptables(8).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_REJECT
+ tristate "REJECT target support"
+ depends on IP_NF_FILTER
+ help
+ The REJECT target allows a filtering rule to specify that an ICMP
+ error should be issued in response to an incoming packet, rather
+ than silently being dropped.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_LOG
+ tristate "LOG target support"
+ depends on IP_NF_IPTABLES
+ help
+ This option adds a `LOG' target, which allows you to create rules in
+ any iptables table which records the packet header to the syslog.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_ULOG
+ tristate "ULOG target support"
+ depends on IP_NF_IPTABLES
+ ---help---
+ This option adds a `ULOG' target, which allows you to create rules in
+ any iptables table. The packet is passed to a userspace logging
+ daemon using netlink multicast sockets; unlike the LOG target
+ which can only be viewed through syslog.
+
+ The apropriate userspace logging daemon (ulogd) may be obtained from
+ <http://www.gnumonks.org/projects/ulogd/>
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_TCPMSS
+ tristate "TCPMSS target support"
+ depends on IP_NF_IPTABLES
+ ---help---
+ This option adds a `TCPMSS' target, which allows you to alter the
+ MSS value of TCP SYN packets, to control the maximum size for that
+ connection (usually limiting it to your outgoing interface's MTU
+ minus 40).
+
+ This is used to overcome criminally braindead ISPs or servers which
+ block ICMP Fragmentation Needed packets. The symptoms of this
+ problem are that everything works fine from your Linux
+ firewall/router, but machines behind it can never exchange large
+ packets:
+ 1) Web browsers connect, then hang with no data received.
+ 2) Small mail works fine, but large emails hang.
+ 3) ssh works fine, but scp hangs after initial handshaking.
+
+ Workaround: activate this option and add a rule to your firewall
+ configuration like:
+
+ iptables -A FORWARD -p tcp --tcp-flags SYN,RST SYN \
+ -j TCPMSS --clamp-mss-to-pmtu
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+# NAT + specific targets
+config IP_NF_NAT
+ tristate "Full NAT"
+ depends on IP_NF_IPTABLES && IP_NF_CONNTRACK
+ help
+ The Full NAT option allows masquerading, port forwarding and other
+ forms of full Network Address Port Translation. It is controlled by
+ the `nat' table in iptables: see the man page for iptables(8).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_NAT_NEEDED
+ bool
+ depends on IP_NF_NAT != n
+ default y
+
+config IP_NF_TARGET_MASQUERADE
+ tristate "MASQUERADE target support"
+ depends on IP_NF_NAT
+ help
+ Masquerading is a special case of NAT: all outgoing connections are
+ changed to seem to come from a particular interface's address, and
+ if the interface goes down, those connections are lost. This is
+ only useful for dialup accounts with dynamic IP address (ie. your IP
+ address will be different on next dialup).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_REDIRECT
+ tristate "REDIRECT target support"
+ depends on IP_NF_NAT
+ help
+ REDIRECT is a special case of NAT: all incoming connections are
+ mapped onto the incoming interface's address, causing the packets to
+ come to the local machine instead of passing through. This is
+ useful for transparent proxies.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_NETMAP
+ tristate "NETMAP target support"
+ depends on IP_NF_NAT
+ help
+ NETMAP is an implementation of static 1:1 NAT mapping of network
+ addresses. It maps the network address part, while keeping the host
+ address part intact. It is similar to Fast NAT, except that
+ Netfilter's connection tracking doesn't work well with Fast NAT.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_SAME
+ tristate "SAME target support"
+ depends on IP_NF_NAT
+ help
+ This option adds a `SAME' target, which works like the standard SNAT
+ target, but attempts to give clients the same IP for all connections.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_NAT_SNMP_BASIC
+ tristate "Basic SNMP-ALG support (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && IP_NF_NAT
+ ---help---
+
+ This module implements an Application Layer Gateway (ALG) for
+ SNMP payloads. In conjunction with NAT, it allows a network
+ management system to access multiple private networks with
+ conflicting addresses. It works by modifying IP addresses
+ inside SNMP payloads to match IP-layer NAT mapping.
+
+ This is the "basic" form of SNMP-ALG, as described in RFC 2962
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_NAT_IRC
+ tristate
+ depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
+ default IP_NF_NAT if IP_NF_IRC=y
+ default m if IP_NF_IRC=m
+
+# If they want FTP, set to $CONFIG_IP_NF_NAT (m or y),
+# or $CONFIG_IP_NF_FTP (m or y), whichever is weaker. Argh.
+config IP_NF_NAT_FTP
+ tristate
+ depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
+ default IP_NF_NAT if IP_NF_FTP=y
+ default m if IP_NF_FTP=m
+
+config IP_NF_NAT_TFTP
+ tristate
+ depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
+ default IP_NF_NAT if IP_NF_TFTP=y
+ default m if IP_NF_TFTP=m
+
+config IP_NF_NAT_AMANDA
+ tristate
+ depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
+ default IP_NF_NAT if IP_NF_AMANDA=y
+ default m if IP_NF_AMANDA=m
+
+# mangle + specific targets
+config IP_NF_MANGLE
+ tristate "Packet mangling"
+ depends on IP_NF_IPTABLES
+ help
+ This option adds a `mangle' table to iptables: see the man page for
+ iptables(8). This table is used for various packet alterations
+ which can effect how the packet is routed.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_TOS
+ tristate "TOS target support"
+ depends on IP_NF_MANGLE
+ help
+ This option adds a `TOS' target, which allows you to create rules in
+ the `mangle' table which alter the Type Of Service field of an IP
+ packet prior to routing.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_ECN
+ tristate "ECN target support"
+ depends on IP_NF_MANGLE
+ ---help---
+ This option adds a `ECN' target, which can be used in the iptables mangle
+ table.
+
+ You can use this target to remove the ECN bits from the IPv4 header of
+ an IP packet. This is particularly useful, if you need to work around
+ existing ECN blackholes on the internet, but don't want to disable
+ ECN support in general.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_DSCP
+ tristate "DSCP target support"
+ depends on IP_NF_MANGLE
+ help
+ This option adds a `DSCP' match, which allows you to match against
+ the IPv4 header DSCP field (DSCP codepoint).
+
+ The DSCP codepoint can have any value between 0x0 and 0x4f.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_MARK
+ tristate "MARK target support"
+ depends on IP_NF_MANGLE
+ help
+ This option adds a `MARK' target, which allows you to create rules
+ in the `mangle' table which alter the netfilter mark (nfmark) field
+ associated with the packet prior to routing. This can change
+ the routing method (see `Use netfilter MARK value as routing
+ key') and can also be used by other subsystems to change their
+ behavior.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_CLASSIFY
+ tristate "CLASSIFY target support"
+ depends on IP_NF_MANGLE
+ help
+ This option adds a `CLASSIFY' target, which enables the user to set
+ the priority of a packet. Some qdiscs can use this value for
+ classification, among these are:
+
+ atm, cbq, dsmark, pfifo_fast, htb, prio
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_CONNMARK
+ tristate 'CONNMARK target support'
+ depends on IP_NF_CONNTRACK_MARK && IP_NF_MANGLE
+ help
+ This option adds a `CONNMARK' target, which allows one to manipulate
+ the connection mark value. Similar to the MARK target, but
+ affects the connection mark value rather than the packet mark value.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/modules.txt>. The module will be called
+ ipt_CONNMARK.o. If unsure, say `N'.
+
+config IP_NF_TARGET_CLUSTERIP
+ tristate "CLUSTERIP target support (EXPERIMENTAL)"
+ depends on IP_NF_CONNTRACK_MARK && IP_NF_IPTABLES && EXPERIMENTAL
+ help
+ The CLUSTERIP target allows you to build load-balancing clusters of
+ network servers without having a dedicated load-balancing
+ router/server/switch.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+# raw + specific targets
+config IP_NF_RAW
+ tristate 'raw table support (required for NOTRACK/TRACE)'
+ depends on IP_NF_IPTABLES
+ help
+ This option adds a `raw' table to iptables. This table is the very
+ first in the netfilter framework and hooks in at the PREROUTING
+ and OUTPUT chains.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/modules.txt>. If unsure, say `N'.
+
+config IP_NF_TARGET_NOTRACK
+ tristate 'NOTRACK target support'
+ depends on IP_NF_RAW
+ depends on IP_NF_CONNTRACK
+ help
+ The NOTRACK target allows a select rule to specify
+ which packets *not* to enter the conntrack/NAT
+ subsystem with all the consequences (no ICMP error tracking,
+ no protocol helpers for the selected packets).
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/modules.txt>. If unsure, say `N'.
+
+
+# ARP tables
+config IP_NF_ARPTABLES
+ tristate "ARP tables support"
+ help
+ arptables is a general, extensible packet identification framework.
+ The ARP packet filtering and mangling (manipulation)subsystems
+ use this: say Y or M here if you want to use either of those.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_ARPFILTER
+ tristate "ARP packet filtering"
+ depends on IP_NF_ARPTABLES
+ help
+ ARP packet filtering defines a table `filter', which has a series of
+ rules for simple ARP packet filtering at local input and
+ local output. On a bridge, you can also specify filtering rules
+ for forwarded ARP packets. See the man page for arptables(8).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_ARP_MANGLE
+ tristate "ARP payload mangling"
+ depends on IP_NF_ARPTABLES
+ help
+ Allows altering the ARP packet payload: source and destination
+ hardware and network addresses.
+
+endmenu
+
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
new file mode 100644
index 00000000000..45796d5924d
--- /dev/null
+++ b/net/ipv4/netfilter/Makefile
@@ -0,0 +1,89 @@
+#
+# Makefile for the netfilter modules on top of IPv4.
+#
+
+# objects for the standalone - connection tracking / NAT
+ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o
+iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
+
+# connection tracking
+obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
+
+# SCTP protocol connection tracking
+obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o
+
+# connection tracking helpers
+obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o
+obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o
+obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o
+obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o
+
+# NAT helpers
+obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o
+obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o
+obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o
+obj-$(CONFIG_IP_NF_NAT_IRC) += ip_nat_irc.o
+
+# generic IP tables
+obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
+
+# the three instances of ip_tables
+obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
+obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
+obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
+obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
+
+# matches
+obj-$(CONFIG_IP_NF_MATCH_HELPER) += ipt_helper.o
+obj-$(CONFIG_IP_NF_MATCH_LIMIT) += ipt_limit.o
+obj-$(CONFIG_IP_NF_MATCH_HASHLIMIT) += ipt_hashlimit.o
+obj-$(CONFIG_IP_NF_MATCH_SCTP) += ipt_sctp.o
+obj-$(CONFIG_IP_NF_MATCH_MARK) += ipt_mark.o
+obj-$(CONFIG_IP_NF_MATCH_MAC) += ipt_mac.o
+obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o
+obj-$(CONFIG_IP_NF_MATCH_PKTTYPE) += ipt_pkttype.o
+obj-$(CONFIG_IP_NF_MATCH_MULTIPORT) += ipt_multiport.o
+obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o
+obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o
+obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o
+obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
+obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_dscp.o
+obj-$(CONFIG_IP_NF_MATCH_AH_ESP) += ipt_ah.o ipt_esp.o
+obj-$(CONFIG_IP_NF_MATCH_LENGTH) += ipt_length.o
+obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
+obj-$(CONFIG_IP_NF_MATCH_STATE) += ipt_state.o
+obj-$(CONFIG_IP_NF_MATCH_CONNMARK) += ipt_connmark.o
+obj-$(CONFIG_IP_NF_MATCH_CONNTRACK) += ipt_conntrack.o
+obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o
+obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o
+obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
+obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o
+obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o
+
+# targets
+obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
+obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o
+obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
+obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o
+obj-$(CONFIG_IP_NF_TARGET_MARK) += ipt_MARK.o
+obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
+obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
+obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
+obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o
+obj-$(CONFIG_IP_NF_TARGET_CLASSIFY) += ipt_CLASSIFY.o
+obj-$(CONFIG_IP_NF_NAT_SNMP_BASIC) += ip_nat_snmp_basic.o
+obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
+obj-$(CONFIG_IP_NF_TARGET_CONNMARK) += ipt_CONNMARK.o
+obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
+obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o
+obj-$(CONFIG_IP_NF_TARGET_NOTRACK) += ipt_NOTRACK.o
+obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
+
+# generic ARP tables
+obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o
+obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
+
+# just filtering instance of ARP tables for now
+obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
+
+obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
new file mode 100644
index 00000000000..df79f5ed6a0
--- /dev/null
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -0,0 +1,1333 @@
+/*
+ * Packet matching code for ARP packets.
+ *
+ * Based heavily, if not almost entirely, upon ip_tables.c framework.
+ *
+ * Some ARP specific bits are:
+ *
+ * Copyright (C) 2002 David S. Miller (davem@redhat.com)
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/kmod.h>
+#include <linux/vmalloc.h>
+#include <linux/proc_fs.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <asm/uaccess.h>
+#include <asm/semaphore.h>
+
+#include <linux/netfilter_arp/arp_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("David S. Miller <davem@redhat.com>");
+MODULE_DESCRIPTION("arptables core");
+
+/*#define DEBUG_ARP_TABLES*/
+/*#define DEBUG_ARP_TABLES_USER*/
+
+#ifdef DEBUG_ARP_TABLES
+#define dprintf(format, args...) printk(format , ## args)
+#else
+#define dprintf(format, args...)
+#endif
+
+#ifdef DEBUG_ARP_TABLES_USER
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+#ifdef CONFIG_NETFILTER_DEBUG
+#define ARP_NF_ASSERT(x) \
+do { \
+ if (!(x)) \
+ printk("ARP_NF_ASSERT: %s:%s:%u\n", \
+ __FUNCTION__, __FILE__, __LINE__); \
+} while(0)
+#else
+#define ARP_NF_ASSERT(x)
+#endif
+#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
+
+static DECLARE_MUTEX(arpt_mutex);
+
+#define ASSERT_READ_LOCK(x) ARP_NF_ASSERT(down_trylock(&arpt_mutex) != 0)
+#define ASSERT_WRITE_LOCK(x) ARP_NF_ASSERT(down_trylock(&arpt_mutex) != 0)
+#include <linux/netfilter_ipv4/lockhelp.h>
+#include <linux/netfilter_ipv4/listhelp.h>
+
+struct arpt_table_info {
+ unsigned int size;
+ unsigned int number;
+ unsigned int initial_entries;
+ unsigned int hook_entry[NF_ARP_NUMHOOKS];
+ unsigned int underflow[NF_ARP_NUMHOOKS];
+ char entries[0] __attribute__((aligned(SMP_CACHE_BYTES)));
+};
+
+static LIST_HEAD(arpt_target);
+static LIST_HEAD(arpt_tables);
+#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
+
+#ifdef CONFIG_SMP
+#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
+#else
+#define TABLE_OFFSET(t,p) 0
+#endif
+
+static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
+ char *hdr_addr, int len)
+{
+ int i, ret;
+
+ if (len > ARPT_DEV_ADDR_LEN_MAX)
+ len = ARPT_DEV_ADDR_LEN_MAX;
+
+ ret = 0;
+ for (i = 0; i < len; i++)
+ ret |= (hdr_addr[i] ^ ap->addr[i]) & ap->mask[i];
+
+ return (ret != 0);
+}
+
+/* Returns whether packet matches rule or not. */
+static inline int arp_packet_match(const struct arphdr *arphdr,
+ struct net_device *dev,
+ const char *indev,
+ const char *outdev,
+ const struct arpt_arp *arpinfo)
+{
+ char *arpptr = (char *)(arphdr + 1);
+ char *src_devaddr, *tgt_devaddr;
+ u32 src_ipaddr, tgt_ipaddr;
+ int i, ret;
+
+#define FWINV(bool,invflg) ((bool) ^ !!(arpinfo->invflags & invflg))
+
+ if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop,
+ ARPT_INV_ARPOP)) {
+ dprintf("ARP operation field mismatch.\n");
+ dprintf("ar_op: %04x info->arpop: %04x info->arpop_mask: %04x\n",
+ arphdr->ar_op, arpinfo->arpop, arpinfo->arpop_mask);
+ return 0;
+ }
+
+ if (FWINV((arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd,
+ ARPT_INV_ARPHRD)) {
+ dprintf("ARP hardware address format mismatch.\n");
+ dprintf("ar_hrd: %04x info->arhrd: %04x info->arhrd_mask: %04x\n",
+ arphdr->ar_hrd, arpinfo->arhrd, arpinfo->arhrd_mask);
+ return 0;
+ }
+
+ if (FWINV((arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro,
+ ARPT_INV_ARPPRO)) {
+ dprintf("ARP protocol address format mismatch.\n");
+ dprintf("ar_pro: %04x info->arpro: %04x info->arpro_mask: %04x\n",
+ arphdr->ar_pro, arpinfo->arpro, arpinfo->arpro_mask);
+ return 0;
+ }
+
+ if (FWINV((arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln,
+ ARPT_INV_ARPHLN)) {
+ dprintf("ARP hardware address length mismatch.\n");
+ dprintf("ar_hln: %02x info->arhln: %02x info->arhln_mask: %02x\n",
+ arphdr->ar_hln, arpinfo->arhln, arpinfo->arhln_mask);
+ return 0;
+ }
+
+ src_devaddr = arpptr;
+ arpptr += dev->addr_len;
+ memcpy(&src_ipaddr, arpptr, sizeof(u32));
+ arpptr += sizeof(u32);
+ tgt_devaddr = arpptr;
+ arpptr += dev->addr_len;
+ memcpy(&tgt_ipaddr, arpptr, sizeof(u32));
+
+ if (FWINV(arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, dev->addr_len),
+ ARPT_INV_SRCDEVADDR) ||
+ FWINV(arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, dev->addr_len),
+ ARPT_INV_TGTDEVADDR)) {
+ dprintf("Source or target device address mismatch.\n");
+
+ return 0;
+ }
+
+ if (FWINV((src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr,
+ ARPT_INV_SRCIP) ||
+ FWINV(((tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr),
+ ARPT_INV_TGTIP)) {
+ dprintf("Source or target IP address mismatch.\n");
+
+ dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
+ NIPQUAD(src_ipaddr),
+ NIPQUAD(arpinfo->smsk.s_addr),
+ NIPQUAD(arpinfo->src.s_addr),
+ arpinfo->invflags & ARPT_INV_SRCIP ? " (INV)" : "");
+ dprintf("TGT: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
+ NIPQUAD(tgt_ipaddr),
+ NIPQUAD(arpinfo->tmsk.s_addr),
+ NIPQUAD(arpinfo->tgt.s_addr),
+ arpinfo->invflags & ARPT_INV_TGTIP ? " (INV)" : "");
+ return 0;
+ }
+
+ /* Look for ifname matches. */
+ for (i = 0, ret = 0; i < IFNAMSIZ; i++) {
+ ret |= (indev[i] ^ arpinfo->iniface[i])
+ & arpinfo->iniface_mask[i];
+ }
+
+ if (FWINV(ret != 0, ARPT_INV_VIA_IN)) {
+ dprintf("VIA in mismatch (%s vs %s).%s\n",
+ indev, arpinfo->iniface,
+ arpinfo->invflags&ARPT_INV_VIA_IN ?" (INV)":"");
+ return 0;
+ }
+
+ for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
+ unsigned long odev;
+ memcpy(&odev, outdev + i*sizeof(unsigned long),
+ sizeof(unsigned long));
+ ret |= (odev
+ ^ ((const unsigned long *)arpinfo->outiface)[i])
+ & ((const unsigned long *)arpinfo->outiface_mask)[i];
+ }
+
+ if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) {
+ dprintf("VIA out mismatch (%s vs %s).%s\n",
+ outdev, arpinfo->outiface,
+ arpinfo->invflags&ARPT_INV_VIA_OUT ?" (INV)":"");
+ return 0;
+ }
+
+ return 1;
+}
+
+static inline int arp_checkentry(const struct arpt_arp *arp)
+{
+ if (arp->flags & ~ARPT_F_MASK) {
+ duprintf("Unknown flag bits set: %08X\n",
+ arp->flags & ~ARPT_F_MASK);
+ return 0;
+ }
+ if (arp->invflags & ~ARPT_INV_MASK) {
+ duprintf("Unknown invflag bits set: %08X\n",
+ arp->invflags & ~ARPT_INV_MASK);
+ return 0;
+ }
+
+ return 1;
+}
+
+static unsigned int arpt_error(struct sk_buff **pskb,
+ unsigned int hooknum,
+ const struct net_device *in,
+ const struct net_device *out,
+ const void *targinfo,
+ void *userinfo)
+{
+ if (net_ratelimit())
+ printk("arp_tables: error: '%s'\n", (char *)targinfo);
+
+ return NF_DROP;
+}
+
+static inline struct arpt_entry *get_entry(void *base, unsigned int offset)
+{
+ return (struct arpt_entry *)(base + offset);
+}
+
+unsigned int arpt_do_table(struct sk_buff **pskb,
+ unsigned int hook,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct arpt_table *table,
+ void *userdata)
+{
+ static const char nulldevname[IFNAMSIZ];
+ unsigned int verdict = NF_DROP;
+ struct arphdr *arp;
+ int hotdrop = 0;
+ struct arpt_entry *e, *back;
+ const char *indev, *outdev;
+ void *table_base;
+
+ /* ARP header, plus 2 device addresses, plus 2 IP addresses. */
+ if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) +
+ (2 * (*pskb)->dev->addr_len) +
+ (2 * sizeof(u32)))))
+ return NF_DROP;
+
+ indev = in ? in->name : nulldevname;
+ outdev = out ? out->name : nulldevname;
+
+ read_lock_bh(&table->lock);
+ table_base = (void *)table->private->entries
+ + TABLE_OFFSET(table->private,
+ smp_processor_id());
+ e = get_entry(table_base, table->private->hook_entry[hook]);
+ back = get_entry(table_base, table->private->underflow[hook]);
+
+ arp = (*pskb)->nh.arph;
+ do {
+ if (arp_packet_match(arp, (*pskb)->dev, indev, outdev, &e->arp)) {
+ struct arpt_entry_target *t;
+ int hdr_len;
+
+ hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
+ (2 * (*pskb)->dev->addr_len);
+ ADD_COUNTER(e->counters, hdr_len, 1);
+
+ t = arpt_get_target(e);
+
+ /* Standard target? */
+ if (!t->u.kernel.target->target) {
+ int v;
+
+ v = ((struct arpt_standard_target *)t)->verdict;
+ if (v < 0) {
+ /* Pop from stack? */
+ if (v != ARPT_RETURN) {
+ verdict = (unsigned)(-v) - 1;
+ break;
+ }
+ e = back;
+ back = get_entry(table_base,
+ back->comefrom);
+ continue;
+ }
+ if (table_base + v
+ != (void *)e + e->next_offset) {
+ /* Save old back ptr in next entry */
+ struct arpt_entry *next
+ = (void *)e + e->next_offset;
+ next->comefrom =
+ (void *)back - table_base;
+
+ /* set back pointer to next entry */
+ back = next;
+ }
+
+ e = get_entry(table_base, v);
+ } else {
+ /* Targets which reenter must return
+ * abs. verdicts
+ */
+ verdict = t->u.kernel.target->target(pskb,
+ hook,
+ in, out,
+ t->data,
+ userdata);
+
+ /* Target might have changed stuff. */
+ arp = (*pskb)->nh.arph;
+
+ if (verdict == ARPT_CONTINUE)
+ e = (void *)e + e->next_offset;
+ else
+ /* Verdict */
+ break;
+ }
+ } else {
+ e = (void *)e + e->next_offset;
+ }
+ } while (!hotdrop);
+ read_unlock_bh(&table->lock);
+
+ if (hotdrop)
+ return NF_DROP;
+ else
+ return verdict;
+}
+
+static inline void *find_inlist_lock_noload(struct list_head *head,
+ const char *name,
+ int *error,
+ struct semaphore *mutex)
+{
+ void *ret;
+
+ *error = down_interruptible(mutex);
+ if (*error != 0)
+ return NULL;
+
+ ret = list_named_find(head, name);
+ if (!ret) {
+ *error = -ENOENT;
+ up(mutex);
+ }
+ return ret;
+}
+
+#ifndef CONFIG_KMOD
+#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
+#else
+static void *
+find_inlist_lock(struct list_head *head,
+ const char *name,
+ const char *prefix,
+ int *error,
+ struct semaphore *mutex)
+{
+ void *ret;
+
+ ret = find_inlist_lock_noload(head, name, error, mutex);
+ if (!ret) {
+ duprintf("find_inlist: loading `%s%s'.\n", prefix, name);
+ request_module("%s%s", prefix, name);
+ ret = find_inlist_lock_noload(head, name, error, mutex);
+ }
+
+ return ret;
+}
+#endif
+
+static inline struct arpt_table *arpt_find_table_lock(const char *name, int *error, struct semaphore *mutex)
+{
+ return find_inlist_lock(&arpt_tables, name, "arptable_", error, mutex);
+}
+
+static struct arpt_target *arpt_find_target_lock(const char *name, int *error, struct semaphore *mutex)
+{
+ return find_inlist_lock(&arpt_target, name, "arpt_", error, mutex);
+}
+
+/* All zeroes == unconditional rule. */
+static inline int unconditional(const struct arpt_arp *arp)
+{
+ unsigned int i;
+
+ for (i = 0; i < sizeof(*arp)/sizeof(__u32); i++)
+ if (((__u32 *)arp)[i])
+ return 0;
+
+ return 1;
+}
+
+/* Figures out from what hook each rule can be called: returns 0 if
+ * there are loops. Puts hook bitmask in comefrom.
+ */
+static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int valid_hooks)
+{
+ unsigned int hook;
+
+ /* No recursion; use packet counter to save back ptrs (reset
+ * to 0 as we leave), and comefrom to save source hook bitmask.
+ */
+ for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) {
+ unsigned int pos = newinfo->hook_entry[hook];
+ struct arpt_entry *e
+ = (struct arpt_entry *)(newinfo->entries + pos);
+
+ if (!(valid_hooks & (1 << hook)))
+ continue;
+
+ /* Set initial back pointer. */
+ e->counters.pcnt = pos;
+
+ for (;;) {
+ struct arpt_standard_target *t
+ = (void *)arpt_get_target(e);
+
+ if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) {
+ printk("arptables: loop hook %u pos %u %08X.\n",
+ hook, pos, e->comefrom);
+ return 0;
+ }
+ e->comefrom
+ |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS));
+
+ /* Unconditional return/END. */
+ if (e->target_offset == sizeof(struct arpt_entry)
+ && (strcmp(t->target.u.user.name,
+ ARPT_STANDARD_TARGET) == 0)
+ && t->verdict < 0
+ && unconditional(&e->arp)) {
+ unsigned int oldpos, size;
+
+ /* Return: backtrack through the last
+ * big jump.
+ */
+ do {
+ e->comefrom ^= (1<<NF_ARP_NUMHOOKS);
+ oldpos = pos;
+ pos = e->counters.pcnt;
+ e->counters.pcnt = 0;
+
+ /* We're at the start. */
+ if (pos == oldpos)
+ goto next;
+
+ e = (struct arpt_entry *)
+ (newinfo->entries + pos);
+ } while (oldpos == pos + e->next_offset);
+
+ /* Move along one */
+ size = e->next_offset;
+ e = (struct arpt_entry *)
+ (newinfo->entries + pos + size);
+ e->counters.pcnt = pos;
+ pos += size;
+ } else {
+ int newpos = t->verdict;
+
+ if (strcmp(t->target.u.user.name,
+ ARPT_STANDARD_TARGET) == 0
+ && newpos >= 0) {
+ /* This a jump; chase it. */
+ duprintf("Jump rule %u -> %u\n",
+ pos, newpos);
+ } else {
+ /* ... this is a fallthru */
+ newpos = pos + e->next_offset;
+ }
+ e = (struct arpt_entry *)
+ (newinfo->entries + newpos);
+ e->counters.pcnt = pos;
+ pos = newpos;
+ }
+ }
+ next:
+ duprintf("Finished chain %u\n", hook);
+ }
+ return 1;
+}
+
+static inline int standard_check(const struct arpt_entry_target *t,
+ unsigned int max_offset)
+{
+ struct arpt_standard_target *targ = (void *)t;
+
+ /* Check standard info. */
+ if (t->u.target_size
+ != ARPT_ALIGN(sizeof(struct arpt_standard_target))) {
+ duprintf("arpt_standard_check: target size %u != %Zu\n",
+ t->u.target_size,
+ ARPT_ALIGN(sizeof(struct arpt_standard_target)));
+ return 0;
+ }
+
+ if (targ->verdict >= 0
+ && targ->verdict > max_offset - sizeof(struct arpt_entry)) {
+ duprintf("arpt_standard_check: bad verdict (%i)\n",
+ targ->verdict);
+ return 0;
+ }
+
+ if (targ->verdict < -NF_MAX_VERDICT - 1) {
+ duprintf("arpt_standard_check: bad negative verdict (%i)\n",
+ targ->verdict);
+ return 0;
+ }
+ return 1;
+}
+
+static struct arpt_target arpt_standard_target;
+
+static inline int check_entry(struct arpt_entry *e, const char *name, unsigned int size,
+ unsigned int *i)
+{
+ struct arpt_entry_target *t;
+ struct arpt_target *target;
+ int ret;
+
+ if (!arp_checkentry(&e->arp)) {
+ duprintf("arp_tables: arp check failed %p %s.\n", e, name);
+ return -EINVAL;
+ }
+
+ t = arpt_get_target(e);
+ target = arpt_find_target_lock(t->u.user.name, &ret, &arpt_mutex);
+ if (!target) {
+ duprintf("check_entry: `%s' not found\n", t->u.user.name);
+ goto out;
+ }
+ if (!try_module_get((target->me))) {
+ ret = -ENOENT;
+ goto out_unlock;
+ }
+ t->u.kernel.target = target;
+ up(&arpt_mutex);
+
+ if (t->u.kernel.target == &arpt_standard_target) {
+ if (!standard_check(t, size)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ } else if (t->u.kernel.target->checkentry
+ && !t->u.kernel.target->checkentry(name, e, t->data,
+ t->u.target_size
+ - sizeof(*t),
+ e->comefrom)) {
+ module_put(t->u.kernel.target->me);
+ duprintf("arp_tables: check failed for `%s'.\n",
+ t->u.kernel.target->name);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ (*i)++;
+ return 0;
+
+out_unlock:
+ up(&arpt_mutex);
+out:
+ return ret;
+}
+
+static inline int check_entry_size_and_hooks(struct arpt_entry *e,
+ struct arpt_table_info *newinfo,
+ unsigned char *base,
+ unsigned char *limit,
+ const unsigned int *hook_entries,
+ const unsigned int *underflows,
+ unsigned int *i)
+{
+ unsigned int h;
+
+ if ((unsigned long)e % __alignof__(struct arpt_entry) != 0
+ || (unsigned char *)e + sizeof(struct arpt_entry) >= limit) {
+ duprintf("Bad offset %p\n", e);
+ return -EINVAL;
+ }
+
+ if (e->next_offset
+ < sizeof(struct arpt_entry) + sizeof(struct arpt_entry_target)) {
+ duprintf("checking: element %p size %u\n",
+ e, e->next_offset);
+ return -EINVAL;
+ }
+
+ /* Check hooks & underflows */
+ for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
+ if ((unsigned char *)e - base == hook_entries[h])
+ newinfo->hook_entry[h] = hook_entries[h];
+ if ((unsigned char *)e - base == underflows[h])
+ newinfo->underflow[h] = underflows[h];
+ }
+
+ /* FIXME: underflows must be unconditional, standard verdicts
+ < 0 (not ARPT_RETURN). --RR */
+
+ /* Clear counters and comefrom */
+ e->counters = ((struct arpt_counters) { 0, 0 });
+ e->comefrom = 0;
+
+ (*i)++;
+ return 0;
+}
+
+static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i)
+{
+ struct arpt_entry_target *t;
+
+ if (i && (*i)-- == 0)
+ return 1;
+
+ t = arpt_get_target(e);
+ if (t->u.kernel.target->destroy)
+ t->u.kernel.target->destroy(t->data,
+ t->u.target_size - sizeof(*t));
+ module_put(t->u.kernel.target->me);
+ return 0;
+}
+
+/* Checks and translates the user-supplied table segment (held in
+ * newinfo).
+ */
+static int translate_table(const char *name,
+ unsigned int valid_hooks,
+ struct arpt_table_info *newinfo,
+ unsigned int size,
+ unsigned int number,
+ const unsigned int *hook_entries,
+ const unsigned int *underflows)
+{
+ unsigned int i;
+ int ret;
+
+ newinfo->size = size;
+ newinfo->number = number;
+
+ /* Init all hooks to impossible value. */
+ for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+ newinfo->hook_entry[i] = 0xFFFFFFFF;
+ newinfo->underflow[i] = 0xFFFFFFFF;
+ }
+
+ duprintf("translate_table: size %u\n", newinfo->size);
+ i = 0;
+
+ /* Walk through entries, checking offsets. */
+ ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+ check_entry_size_and_hooks,
+ newinfo,
+ newinfo->entries,
+ newinfo->entries + size,
+ hook_entries, underflows, &i);
+ duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
+ if (ret != 0)
+ return ret;
+
+ if (i != number) {
+ duprintf("translate_table: %u not %u entries\n",
+ i, number);
+ return -EINVAL;
+ }
+
+ /* Check hooks all assigned */
+ for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+ /* Only hooks which are valid */
+ if (!(valid_hooks & (1 << i)))
+ continue;
+ if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
+ duprintf("Invalid hook entry %u %u\n",
+ i, hook_entries[i]);
+ return -EINVAL;
+ }
+ if (newinfo->underflow[i] == 0xFFFFFFFF) {
+ duprintf("Invalid underflow %u %u\n",
+ i, underflows[i]);
+ return -EINVAL;
+ }
+ }
+
+ if (!mark_source_chains(newinfo, valid_hooks)) {
+ duprintf("Looping hook\n");
+ return -ELOOP;
+ }
+
+ /* Finally, each sanity check must pass */
+ i = 0;
+ ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+ check_entry, name, size, &i);
+
+ if (ret != 0) {
+ ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+ cleanup_entry, &i);
+ return ret;
+ }
+
+ /* And one copy for every other CPU */
+ for (i = 1; i < num_possible_cpus(); i++) {
+ memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
+ newinfo->entries,
+ SMP_ALIGN(newinfo->size));
+ }
+
+ return ret;
+}
+
+static struct arpt_table_info *replace_table(struct arpt_table *table,
+ unsigned int num_counters,
+ struct arpt_table_info *newinfo,
+ int *error)
+{
+ struct arpt_table_info *oldinfo;
+
+ /* Do the substitution. */
+ write_lock_bh(&table->lock);
+ /* Check inside lock: is the old number correct? */
+ if (num_counters != table->private->number) {
+ duprintf("num_counters != table->private->number (%u/%u)\n",
+ num_counters, table->private->number);
+ write_unlock_bh(&table->lock);
+ *error = -EAGAIN;
+ return NULL;
+ }
+ oldinfo = table->private;
+ table->private = newinfo;
+ newinfo->initial_entries = oldinfo->initial_entries;
+ write_unlock_bh(&table->lock);
+
+ return oldinfo;
+}
+
+/* Gets counters. */
+static inline int add_entry_to_counter(const struct arpt_entry *e,
+ struct arpt_counters total[],
+ unsigned int *i)
+{
+ ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+
+ (*i)++;
+ return 0;
+}
+
+static void get_counters(const struct arpt_table_info *t,
+ struct arpt_counters counters[])
+{
+ unsigned int cpu;
+ unsigned int i;
+
+ for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
+ i = 0;
+ ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
+ t->size,
+ add_entry_to_counter,
+ counters,
+ &i);
+ }
+}
+
+static int copy_entries_to_user(unsigned int total_size,
+ struct arpt_table *table,
+ void __user *userptr)
+{
+ unsigned int off, num, countersize;
+ struct arpt_entry *e;
+ struct arpt_counters *counters;
+ int ret = 0;
+
+ /* We need atomic snapshot of counters: rest doesn't change
+ * (other than comefrom, which userspace doesn't care
+ * about).
+ */
+ countersize = sizeof(struct arpt_counters) * table->private->number;
+ counters = vmalloc(countersize);
+
+ if (counters == NULL)
+ return -ENOMEM;
+
+ /* First, sum counters... */
+ memset(counters, 0, countersize);
+ write_lock_bh(&table->lock);
+ get_counters(table->private, counters);
+ write_unlock_bh(&table->lock);
+
+ /* ... then copy entire thing from CPU 0... */
+ if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+
+ /* FIXME: use iterator macros --RR */
+ /* ... then go back and fix counters and names */
+ for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
+ struct arpt_entry_target *t;
+
+ e = (struct arpt_entry *)(table->private->entries + off);
+ if (copy_to_user(userptr + off
+ + offsetof(struct arpt_entry, counters),
+ &counters[num],
+ sizeof(counters[num])) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+
+ t = arpt_get_target(e);
+ if (copy_to_user(userptr + off + e->target_offset
+ + offsetof(struct arpt_entry_target,
+ u.user.name),
+ t->u.kernel.target->name,
+ strlen(t->u.kernel.target->name)+1) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+ }
+
+ free_counters:
+ vfree(counters);
+ return ret;
+}
+
+static int get_entries(const struct arpt_get_entries *entries,
+ struct arpt_get_entries __user *uptr)
+{
+ int ret;
+ struct arpt_table *t;
+
+ t = arpt_find_table_lock(entries->name, &ret, &arpt_mutex);
+ if (t) {
+ duprintf("t->private->number = %u\n",
+ t->private->number);
+ if (entries->size == t->private->size)
+ ret = copy_entries_to_user(t->private->size,
+ t, uptr->entrytable);
+ else {
+ duprintf("get_entries: I've got %u not %u!\n",
+ t->private->size,
+ entries->size);
+ ret = -EINVAL;
+ }
+ up(&arpt_mutex);
+ } else
+ duprintf("get_entries: Can't find %s!\n",
+ entries->name);
+
+ return ret;
+}
+
+static int do_replace(void __user *user, unsigned int len)
+{
+ int ret;
+ struct arpt_replace tmp;
+ struct arpt_table *t;
+ struct arpt_table_info *newinfo, *oldinfo;
+ struct arpt_counters *counters;
+
+ if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ return -EFAULT;
+
+ /* Hack: Causes ipchains to give correct error msg --RR */
+ if (len != sizeof(tmp) + tmp.size)
+ return -ENOPROTOOPT;
+
+ /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
+ if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
+ return -ENOMEM;
+
+ newinfo = vmalloc(sizeof(struct arpt_table_info)
+ + SMP_ALIGN(tmp.size) * num_possible_cpus());
+ if (!newinfo)
+ return -ENOMEM;
+
+ if (copy_from_user(newinfo->entries, user + sizeof(tmp),
+ tmp.size) != 0) {
+ ret = -EFAULT;
+ goto free_newinfo;
+ }
+
+ counters = vmalloc(tmp.num_counters * sizeof(struct arpt_counters));
+ if (!counters) {
+ ret = -ENOMEM;
+ goto free_newinfo;
+ }
+ memset(counters, 0, tmp.num_counters * sizeof(struct arpt_counters));
+
+ ret = translate_table(tmp.name, tmp.valid_hooks,
+ newinfo, tmp.size, tmp.num_entries,
+ tmp.hook_entry, tmp.underflow);
+ if (ret != 0)
+ goto free_newinfo_counters;
+
+ duprintf("arp_tables: Translated table\n");
+
+ t = arpt_find_table_lock(tmp.name, &ret, &arpt_mutex);
+ if (!t)
+ goto free_newinfo_counters_untrans;
+
+ /* You lied! */
+ if (tmp.valid_hooks != t->valid_hooks) {
+ duprintf("Valid hook crap: %08X vs %08X\n",
+ tmp.valid_hooks, t->valid_hooks);
+ ret = -EINVAL;
+ goto free_newinfo_counters_untrans_unlock;
+ }
+
+ /* Get a reference in advance, we're not allowed fail later */
+ if (!try_module_get(t->me)) {
+ ret = -EBUSY;
+ goto free_newinfo_counters_untrans_unlock;
+ }
+
+ oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
+ if (!oldinfo)
+ goto put_module;
+
+ /* Update module usage count based on number of rules */
+ duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
+ oldinfo->number, oldinfo->initial_entries, newinfo->number);
+ if ((oldinfo->number > oldinfo->initial_entries) ||
+ (newinfo->number <= oldinfo->initial_entries))
+ module_put(t->me);
+ if ((oldinfo->number > oldinfo->initial_entries) &&
+ (newinfo->number <= oldinfo->initial_entries))
+ module_put(t->me);
+
+ /* Get the old counters. */
+ get_counters(oldinfo, counters);
+ /* Decrease module usage counts and free resource */
+ ARPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
+ vfree(oldinfo);
+ if (copy_to_user(tmp.counters, counters,
+ sizeof(struct arpt_counters) * tmp.num_counters) != 0)
+ ret = -EFAULT;
+ vfree(counters);
+ up(&arpt_mutex);
+ return ret;
+
+ put_module:
+ module_put(t->me);
+ free_newinfo_counters_untrans_unlock:
+ up(&arpt_mutex);
+ free_newinfo_counters_untrans:
+ ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry, NULL);
+ free_newinfo_counters:
+ vfree(counters);
+ free_newinfo:
+ vfree(newinfo);
+ return ret;
+}
+
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK.
+ */
+static inline int add_counter_to_entry(struct arpt_entry *e,
+ const struct arpt_counters addme[],
+ unsigned int *i)
+{
+
+ ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+ (*i)++;
+ return 0;
+}
+
+static int do_add_counters(void __user *user, unsigned int len)
+{
+ unsigned int i;
+ struct arpt_counters_info tmp, *paddc;
+ struct arpt_table *t;
+ int ret;
+
+ if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ return -EFAULT;
+
+ if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct arpt_counters))
+ return -EINVAL;
+
+ paddc = vmalloc(len);
+ if (!paddc)
+ return -ENOMEM;
+
+ if (copy_from_user(paddc, user, len) != 0) {
+ ret = -EFAULT;
+ goto free;
+ }
+
+ t = arpt_find_table_lock(tmp.name, &ret, &arpt_mutex);
+ if (!t)
+ goto free;
+
+ write_lock_bh(&t->lock);
+ if (t->private->number != paddc->num_counters) {
+ ret = -EINVAL;
+ goto unlock_up_free;
+ }
+
+ i = 0;
+ ARPT_ENTRY_ITERATE(t->private->entries,
+ t->private->size,
+ add_counter_to_entry,
+ paddc->counters,
+ &i);
+ unlock_up_free:
+ write_unlock_bh(&t->lock);
+ up(&arpt_mutex);
+ free:
+ vfree(paddc);
+
+ return ret;
+}
+
+static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+{
+ int ret;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case ARPT_SO_SET_REPLACE:
+ ret = do_replace(user, len);
+ break;
+
+ case ARPT_SO_SET_ADD_COUNTERS:
+ ret = do_add_counters(user, len);
+ break;
+
+ default:
+ duprintf("do_arpt_set_ctl: unknown request %i\n", cmd);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+ int ret;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case ARPT_SO_GET_INFO: {
+ char name[ARPT_TABLE_MAXNAMELEN];
+ struct arpt_table *t;
+
+ if (*len != sizeof(struct arpt_getinfo)) {
+ duprintf("length %u != %Zu\n", *len,
+ sizeof(struct arpt_getinfo));
+ ret = -EINVAL;
+ break;
+ }
+
+ if (copy_from_user(name, user, sizeof(name)) != 0) {
+ ret = -EFAULT;
+ break;
+ }
+ name[ARPT_TABLE_MAXNAMELEN-1] = '\0';
+ t = arpt_find_table_lock(name, &ret, &arpt_mutex);
+ if (t) {
+ struct arpt_getinfo info;
+
+ info.valid_hooks = t->valid_hooks;
+ memcpy(info.hook_entry, t->private->hook_entry,
+ sizeof(info.hook_entry));
+ memcpy(info.underflow, t->private->underflow,
+ sizeof(info.underflow));
+ info.num_entries = t->private->number;
+ info.size = t->private->size;
+ strcpy(info.name, name);
+
+ if (copy_to_user(user, &info, *len) != 0)
+ ret = -EFAULT;
+ else
+ ret = 0;
+
+ up(&arpt_mutex);
+ }
+ }
+ break;
+
+ case ARPT_SO_GET_ENTRIES: {
+ struct arpt_get_entries get;
+
+ if (*len < sizeof(get)) {
+ duprintf("get_entries: %u < %Zu\n", *len, sizeof(get));
+ ret = -EINVAL;
+ } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
+ ret = -EFAULT;
+ } else if (*len != sizeof(struct arpt_get_entries) + get.size) {
+ duprintf("get_entries: %u != %Zu\n", *len,
+ sizeof(struct arpt_get_entries) + get.size);
+ ret = -EINVAL;
+ } else
+ ret = get_entries(&get, user);
+ break;
+ }
+
+ default:
+ duprintf("do_arpt_get_ctl: unknown request %i\n", cmd);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+/* Registration hooks for targets. */
+int arpt_register_target(struct arpt_target *target)
+{
+ int ret;
+
+ ret = down_interruptible(&arpt_mutex);
+ if (ret != 0)
+ return ret;
+
+ if (!list_named_insert(&arpt_target, target)) {
+ duprintf("arpt_register_target: `%s' already in list!\n",
+ target->name);
+ ret = -EINVAL;
+ }
+ up(&arpt_mutex);
+ return ret;
+}
+
+void arpt_unregister_target(struct arpt_target *target)
+{
+ down(&arpt_mutex);
+ LIST_DELETE(&arpt_target, target);
+ up(&arpt_mutex);
+}
+
+int arpt_register_table(struct arpt_table *table,
+ const struct arpt_replace *repl)
+{
+ int ret;
+ struct arpt_table_info *newinfo;
+ static struct arpt_table_info bootstrap
+ = { 0, 0, 0, { 0 }, { 0 }, { } };
+
+ newinfo = vmalloc(sizeof(struct arpt_table_info)
+ + SMP_ALIGN(repl->size) * num_possible_cpus());
+ if (!newinfo) {
+ ret = -ENOMEM;
+ return ret;
+ }
+ memcpy(newinfo->entries, repl->entries, repl->size);
+
+ ret = translate_table(table->name, table->valid_hooks,
+ newinfo, repl->size,
+ repl->num_entries,
+ repl->hook_entry,
+ repl->underflow);
+ duprintf("arpt_register_table: translate table gives %d\n", ret);
+ if (ret != 0) {
+ vfree(newinfo);
+ return ret;
+ }
+
+ ret = down_interruptible(&arpt_mutex);
+ if (ret != 0) {
+ vfree(newinfo);
+ return ret;
+ }
+
+ /* Don't autoload: we'd eat our tail... */
+ if (list_named_find(&arpt_tables, table->name)) {
+ ret = -EEXIST;
+ goto free_unlock;
+ }
+
+ /* Simplifies replace_table code. */
+ table->private = &bootstrap;
+ if (!replace_table(table, 0, newinfo, &ret))
+ goto free_unlock;
+
+ duprintf("table->private->number = %u\n",
+ table->private->number);
+
+ /* save number of initial entries */
+ table->private->initial_entries = table->private->number;
+
+ rwlock_init(&table->lock);
+ list_prepend(&arpt_tables, table);
+
+ unlock:
+ up(&arpt_mutex);
+ return ret;
+
+ free_unlock:
+ vfree(newinfo);
+ goto unlock;
+}
+
+void arpt_unregister_table(struct arpt_table *table)
+{
+ down(&arpt_mutex);
+ LIST_DELETE(&arpt_tables, table);
+ up(&arpt_mutex);
+
+ /* Decrease module usage counts and free resources */
+ ARPT_ENTRY_ITERATE(table->private->entries, table->private->size,
+ cleanup_entry, NULL);
+ vfree(table->private);
+}
+
+/* The built-in targets: standard (NULL) and error. */
+static struct arpt_target arpt_standard_target = {
+ .name = ARPT_STANDARD_TARGET,
+};
+
+static struct arpt_target arpt_error_target = {
+ .name = ARPT_ERROR_TARGET,
+ .target = arpt_error,
+};
+
+static struct nf_sockopt_ops arpt_sockopts = {
+ .pf = PF_INET,
+ .set_optmin = ARPT_BASE_CTL,
+ .set_optmax = ARPT_SO_SET_MAX+1,
+ .set = do_arpt_set_ctl,
+ .get_optmin = ARPT_BASE_CTL,
+ .get_optmax = ARPT_SO_GET_MAX+1,
+ .get = do_arpt_get_ctl,
+};
+
+#ifdef CONFIG_PROC_FS
+static inline int print_name(const struct arpt_table *t,
+ off_t start_offset, char *buffer, int length,
+ off_t *pos, unsigned int *count)
+{
+ if ((*count)++ >= start_offset) {
+ unsigned int namelen;
+
+ namelen = sprintf(buffer + *pos, "%s\n", t->name);
+ if (*pos + namelen > length) {
+ /* Stop iterating */
+ return 1;
+ }
+ *pos += namelen;
+ }
+ return 0;
+}
+
+static int arpt_get_tables(char *buffer, char **start, off_t offset, int length)
+{
+ off_t pos = 0;
+ unsigned int count = 0;
+
+ if (down_interruptible(&arpt_mutex) != 0)
+ return 0;
+
+ LIST_FIND(&arpt_tables, print_name, struct arpt_table *,
+ offset, buffer, length, &pos, &count);
+
+ up(&arpt_mutex);
+
+ /* `start' hack - see fs/proc/generic.c line ~105 */
+ *start=(char *)((unsigned long)count-offset);
+ return pos;
+}
+#endif /*CONFIG_PROC_FS*/
+
+static int __init init(void)
+{
+ int ret;
+
+ /* Noone else will be downing sem now, so we won't sleep */
+ down(&arpt_mutex);
+ list_append(&arpt_target, &arpt_standard_target);
+ list_append(&arpt_target, &arpt_error_target);
+ up(&arpt_mutex);
+
+ /* Register setsockopt */
+ ret = nf_register_sockopt(&arpt_sockopts);
+ if (ret < 0) {
+ duprintf("Unable to register sockopts.\n");
+ return ret;
+ }
+
+#ifdef CONFIG_PROC_FS
+ {
+ struct proc_dir_entry *proc;
+
+ proc = proc_net_create("arp_tables_names", 0, arpt_get_tables);
+ if (!proc) {
+ nf_unregister_sockopt(&arpt_sockopts);
+ return -ENOMEM;
+ }
+ proc->owner = THIS_MODULE;
+ }
+#endif
+
+ printk("arp_tables: (C) 2002 David S. Miller\n");
+ return 0;
+}
+
+static void __exit fini(void)
+{
+ nf_unregister_sockopt(&arpt_sockopts);
+#ifdef CONFIG_PROC_FS
+ proc_net_remove("arp_tables_names");
+#endif
+}
+
+EXPORT_SYMBOL(arpt_register_table);
+EXPORT_SYMBOL(arpt_unregister_table);
+EXPORT_SYMBOL(arpt_do_table);
+EXPORT_SYMBOL(arpt_register_target);
+EXPORT_SYMBOL(arpt_unregister_target);
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
new file mode 100644
index 00000000000..3e592ec8648
--- /dev/null
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -0,0 +1,104 @@
+/* module that allows mangling of the arp payload */
+#include <linux/module.h>
+#include <linux/netfilter_arp/arpt_mangle.h>
+#include <net/sock.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
+MODULE_DESCRIPTION("arptables arp payload mangle target");
+
+static unsigned int
+target(struct sk_buff **pskb, unsigned int hooknum, const struct net_device *in,
+ const struct net_device *out, const void *targinfo, void *userinfo)
+{
+ const struct arpt_mangle *mangle = targinfo;
+ struct arphdr *arp;
+ unsigned char *arpptr;
+ int pln, hln;
+
+ if (skb_shared(*pskb) || skb_cloned(*pskb)) {
+ struct sk_buff *nskb;
+
+ nskb = skb_copy(*pskb, GFP_ATOMIC);
+ if (!nskb)
+ return NF_DROP;
+ if ((*pskb)->sk)
+ skb_set_owner_w(nskb, (*pskb)->sk);
+ kfree_skb(*pskb);
+ *pskb = nskb;
+ }
+
+ arp = (*pskb)->nh.arph;
+ arpptr = (*pskb)->nh.raw + sizeof(*arp);
+ pln = arp->ar_pln;
+ hln = arp->ar_hln;
+ /* We assume that pln and hln were checked in the match */
+ if (mangle->flags & ARPT_MANGLE_SDEV) {
+ if (ARPT_DEV_ADDR_LEN_MAX < hln ||
+ (arpptr + hln > (**pskb).tail))
+ return NF_DROP;
+ memcpy(arpptr, mangle->src_devaddr, hln);
+ }
+ arpptr += hln;
+ if (mangle->flags & ARPT_MANGLE_SIP) {
+ if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
+ (arpptr + pln > (**pskb).tail))
+ return NF_DROP;
+ memcpy(arpptr, &mangle->u_s.src_ip, pln);
+ }
+ arpptr += pln;
+ if (mangle->flags & ARPT_MANGLE_TDEV) {
+ if (ARPT_DEV_ADDR_LEN_MAX < hln ||
+ (arpptr + hln > (**pskb).tail))
+ return NF_DROP;
+ memcpy(arpptr, mangle->tgt_devaddr, hln);
+ }
+ arpptr += hln;
+ if (mangle->flags & ARPT_MANGLE_TIP) {
+ if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
+ (arpptr + pln > (**pskb).tail))
+ return NF_DROP;
+ memcpy(arpptr, &mangle->u_t.tgt_ip, pln);
+ }
+ return mangle->target;
+}
+
+static int
+checkentry(const char *tablename, const struct arpt_entry *e, void *targinfo,
+ unsigned int targinfosize, unsigned int hook_mask)
+{
+ const struct arpt_mangle *mangle = targinfo;
+
+ if (mangle->flags & ~ARPT_MANGLE_MASK ||
+ !(mangle->flags & ARPT_MANGLE_MASK))
+ return 0;
+
+ if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT &&
+ mangle->target != ARPT_CONTINUE)
+ return 0;
+ return 1;
+}
+
+static struct arpt_target arpt_mangle_reg
+= {
+ .name = "mangle",
+ .target = target,
+ .checkentry = checkentry,
+ .me = THIS_MODULE,
+};
+
+static int __init init(void)
+{
+ if (arpt_register_target(&arpt_mangle_reg))
+ return -EINVAL;
+
+ return 0;
+}
+
+static void __exit fini(void)
+{
+ arpt_unregister_target(&arpt_mangle_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
new file mode 100644
index 00000000000..0d759f5a4ef
--- /dev/null
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -0,0 +1,214 @@
+/*
+ * Filtering ARP tables module.
+ *
+ * Copyright (C) 2002 David S. Miller (davem@redhat.com)
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/netfilter_arp/arp_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("David S. Miller <davem@redhat.com>");
+MODULE_DESCRIPTION("arptables filter table");
+
+#define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \
+ (1 << NF_ARP_FORWARD))
+
+/* Standard entry. */
+struct arpt_standard
+{
+ struct arpt_entry entry;
+ struct arpt_standard_target target;
+};
+
+struct arpt_error_target
+{
+ struct arpt_entry_target target;
+ char errorname[ARPT_FUNCTION_MAXNAMELEN];
+};
+
+struct arpt_error
+{
+ struct arpt_entry entry;
+ struct arpt_error_target target;
+};
+
+static struct
+{
+ struct arpt_replace repl;
+ struct arpt_standard entries[3];
+ struct arpt_error term;
+} initial_table __initdata
+= { { "filter", FILTER_VALID_HOOKS, 4,
+ sizeof(struct arpt_standard) * 3 + sizeof(struct arpt_error),
+ { [NF_ARP_IN] = 0,
+ [NF_ARP_OUT] = sizeof(struct arpt_standard),
+ [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard), },
+ { [NF_ARP_IN] = 0,
+ [NF_ARP_OUT] = sizeof(struct arpt_standard),
+ [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard), },
+ 0, NULL, { } },
+ {
+ /* ARP_IN */
+ {
+ {
+ {
+ { 0 }, { 0 }, { 0 }, { 0 },
+ 0, 0,
+ { { 0, }, { 0, } },
+ { { 0, }, { 0, } },
+ 0, 0,
+ 0, 0,
+ 0, 0,
+ "", "", { 0 }, { 0 },
+ 0, 0
+ },
+ sizeof(struct arpt_entry),
+ sizeof(struct arpt_standard),
+ 0,
+ { 0, 0 }, { } },
+ { { { { ARPT_ALIGN(sizeof(struct arpt_standard_target)), "" } }, { } },
+ -NF_ACCEPT - 1 }
+ },
+ /* ARP_OUT */
+ {
+ {
+ {
+ { 0 }, { 0 }, { 0 }, { 0 },
+ 0, 0,
+ { { 0, }, { 0, } },
+ { { 0, }, { 0, } },
+ 0, 0,
+ 0, 0,
+ 0, 0,
+ "", "", { 0 }, { 0 },
+ 0, 0
+ },
+ sizeof(struct arpt_entry),
+ sizeof(struct arpt_standard),
+ 0,
+ { 0, 0 }, { } },
+ { { { { ARPT_ALIGN(sizeof(struct arpt_standard_target)), "" } }, { } },
+ -NF_ACCEPT - 1 }
+ },
+ /* ARP_FORWARD */
+ {
+ {
+ {
+ { 0 }, { 0 }, { 0 }, { 0 },
+ 0, 0,
+ { { 0, }, { 0, } },
+ { { 0, }, { 0, } },
+ 0, 0,
+ 0, 0,
+ 0, 0,
+ "", "", { 0 }, { 0 },
+ 0, 0
+ },
+ sizeof(struct arpt_entry),
+ sizeof(struct arpt_standard),
+ 0,
+ { 0, 0 }, { } },
+ { { { { ARPT_ALIGN(sizeof(struct arpt_standard_target)), "" } }, { } },
+ -NF_ACCEPT - 1 }
+ }
+ },
+ /* ERROR */
+ {
+ {
+ {
+ { 0 }, { 0 }, { 0 }, { 0 },
+ 0, 0,
+ { { 0, }, { 0, } },
+ { { 0, }, { 0, } },
+ 0, 0,
+ 0, 0,
+ 0, 0,
+ "", "", { 0 }, { 0 },
+ 0, 0
+ },
+ sizeof(struct arpt_entry),
+ sizeof(struct arpt_error),
+ 0,
+ { 0, 0 }, { } },
+ { { { { ARPT_ALIGN(sizeof(struct arpt_error_target)), ARPT_ERROR_TARGET } },
+ { } },
+ "ERROR"
+ }
+ }
+};
+
+static struct arpt_table packet_filter = {
+ .name = "filter",
+ .valid_hooks = FILTER_VALID_HOOKS,
+ .lock = RW_LOCK_UNLOCKED,
+ .private = NULL,
+ .me = THIS_MODULE,
+};
+
+/* The work comes in here from netfilter.c */
+static unsigned int arpt_hook(unsigned int hook,
+ struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ return arpt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+}
+
+static struct nf_hook_ops arpt_ops[] = {
+ {
+ .hook = arpt_hook,
+ .owner = THIS_MODULE,
+ .pf = NF_ARP,
+ .hooknum = NF_ARP_IN,
+ },
+ {
+ .hook = arpt_hook,
+ .owner = THIS_MODULE,
+ .pf = NF_ARP,
+ .hooknum = NF_ARP_OUT,
+ },
+ {
+ .hook = arpt_hook,
+ .owner = THIS_MODULE,
+ .pf = NF_ARP,
+ .hooknum = NF_ARP_FORWARD,
+ },
+};
+
+static int __init init(void)
+{
+ int ret, i;
+
+ /* Register table */
+ ret = arpt_register_table(&packet_filter, &initial_table.repl);
+ if (ret < 0)
+ return ret;
+
+ for (i = 0; i < ARRAY_SIZE(arpt_ops); i++)
+ if ((ret = nf_register_hook(&arpt_ops[i])) < 0)
+ goto cleanup_hooks;
+ return ret;
+
+cleanup_hooks:
+ while (--i >= 0)
+ nf_unregister_hook(&arpt_ops[i]);
+
+ arpt_unregister_table(&packet_filter);
+ return ret;
+}
+
+static void __exit fini(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(arpt_ops); i++)
+ nf_unregister_hook(&arpt_ops[i]);
+
+ arpt_unregister_table(&packet_filter);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
new file mode 100644
index 00000000000..3dbddd06260
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_amanda.c
@@ -0,0 +1,167 @@
+/* Amanda extension for IP connection tracking, Version 0.2
+ * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
+ * based on HW's ip_conntrack_irc.c as well as other modules
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Module load syntax:
+ * insmod ip_conntrack_amanda.o [master_timeout=n]
+ *
+ * Where master_timeout is the timeout (in seconds) of the master
+ * connection (port 10080). This defaults to 5 minutes but if
+ * your clients take longer than 5 minutes to do their work
+ * before getting back to the Amanda server, you can increase
+ * this value.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/ip.h>
+#include <linux/moduleparam.h>
+#include <net/checksum.h>
+#include <net/udp.h>
+
+#include <linux/netfilter_ipv4/lockhelp.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_amanda.h>
+
+static unsigned int master_timeout = 300;
+
+MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
+MODULE_DESCRIPTION("Amanda connection tracking module");
+MODULE_LICENSE("GPL");
+module_param(master_timeout, int, 0600);
+MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
+
+static char *conns[] = { "DATA ", "MESG ", "INDEX " };
+
+/* This is slow, but it's simple. --RR */
+static char amanda_buffer[65536];
+static DECLARE_LOCK(amanda_buffer_lock);
+
+unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct ip_conntrack_expect *exp);
+EXPORT_SYMBOL_GPL(ip_nat_amanda_hook);
+
+static int help(struct sk_buff **pskb,
+ struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
+{
+ struct ip_conntrack_expect *exp;
+ char *data, *data_limit, *tmp;
+ unsigned int dataoff, i;
+ u_int16_t port, len;
+ int ret = NF_ACCEPT;
+
+ /* Only look at packets from the Amanda server */
+ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
+ return NF_ACCEPT;
+
+ /* increase the UDP timeout of the master connection as replies from
+ * Amanda clients to the server can be quite delayed */
+ ip_ct_refresh_acct(ct, ctinfo, NULL, master_timeout * HZ);
+
+ /* No data? */
+ dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+ if (dataoff >= (*pskb)->len) {
+ if (net_ratelimit())
+ printk("amanda_help: skblen = %u\n", (*pskb)->len);
+ return NF_ACCEPT;
+ }
+
+ LOCK_BH(&amanda_buffer_lock);
+ skb_copy_bits(*pskb, dataoff, amanda_buffer, (*pskb)->len - dataoff);
+ data = amanda_buffer;
+ data_limit = amanda_buffer + (*pskb)->len - dataoff;
+ *data_limit = '\0';
+
+ /* Search for the CONNECT string */
+ data = strstr(data, "CONNECT ");
+ if (!data)
+ goto out;
+ data += strlen("CONNECT ");
+
+ /* Only search first line. */
+ if ((tmp = strchr(data, '\n')))
+ *tmp = '\0';
+
+ for (i = 0; i < ARRAY_SIZE(conns); i++) {
+ char *match = strstr(data, conns[i]);
+ if (!match)
+ continue;
+ tmp = data = match + strlen(conns[i]);
+ port = simple_strtoul(data, &data, 10);
+ len = data - tmp;
+ if (port == 0 || len > 5)
+ break;
+
+ exp = ip_conntrack_expect_alloc();
+ if (exp == NULL) {
+ ret = NF_DROP;
+ goto out;
+ }
+
+ exp->expectfn = NULL;
+ exp->master = ct;
+
+ exp->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
+ exp->tuple.src.u.tcp.port = 0;
+ exp->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
+ exp->tuple.dst.protonum = IPPROTO_TCP;
+ exp->tuple.dst.u.tcp.port = htons(port);
+
+ exp->mask.src.ip = 0xFFFFFFFF;
+ exp->mask.src.u.tcp.port = 0;
+ exp->mask.dst.ip = 0xFFFFFFFF;
+ exp->mask.dst.protonum = 0xFF;
+ exp->mask.dst.u.tcp.port = 0xFFFF;
+
+ if (ip_nat_amanda_hook)
+ ret = ip_nat_amanda_hook(pskb, ctinfo,
+ tmp - amanda_buffer,
+ len, exp);
+ else if (ip_conntrack_expect_related(exp) != 0) {
+ ip_conntrack_expect_free(exp);
+ ret = NF_DROP;
+ }
+ }
+
+out:
+ UNLOCK_BH(&amanda_buffer_lock);
+ return ret;
+}
+
+static struct ip_conntrack_helper amanda_helper = {
+ .max_expected = ARRAY_SIZE(conns),
+ .timeout = 180,
+ .me = THIS_MODULE,
+ .help = help,
+ .name = "amanda",
+
+ .tuple = { .src = { .u = { __constant_htons(10080) } },
+ .dst = { .protonum = IPPROTO_UDP },
+ },
+ .mask = { .src = { .u = { 0xFFFF } },
+ .dst = { .protonum = 0xFF },
+ },
+};
+
+static void __exit fini(void)
+{
+ ip_conntrack_helper_unregister(&amanda_helper);
+}
+
+static int __init init(void)
+{
+ return ip_conntrack_helper_register(&amanda_helper);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
new file mode 100644
index 00000000000..28d9425d5c3
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -0,0 +1,1247 @@
+/* Connection state tracking for netfilter. This is separated from,
+ but required by, the NAT layer; it can also be used by an iptables
+ extension. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
+ * - new API and handling of conntrack/nat helpers
+ * - now capable of multiple expectations for one master
+ * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
+ * - add usage/reference counts to ip_conntrack_expect
+ * - export ip_conntrack[_expect]_{find_get,put} functions
+ * */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/icmp.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/vmalloc.h>
+#include <net/checksum.h>
+#include <net/ip.h>
+#include <linux/stddef.h>
+#include <linux/sysctl.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/jhash.h>
+#include <linux/err.h>
+#include <linux/percpu.h>
+#include <linux/moduleparam.h>
+
+/* This rwlock protects the main hash table, protocol/helper/expected
+ registrations, conntrack timers*/
+#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
+#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)
+
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/listhelp.h>
+
+#define IP_CONNTRACK_VERSION "2.1"
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+DECLARE_RWLOCK(ip_conntrack_lock);
+
+/* ip_conntrack_standalone needs this */
+atomic_t ip_conntrack_count = ATOMIC_INIT(0);
+
+void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
+LIST_HEAD(ip_conntrack_expect_list);
+struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
+static LIST_HEAD(helpers);
+unsigned int ip_conntrack_htable_size = 0;
+int ip_conntrack_max;
+struct list_head *ip_conntrack_hash;
+static kmem_cache_t *ip_conntrack_cachep;
+static kmem_cache_t *ip_conntrack_expect_cachep;
+struct ip_conntrack ip_conntrack_untracked;
+unsigned int ip_ct_log_invalid;
+static LIST_HEAD(unconfirmed);
+static int ip_conntrack_vmalloc;
+
+DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
+
+void
+ip_conntrack_put(struct ip_conntrack *ct)
+{
+ IP_NF_ASSERT(ct);
+ nf_conntrack_put(&ct->ct_general);
+}
+
+static int ip_conntrack_hash_rnd_initted;
+static unsigned int ip_conntrack_hash_rnd;
+
+static u_int32_t
+hash_conntrack(const struct ip_conntrack_tuple *tuple)
+{
+#if 0
+ dump_tuple(tuple);
+#endif
+ return (jhash_3words(tuple->src.ip,
+ (tuple->dst.ip ^ tuple->dst.protonum),
+ (tuple->src.u.all | (tuple->dst.u.all << 16)),
+ ip_conntrack_hash_rnd) % ip_conntrack_htable_size);
+}
+
+int
+ip_ct_get_tuple(const struct iphdr *iph,
+ const struct sk_buff *skb,
+ unsigned int dataoff,
+ struct ip_conntrack_tuple *tuple,
+ const struct ip_conntrack_protocol *protocol)
+{
+ /* Never happen */
+ if (iph->frag_off & htons(IP_OFFSET)) {
+ printk("ip_conntrack_core: Frag of proto %u.\n",
+ iph->protocol);
+ return 0;
+ }
+
+ tuple->src.ip = iph->saddr;
+ tuple->dst.ip = iph->daddr;
+ tuple->dst.protonum = iph->protocol;
+ tuple->dst.dir = IP_CT_DIR_ORIGINAL;
+
+ return protocol->pkt_to_tuple(skb, dataoff, tuple);
+}
+
+int
+ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
+ const struct ip_conntrack_tuple *orig,
+ const struct ip_conntrack_protocol *protocol)
+{
+ inverse->src.ip = orig->dst.ip;
+ inverse->dst.ip = orig->src.ip;
+ inverse->dst.protonum = orig->dst.protonum;
+ inverse->dst.dir = !orig->dst.dir;
+
+ return protocol->invert_tuple(inverse, orig);
+}
+
+
+/* ip_conntrack_expect helper functions */
+static void destroy_expect(struct ip_conntrack_expect *exp)
+{
+ ip_conntrack_put(exp->master);
+ IP_NF_ASSERT(!timer_pending(&exp->timeout));
+ kmem_cache_free(ip_conntrack_expect_cachep, exp);
+ CONNTRACK_STAT_INC(expect_delete);
+}
+
+static void unlink_expect(struct ip_conntrack_expect *exp)
+{
+ MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
+ list_del(&exp->list);
+ /* Logically in destroy_expect, but we hold the lock here. */
+ exp->master->expecting--;
+}
+
+static void expectation_timed_out(unsigned long ul_expect)
+{
+ struct ip_conntrack_expect *exp = (void *)ul_expect;
+
+ WRITE_LOCK(&ip_conntrack_lock);
+ unlink_expect(exp);
+ WRITE_UNLOCK(&ip_conntrack_lock);
+ destroy_expect(exp);
+}
+
+/* If an expectation for this connection is found, it gets delete from
+ * global list then returned. */
+static struct ip_conntrack_expect *
+find_expectation(const struct ip_conntrack_tuple *tuple)
+{
+ struct ip_conntrack_expect *i;
+
+ list_for_each_entry(i, &ip_conntrack_expect_list, list) {
+ /* If master is not in hash table yet (ie. packet hasn't left
+ this machine yet), how can other end know about expected?
+ Hence these are not the droids you are looking for (if
+ master ct never got confirmed, we'd hold a reference to it
+ and weird things would happen to future packets). */
+ if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
+ && is_confirmed(i->master)
+ && del_timer(&i->timeout)) {
+ unlink_expect(i);
+ return i;
+ }
+ }
+ return NULL;
+}
+
+/* delete all expectations for this conntrack */
+static void remove_expectations(struct ip_conntrack *ct)
+{
+ struct ip_conntrack_expect *i, *tmp;
+
+ /* Optimization: most connection never expect any others. */
+ if (ct->expecting == 0)
+ return;
+
+ list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
+ if (i->master == ct && del_timer(&i->timeout)) {
+ unlink_expect(i);
+ destroy_expect(i);
+ }
+ }
+}
+
+static void
+clean_from_lists(struct ip_conntrack *ct)
+{
+ unsigned int ho, hr;
+
+ DEBUGP("clean_from_lists(%p)\n", ct);
+ MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
+
+ ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
+ LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
+
+ /* Destroy all pending expectations */
+ remove_expectations(ct);
+}
+
+static void
+destroy_conntrack(struct nf_conntrack *nfct)
+{
+ struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
+ struct ip_conntrack_protocol *proto;
+
+ DEBUGP("destroy_conntrack(%p)\n", ct);
+ IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
+ IP_NF_ASSERT(!timer_pending(&ct->timeout));
+
+ /* To make sure we don't get any weird locking issues here:
+ * destroy_conntrack() MUST NOT be called with a write lock
+ * to ip_conntrack_lock!!! -HW */
+ proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
+ if (proto && proto->destroy)
+ proto->destroy(ct);
+
+ if (ip_conntrack_destroyed)
+ ip_conntrack_destroyed(ct);
+
+ WRITE_LOCK(&ip_conntrack_lock);
+ /* Expectations will have been removed in clean_from_lists,
+ * except TFTP can create an expectation on the first packet,
+ * before connection is in the list, so we need to clean here,
+ * too. */
+ remove_expectations(ct);
+
+ /* We overload first tuple to link into unconfirmed list. */
+ if (!is_confirmed(ct)) {
+ BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
+ list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+ }
+
+ CONNTRACK_STAT_INC(delete);
+ WRITE_UNLOCK(&ip_conntrack_lock);
+
+ if (ct->master)
+ ip_conntrack_put(ct->master);
+
+ DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
+ kmem_cache_free(ip_conntrack_cachep, ct);
+ atomic_dec(&ip_conntrack_count);
+}
+
+static void death_by_timeout(unsigned long ul_conntrack)
+{
+ struct ip_conntrack *ct = (void *)ul_conntrack;
+
+ WRITE_LOCK(&ip_conntrack_lock);
+ /* Inside lock so preempt is disabled on module removal path.
+ * Otherwise we can get spurious warnings. */
+ CONNTRACK_STAT_INC(delete_list);
+ clean_from_lists(ct);
+ WRITE_UNLOCK(&ip_conntrack_lock);
+ ip_conntrack_put(ct);
+}
+
+static inline int
+conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
+ const struct ip_conntrack_tuple *tuple,
+ const struct ip_conntrack *ignored_conntrack)
+{
+ MUST_BE_READ_LOCKED(&ip_conntrack_lock);
+ return tuplehash_to_ctrack(i) != ignored_conntrack
+ && ip_ct_tuple_equal(tuple, &i->tuple);
+}
+
+static struct ip_conntrack_tuple_hash *
+__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
+ const struct ip_conntrack *ignored_conntrack)
+{
+ struct ip_conntrack_tuple_hash *h;
+ unsigned int hash = hash_conntrack(tuple);
+
+ MUST_BE_READ_LOCKED(&ip_conntrack_lock);
+ list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
+ if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
+ CONNTRACK_STAT_INC(found);
+ return h;
+ }
+ CONNTRACK_STAT_INC(searched);
+ }
+
+ return NULL;
+}
+
+/* Find a connection corresponding to a tuple. */
+struct ip_conntrack_tuple_hash *
+ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
+ const struct ip_conntrack *ignored_conntrack)
+{
+ struct ip_conntrack_tuple_hash *h;
+
+ READ_LOCK(&ip_conntrack_lock);
+ h = __ip_conntrack_find(tuple, ignored_conntrack);
+ if (h)
+ atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
+ READ_UNLOCK(&ip_conntrack_lock);
+
+ return h;
+}
+
+/* Confirm a connection given skb; places it in hash table */
+int
+__ip_conntrack_confirm(struct sk_buff **pskb)
+{
+ unsigned int hash, repl_hash;
+ struct ip_conntrack *ct;
+ enum ip_conntrack_info ctinfo;
+
+ ct = ip_conntrack_get(*pskb, &ctinfo);
+
+ /* ipt_REJECT uses ip_conntrack_attach to attach related
+ ICMP/TCP RST packets in other direction. Actual packet
+ which created connection will be IP_CT_NEW or for an
+ expected connection, IP_CT_RELATED. */
+ if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+ return NF_ACCEPT;
+
+ hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+
+ /* We're not in hash table, and we refuse to set up related
+ connections for unconfirmed conns. But packet copies and
+ REJECT will give spurious warnings here. */
+ /* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
+
+ /* No external references means noone else could have
+ confirmed us. */
+ IP_NF_ASSERT(!is_confirmed(ct));
+ DEBUGP("Confirming conntrack %p\n", ct);
+
+ WRITE_LOCK(&ip_conntrack_lock);
+
+ /* See if there's one in the list already, including reverse:
+ NAT could have grabbed it without realizing, since we're
+ not in the hash. If there is, we lost race. */
+ if (!LIST_FIND(&ip_conntrack_hash[hash],
+ conntrack_tuple_cmp,
+ struct ip_conntrack_tuple_hash *,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
+ && !LIST_FIND(&ip_conntrack_hash[repl_hash],
+ conntrack_tuple_cmp,
+ struct ip_conntrack_tuple_hash *,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
+ /* Remove from unconfirmed list */
+ list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+
+ list_prepend(&ip_conntrack_hash[hash],
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
+ list_prepend(&ip_conntrack_hash[repl_hash],
+ &ct->tuplehash[IP_CT_DIR_REPLY]);
+ /* Timer relative to confirmation time, not original
+ setting time, otherwise we'd get timer wrap in
+ weird delay cases. */
+ ct->timeout.expires += jiffies;
+ add_timer(&ct->timeout);
+ atomic_inc(&ct->ct_general.use);
+ set_bit(IPS_CONFIRMED_BIT, &ct->status);
+ CONNTRACK_STAT_INC(insert);
+ WRITE_UNLOCK(&ip_conntrack_lock);
+ return NF_ACCEPT;
+ }
+
+ CONNTRACK_STAT_INC(insert_failed);
+ WRITE_UNLOCK(&ip_conntrack_lock);
+
+ return NF_DROP;
+}
+
+/* Returns true if a connection correspondings to the tuple (required
+ for NAT). */
+int
+ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
+ const struct ip_conntrack *ignored_conntrack)
+{
+ struct ip_conntrack_tuple_hash *h;
+
+ READ_LOCK(&ip_conntrack_lock);
+ h = __ip_conntrack_find(tuple, ignored_conntrack);
+ READ_UNLOCK(&ip_conntrack_lock);
+
+ return h != NULL;
+}
+
+/* There's a small race here where we may free a just-assured
+ connection. Too bad: we're in trouble anyway. */
+static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
+{
+ return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status));
+}
+
+static int early_drop(struct list_head *chain)
+{
+ /* Traverse backwards: gives us oldest, which is roughly LRU */
+ struct ip_conntrack_tuple_hash *h;
+ struct ip_conntrack *ct = NULL;
+ int dropped = 0;
+
+ READ_LOCK(&ip_conntrack_lock);
+ h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
+ if (h) {
+ ct = tuplehash_to_ctrack(h);
+ atomic_inc(&ct->ct_general.use);
+ }
+ READ_UNLOCK(&ip_conntrack_lock);
+
+ if (!ct)
+ return dropped;
+
+ if (del_timer(&ct->timeout)) {
+ death_by_timeout((unsigned long)ct);
+ dropped = 1;
+ CONNTRACK_STAT_INC(early_drop);
+ }
+ ip_conntrack_put(ct);
+ return dropped;
+}
+
+static inline int helper_cmp(const struct ip_conntrack_helper *i,
+ const struct ip_conntrack_tuple *rtuple)
+{
+ return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
+}
+
+static struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple)
+{
+ return LIST_FIND(&helpers, helper_cmp,
+ struct ip_conntrack_helper *,
+ tuple);
+}
+
+/* Allocate a new conntrack: we return -ENOMEM if classification
+ failed due to stress. Otherwise it really is unclassifiable. */
+static struct ip_conntrack_tuple_hash *
+init_conntrack(const struct ip_conntrack_tuple *tuple,
+ struct ip_conntrack_protocol *protocol,
+ struct sk_buff *skb)
+{
+ struct ip_conntrack *conntrack;
+ struct ip_conntrack_tuple repl_tuple;
+ size_t hash;
+ struct ip_conntrack_expect *exp;
+
+ if (!ip_conntrack_hash_rnd_initted) {
+ get_random_bytes(&ip_conntrack_hash_rnd, 4);
+ ip_conntrack_hash_rnd_initted = 1;
+ }
+
+ hash = hash_conntrack(tuple);
+
+ if (ip_conntrack_max
+ && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
+ /* Try dropping from this hash chain. */
+ if (!early_drop(&ip_conntrack_hash[hash])) {
+ if (net_ratelimit())
+ printk(KERN_WARNING
+ "ip_conntrack: table full, dropping"
+ " packet.\n");
+ return ERR_PTR(-ENOMEM);
+ }
+ }
+
+ if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
+ DEBUGP("Can't invert tuple.\n");
+ return NULL;
+ }
+
+ conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
+ if (!conntrack) {
+ DEBUGP("Can't allocate conntrack.\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ memset(conntrack, 0, sizeof(*conntrack));
+ atomic_set(&conntrack->ct_general.use, 1);
+ conntrack->ct_general.destroy = destroy_conntrack;
+ conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple;
+ conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple;
+ if (!protocol->new(conntrack, skb)) {
+ kmem_cache_free(ip_conntrack_cachep, conntrack);
+ return NULL;
+ }
+ /* Don't set timer yet: wait for confirmation */
+ init_timer(&conntrack->timeout);
+ conntrack->timeout.data = (unsigned long)conntrack;
+ conntrack->timeout.function = death_by_timeout;
+
+ WRITE_LOCK(&ip_conntrack_lock);
+ exp = find_expectation(tuple);
+
+ if (exp) {
+ DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
+ conntrack, exp);
+ /* Welcome, Mr. Bond. We've been expecting you... */
+ __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
+ conntrack->master = exp->master;
+#if CONFIG_IP_NF_CONNTRACK_MARK
+ conntrack->mark = exp->master->mark;
+#endif
+ nf_conntrack_get(&conntrack->master->ct_general);
+ CONNTRACK_STAT_INC(expect_new);
+ } else {
+ conntrack->helper = ip_ct_find_helper(&repl_tuple);
+
+ CONNTRACK_STAT_INC(new);
+ }
+
+ /* Overload tuple linked list to put us in unconfirmed list. */
+ list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
+
+ atomic_inc(&ip_conntrack_count);
+ WRITE_UNLOCK(&ip_conntrack_lock);
+
+ if (exp) {
+ if (exp->expectfn)
+ exp->expectfn(conntrack, exp);
+ destroy_expect(exp);
+ }
+
+ return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
+}
+
+/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
+static inline struct ip_conntrack *
+resolve_normal_ct(struct sk_buff *skb,
+ struct ip_conntrack_protocol *proto,
+ int *set_reply,
+ unsigned int hooknum,
+ enum ip_conntrack_info *ctinfo)
+{
+ struct ip_conntrack_tuple tuple;
+ struct ip_conntrack_tuple_hash *h;
+ struct ip_conntrack *ct;
+
+ IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
+
+ if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4,
+ &tuple,proto))
+ return NULL;
+
+ /* look for tuple match */
+ h = ip_conntrack_find_get(&tuple, NULL);
+ if (!h) {
+ h = init_conntrack(&tuple, proto, skb);
+ if (!h)
+ return NULL;
+ if (IS_ERR(h))
+ return (void *)h;
+ }
+ ct = tuplehash_to_ctrack(h);
+
+ /* It exists; we have (non-exclusive) reference. */
+ if (DIRECTION(h) == IP_CT_DIR_REPLY) {
+ *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
+ /* Please set reply bit if this packet OK */
+ *set_reply = 1;
+ } else {
+ /* Once we've had two way comms, always ESTABLISHED. */
+ if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ DEBUGP("ip_conntrack_in: normal packet for %p\n",
+ ct);
+ *ctinfo = IP_CT_ESTABLISHED;
+ } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
+ DEBUGP("ip_conntrack_in: related packet for %p\n",
+ ct);
+ *ctinfo = IP_CT_RELATED;
+ } else {
+ DEBUGP("ip_conntrack_in: new packet for %p\n",
+ ct);
+ *ctinfo = IP_CT_NEW;
+ }
+ *set_reply = 0;
+ }
+ skb->nfct = &ct->ct_general;
+ skb->nfctinfo = *ctinfo;
+ return ct;
+}
+
+/* Netfilter hook itself. */
+unsigned int ip_conntrack_in(unsigned int hooknum,
+ struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct ip_conntrack *ct;
+ enum ip_conntrack_info ctinfo;
+ struct ip_conntrack_protocol *proto;
+ int set_reply;
+ int ret;
+
+ /* Previously seen (loopback or untracked)? Ignore. */
+ if ((*pskb)->nfct) {
+ CONNTRACK_STAT_INC(ignore);
+ return NF_ACCEPT;
+ }
+
+ /* Never happen */
+ if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
+ if (net_ratelimit()) {
+ printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
+ (*pskb)->nh.iph->protocol, hooknum);
+ }
+ return NF_DROP;
+ }
+
+ /* FIXME: Do this right please. --RR */
+ (*pskb)->nfcache |= NFC_UNKNOWN;
+
+/* Doesn't cover locally-generated broadcast, so not worth it. */
+#if 0
+ /* Ignore broadcast: no `connection'. */
+ if ((*pskb)->pkt_type == PACKET_BROADCAST) {
+ printk("Broadcast packet!\n");
+ return NF_ACCEPT;
+ } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
+ == htonl(0x000000FF)) {
+ printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
+ NIPQUAD((*pskb)->nh.iph->saddr),
+ NIPQUAD((*pskb)->nh.iph->daddr),
+ (*pskb)->sk, (*pskb)->pkt_type);
+ }
+#endif
+
+ proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);
+
+ /* It may be an special packet, error, unclean...
+ * inverse of the return code tells to the netfilter
+ * core what to do with the packet. */
+ if (proto->error != NULL
+ && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
+ CONNTRACK_STAT_INC(error);
+ CONNTRACK_STAT_INC(invalid);
+ return -ret;
+ }
+
+ if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) {
+ /* Not valid part of a connection */
+ CONNTRACK_STAT_INC(invalid);
+ return NF_ACCEPT;
+ }
+
+ if (IS_ERR(ct)) {
+ /* Too stressed to deal. */
+ CONNTRACK_STAT_INC(drop);
+ return NF_DROP;
+ }
+
+ IP_NF_ASSERT((*pskb)->nfct);
+
+ ret = proto->packet(ct, *pskb, ctinfo);
+ if (ret < 0) {
+ /* Invalid: inverse of the return code tells
+ * the netfilter core what to do*/
+ nf_conntrack_put((*pskb)->nfct);
+ (*pskb)->nfct = NULL;
+ CONNTRACK_STAT_INC(invalid);
+ return -ret;
+ }
+
+ if (set_reply)
+ set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
+
+ return ret;
+}
+
+int invert_tuplepr(struct ip_conntrack_tuple *inverse,
+ const struct ip_conntrack_tuple *orig)
+{
+ return ip_ct_invert_tuple(inverse, orig,
+ ip_ct_find_proto(orig->dst.protonum));
+}
+
+/* Would two expected things clash? */
+static inline int expect_clash(const struct ip_conntrack_expect *a,
+ const struct ip_conntrack_expect *b)
+{
+ /* Part covered by intersection of masks must be unequal,
+ otherwise they clash */
+ struct ip_conntrack_tuple intersect_mask
+ = { { a->mask.src.ip & b->mask.src.ip,
+ { a->mask.src.u.all & b->mask.src.u.all } },
+ { a->mask.dst.ip & b->mask.dst.ip,
+ { a->mask.dst.u.all & b->mask.dst.u.all },
+ a->mask.dst.protonum & b->mask.dst.protonum } };
+
+ return ip_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
+}
+
+static inline int expect_matches(const struct ip_conntrack_expect *a,
+ const struct ip_conntrack_expect *b)
+{
+ return a->master == b->master
+ && ip_ct_tuple_equal(&a->tuple, &b->tuple)
+ && ip_ct_tuple_equal(&a->mask, &b->mask);
+}
+
+/* Generally a bad idea to call this: could have matched already. */
+void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp)
+{
+ struct ip_conntrack_expect *i;
+
+ WRITE_LOCK(&ip_conntrack_lock);
+ /* choose the the oldest expectation to evict */
+ list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
+ if (expect_matches(i, exp) && del_timer(&i->timeout)) {
+ unlink_expect(i);
+ WRITE_UNLOCK(&ip_conntrack_lock);
+ destroy_expect(i);
+ return;
+ }
+ }
+ WRITE_UNLOCK(&ip_conntrack_lock);
+}
+
+struct ip_conntrack_expect *ip_conntrack_expect_alloc(void)
+{
+ struct ip_conntrack_expect *new;
+
+ new = kmem_cache_alloc(ip_conntrack_expect_cachep, GFP_ATOMIC);
+ if (!new) {
+ DEBUGP("expect_related: OOM allocating expect\n");
+ return NULL;
+ }
+ new->master = NULL;
+ return new;
+}
+
+void ip_conntrack_expect_free(struct ip_conntrack_expect *expect)
+{
+ kmem_cache_free(ip_conntrack_expect_cachep, expect);
+}
+
+static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
+{
+ atomic_inc(&exp->master->ct_general.use);
+ exp->master->expecting++;
+ list_add(&exp->list, &ip_conntrack_expect_list);
+
+ if (exp->master->helper->timeout) {
+ init_timer(&exp->timeout);
+ exp->timeout.data = (unsigned long)exp;
+ exp->timeout.function = expectation_timed_out;
+ exp->timeout.expires
+ = jiffies + exp->master->helper->timeout * HZ;
+ add_timer(&exp->timeout);
+ } else
+ exp->timeout.function = NULL;
+
+ CONNTRACK_STAT_INC(expect_create);
+}
+
+/* Race with expectations being used means we could have none to find; OK. */
+static void evict_oldest_expect(struct ip_conntrack *master)
+{
+ struct ip_conntrack_expect *i;
+
+ list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
+ if (i->master == master) {
+ if (del_timer(&i->timeout)) {
+ unlink_expect(i);
+ destroy_expect(i);
+ }
+ break;
+ }
+ }
+}
+
+static inline int refresh_timer(struct ip_conntrack_expect *i)
+{
+ if (!del_timer(&i->timeout))
+ return 0;
+
+ i->timeout.expires = jiffies + i->master->helper->timeout*HZ;
+ add_timer(&i->timeout);
+ return 1;
+}
+
+int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
+{
+ struct ip_conntrack_expect *i;
+ int ret;
+
+ DEBUGP("ip_conntrack_expect_related %p\n", related_to);
+ DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
+ DEBUGP("mask: "); DUMP_TUPLE(&expect->mask);
+
+ WRITE_LOCK(&ip_conntrack_lock);
+ list_for_each_entry(i, &ip_conntrack_expect_list, list) {
+ if (expect_matches(i, expect)) {
+ /* Refresh timer: if it's dying, ignore.. */
+ if (refresh_timer(i)) {
+ ret = 0;
+ /* We don't need the one they've given us. */
+ ip_conntrack_expect_free(expect);
+ goto out;
+ }
+ } else if (expect_clash(i, expect)) {
+ ret = -EBUSY;
+ goto out;
+ }
+ }
+
+ /* Will be over limit? */
+ if (expect->master->helper->max_expected &&
+ expect->master->expecting >= expect->master->helper->max_expected)
+ evict_oldest_expect(expect->master);
+
+ ip_conntrack_expect_insert(expect);
+ ret = 0;
+out:
+ WRITE_UNLOCK(&ip_conntrack_lock);
+ return ret;
+}
+
+/* Alter reply tuple (maybe alter helper). This is for NAT, and is
+ implicitly racy: see __ip_conntrack_confirm */
+void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
+ const struct ip_conntrack_tuple *newreply)
+{
+ WRITE_LOCK(&ip_conntrack_lock);
+ /* Should be unconfirmed, so not in hash table yet */
+ IP_NF_ASSERT(!is_confirmed(conntrack));
+
+ DEBUGP("Altering reply tuple of %p to ", conntrack);
+ DUMP_TUPLE(newreply);
+
+ conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
+ if (!conntrack->master && conntrack->expecting == 0)
+ conntrack->helper = ip_ct_find_helper(newreply);
+ WRITE_UNLOCK(&ip_conntrack_lock);
+}
+
+int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
+{
+ BUG_ON(me->timeout == 0);
+ WRITE_LOCK(&ip_conntrack_lock);
+ list_prepend(&helpers, me);
+ WRITE_UNLOCK(&ip_conntrack_lock);
+
+ return 0;
+}
+
+static inline int unhelp(struct ip_conntrack_tuple_hash *i,
+ const struct ip_conntrack_helper *me)
+{
+ if (tuplehash_to_ctrack(i)->helper == me)
+ tuplehash_to_ctrack(i)->helper = NULL;
+ return 0;
+}
+
+void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
+{
+ unsigned int i;
+ struct ip_conntrack_expect *exp, *tmp;
+
+ /* Need write lock here, to delete helper. */
+ WRITE_LOCK(&ip_conntrack_lock);
+ LIST_DELETE(&helpers, me);
+
+ /* Get rid of expectations */
+ list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
+ if (exp->master->helper == me && del_timer(&exp->timeout)) {
+ unlink_expect(exp);
+ destroy_expect(exp);
+ }
+ }
+ /* Get rid of expecteds, set helpers to NULL. */
+ LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me);
+ for (i = 0; i < ip_conntrack_htable_size; i++)
+ LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
+ struct ip_conntrack_tuple_hash *, me);
+ WRITE_UNLOCK(&ip_conntrack_lock);
+
+ /* Someone could be still looking at the helper in a bh. */
+ synchronize_net();
+}
+
+static inline void ct_add_counters(struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo,
+ const struct sk_buff *skb)
+{
+#ifdef CONFIG_IP_NF_CT_ACCT
+ if (skb) {
+ ct->counters[CTINFO2DIR(ctinfo)].packets++;
+ ct->counters[CTINFO2DIR(ctinfo)].bytes +=
+ ntohs(skb->nh.iph->tot_len);
+ }
+#endif
+}
+
+/* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */
+void ip_ct_refresh_acct(struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo,
+ const struct sk_buff *skb,
+ unsigned long extra_jiffies)
+{
+ IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
+
+ /* If not in hash table, timer will not be active yet */
+ if (!is_confirmed(ct)) {
+ ct->timeout.expires = extra_jiffies;
+ ct_add_counters(ct, ctinfo, skb);
+ } else {
+ WRITE_LOCK(&ip_conntrack_lock);
+ /* Need del_timer for race avoidance (may already be dying). */
+ if (del_timer(&ct->timeout)) {
+ ct->timeout.expires = jiffies + extra_jiffies;
+ add_timer(&ct->timeout);
+ }
+ ct_add_counters(ct, ctinfo, skb);
+ WRITE_UNLOCK(&ip_conntrack_lock);
+ }
+}
+
+/* Returns new sk_buff, or NULL */
+struct sk_buff *
+ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
+{
+ struct sock *sk = skb->sk;
+#ifdef CONFIG_NETFILTER_DEBUG
+ unsigned int olddebug = skb->nf_debug;
+#endif
+
+ if (sk) {
+ sock_hold(sk);
+ skb_orphan(skb);
+ }
+
+ local_bh_disable();
+ skb = ip_defrag(skb, user);
+ local_bh_enable();
+
+ if (!skb) {
+ if (sk)
+ sock_put(sk);
+ return skb;
+ }
+
+ if (sk) {
+ skb_set_owner_w(skb, sk);
+ sock_put(sk);
+ }
+
+ ip_send_check(skb->nh.iph);
+ skb->nfcache |= NFC_ALTERED;
+#ifdef CONFIG_NETFILTER_DEBUG
+ /* Packet path as if nothing had happened. */
+ skb->nf_debug = olddebug;
+#endif
+ return skb;
+}
+
+/* Used by ipt_REJECT. */
+static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
+{
+ struct ip_conntrack *ct;
+ enum ip_conntrack_info ctinfo;
+
+ /* This ICMP is in reverse direction to the packet which caused it */
+ ct = ip_conntrack_get(skb, &ctinfo);
+
+ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
+ ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
+ else
+ ctinfo = IP_CT_RELATED;
+
+ /* Attach to new skbuff, and increment count */
+ nskb->nfct = &ct->ct_general;
+ nskb->nfctinfo = ctinfo;
+ nf_conntrack_get(nskb->nfct);
+}
+
+static inline int
+do_iter(const struct ip_conntrack_tuple_hash *i,
+ int (*iter)(struct ip_conntrack *i, void *data),
+ void *data)
+{
+ return iter(tuplehash_to_ctrack(i), data);
+}
+
+/* Bring out ya dead! */
+static struct ip_conntrack_tuple_hash *
+get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
+ void *data, unsigned int *bucket)
+{
+ struct ip_conntrack_tuple_hash *h = NULL;
+
+ WRITE_LOCK(&ip_conntrack_lock);
+ for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
+ h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter,
+ struct ip_conntrack_tuple_hash *, iter, data);
+ if (h)
+ break;
+ }
+ if (!h)
+ h = LIST_FIND_W(&unconfirmed, do_iter,
+ struct ip_conntrack_tuple_hash *, iter, data);
+ if (h)
+ atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
+ WRITE_UNLOCK(&ip_conntrack_lock);
+
+ return h;
+}
+
+void
+ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
+{
+ struct ip_conntrack_tuple_hash *h;
+ unsigned int bucket = 0;
+
+ while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
+ struct ip_conntrack *ct = tuplehash_to_ctrack(h);
+ /* Time to push up daises... */
+ if (del_timer(&ct->timeout))
+ death_by_timeout((unsigned long)ct);
+ /* ... else the timer will get him soon. */
+
+ ip_conntrack_put(ct);
+ }
+}
+
+/* Fast function for those who don't want to parse /proc (and I don't
+ blame them). */
+/* Reversing the socket's dst/src point of view gives us the reply
+ mapping. */
+static int
+getorigdst(struct sock *sk, int optval, void __user *user, int *len)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct ip_conntrack_tuple_hash *h;
+ struct ip_conntrack_tuple tuple;
+
+ IP_CT_TUPLE_U_BLANK(&tuple);
+ tuple.src.ip = inet->rcv_saddr;
+ tuple.src.u.tcp.port = inet->sport;
+ tuple.dst.ip = inet->daddr;
+ tuple.dst.u.tcp.port = inet->dport;
+ tuple.dst.protonum = IPPROTO_TCP;
+
+ /* We only do TCP at the moment: is there a better way? */
+ if (strcmp(sk->sk_prot->name, "TCP")) {
+ DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
+ return -ENOPROTOOPT;
+ }
+
+ if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
+ DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
+ *len, sizeof(struct sockaddr_in));
+ return -EINVAL;
+ }
+
+ h = ip_conntrack_find_get(&tuple, NULL);
+ if (h) {
+ struct sockaddr_in sin;
+ struct ip_conntrack *ct = tuplehash_to_ctrack(h);
+
+ sin.sin_family = AF_INET;
+ sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
+ .tuple.dst.u.tcp.port;
+ sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
+ .tuple.dst.ip;
+
+ DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
+ NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
+ ip_conntrack_put(ct);
+ if (copy_to_user(user, &sin, sizeof(sin)) != 0)
+ return -EFAULT;
+ else
+ return 0;
+ }
+ DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
+ NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
+ NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
+ return -ENOENT;
+}
+
+static struct nf_sockopt_ops so_getorigdst = {
+ .pf = PF_INET,
+ .get_optmin = SO_ORIGINAL_DST,
+ .get_optmax = SO_ORIGINAL_DST+1,
+ .get = &getorigdst,
+};
+
+static int kill_all(struct ip_conntrack *i, void *data)
+{
+ return 1;
+}
+
+static void free_conntrack_hash(void)
+{
+ if (ip_conntrack_vmalloc)
+ vfree(ip_conntrack_hash);
+ else
+ free_pages((unsigned long)ip_conntrack_hash,
+ get_order(sizeof(struct list_head)
+ * ip_conntrack_htable_size));
+}
+
+/* Mishearing the voices in his head, our hero wonders how he's
+ supposed to kill the mall. */
+void ip_conntrack_cleanup(void)
+{
+ ip_ct_attach = NULL;
+ /* This makes sure all current packets have passed through
+ netfilter framework. Roll on, two-stage module
+ delete... */
+ synchronize_net();
+
+ i_see_dead_people:
+ ip_ct_iterate_cleanup(kill_all, NULL);
+ if (atomic_read(&ip_conntrack_count) != 0) {
+ schedule();
+ goto i_see_dead_people;
+ }
+
+ kmem_cache_destroy(ip_conntrack_cachep);
+ kmem_cache_destroy(ip_conntrack_expect_cachep);
+ free_conntrack_hash();
+ nf_unregister_sockopt(&so_getorigdst);
+}
+
+static int hashsize;
+module_param(hashsize, int, 0400);
+
+int __init ip_conntrack_init(void)
+{
+ unsigned int i;
+ int ret;
+
+ /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
+ * machine has 256 buckets. >= 1GB machines have 8192 buckets. */
+ if (hashsize) {
+ ip_conntrack_htable_size = hashsize;
+ } else {
+ ip_conntrack_htable_size
+ = (((num_physpages << PAGE_SHIFT) / 16384)
+ / sizeof(struct list_head));
+ if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
+ ip_conntrack_htable_size = 8192;
+ if (ip_conntrack_htable_size < 16)
+ ip_conntrack_htable_size = 16;
+ }
+ ip_conntrack_max = 8 * ip_conntrack_htable_size;
+
+ printk("ip_conntrack version %s (%u buckets, %d max)"
+ " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
+ ip_conntrack_htable_size, ip_conntrack_max,
+ sizeof(struct ip_conntrack));
+
+ ret = nf_register_sockopt(&so_getorigdst);
+ if (ret != 0) {
+ printk(KERN_ERR "Unable to register netfilter socket option\n");
+ return ret;
+ }
+
+ /* AK: the hash table is twice as big than needed because it
+ uses list_head. it would be much nicer to caches to use a
+ single pointer list head here. */
+ ip_conntrack_vmalloc = 0;
+ ip_conntrack_hash
+ =(void*)__get_free_pages(GFP_KERNEL,
+ get_order(sizeof(struct list_head)
+ *ip_conntrack_htable_size));
+ if (!ip_conntrack_hash) {
+ ip_conntrack_vmalloc = 1;
+ printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n");
+ ip_conntrack_hash = vmalloc(sizeof(struct list_head)
+ * ip_conntrack_htable_size);
+ }
+ if (!ip_conntrack_hash) {
+ printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
+ goto err_unreg_sockopt;
+ }
+
+ ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
+ sizeof(struct ip_conntrack), 0,
+ 0, NULL, NULL);
+ if (!ip_conntrack_cachep) {
+ printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
+ goto err_free_hash;
+ }
+
+ ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
+ sizeof(struct ip_conntrack_expect),
+ 0, 0, NULL, NULL);
+ if (!ip_conntrack_expect_cachep) {
+ printk(KERN_ERR "Unable to create ip_expect slab cache\n");
+ goto err_free_conntrack_slab;
+ }
+
+ /* Don't NEED lock here, but good form anyway. */
+ WRITE_LOCK(&ip_conntrack_lock);
+ for (i = 0; i < MAX_IP_CT_PROTO; i++)
+ ip_ct_protos[i] = &ip_conntrack_generic_protocol;
+ /* Sew in builtin protocols. */
+ ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
+ ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
+ ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
+ WRITE_UNLOCK(&ip_conntrack_lock);
+
+ for (i = 0; i < ip_conntrack_htable_size; i++)
+ INIT_LIST_HEAD(&ip_conntrack_hash[i]);
+
+ /* For use by ipt_REJECT */
+ ip_ct_attach = ip_conntrack_attach;
+
+ /* Set up fake conntrack:
+ - to never be deleted, not in any hashes */
+ atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
+ /* - and look it like as a confirmed connection */
+ set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
+
+ return ret;
+
+err_free_conntrack_slab:
+ kmem_cache_destroy(ip_conntrack_cachep);
+err_free_hash:
+ free_conntrack_hash();
+err_unreg_sockopt:
+ nf_unregister_sockopt(&so_getorigdst);
+
+ return -ENOMEM;
+}
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
new file mode 100644
index 00000000000..12b88cbb11d
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -0,0 +1,501 @@
+/* FTP extension for IP connection tracking. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/ip.h>
+#include <linux/ctype.h>
+#include <net/checksum.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv4/lockhelp.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
+#include <linux/moduleparam.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
+MODULE_DESCRIPTION("ftp connection tracking helper");
+
+/* This is slow, but it's simple. --RR */
+static char ftp_buffer[65536];
+
+static DECLARE_LOCK(ip_ftp_lock);
+
+#define MAX_PORTS 8
+static int ports[MAX_PORTS];
+static int ports_c;
+module_param_array(ports, int, &ports_c, 0400);
+
+static int loose;
+module_param(loose, int, 0600);
+
+unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ enum ip_ct_ftp_type type,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct ip_conntrack_expect *exp,
+ u32 *seq);
+EXPORT_SYMBOL_GPL(ip_nat_ftp_hook);
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+static int try_rfc959(const char *, size_t, u_int32_t [], char);
+static int try_eprt(const char *, size_t, u_int32_t [], char);
+static int try_epsv_response(const char *, size_t, u_int32_t [], char);
+
+static struct ftp_search {
+ enum ip_conntrack_dir dir;
+ const char *pattern;
+ size_t plen;
+ char skip;
+ char term;
+ enum ip_ct_ftp_type ftptype;
+ int (*getnum)(const char *, size_t, u_int32_t[], char);
+} search[] = {
+ {
+ IP_CT_DIR_ORIGINAL,
+ "PORT", sizeof("PORT") - 1, ' ', '\r',
+ IP_CT_FTP_PORT,
+ try_rfc959,
+ },
+ {
+ IP_CT_DIR_REPLY,
+ "227 ", sizeof("227 ") - 1, '(', ')',
+ IP_CT_FTP_PASV,
+ try_rfc959,
+ },
+ {
+ IP_CT_DIR_ORIGINAL,
+ "EPRT", sizeof("EPRT") - 1, ' ', '\r',
+ IP_CT_FTP_EPRT,
+ try_eprt,
+ },
+ {
+ IP_CT_DIR_REPLY,
+ "229 ", sizeof("229 ") - 1, '(', ')',
+ IP_CT_FTP_EPSV,
+ try_epsv_response,
+ },
+};
+
+static int try_number(const char *data, size_t dlen, u_int32_t array[],
+ int array_size, char sep, char term)
+{
+ u_int32_t i, len;
+
+ memset(array, 0, sizeof(array[0])*array_size);
+
+ /* Keep data pointing at next char. */
+ for (i = 0, len = 0; len < dlen && i < array_size; len++, data++) {
+ if (*data >= '0' && *data <= '9') {
+ array[i] = array[i]*10 + *data - '0';
+ }
+ else if (*data == sep)
+ i++;
+ else {
+ /* Unexpected character; true if it's the
+ terminator and we're finished. */
+ if (*data == term && i == array_size - 1)
+ return len;
+
+ DEBUGP("Char %u (got %u nums) `%u' unexpected\n",
+ len, i, *data);
+ return 0;
+ }
+ }
+ DEBUGP("Failed to fill %u numbers separated by %c\n", array_size, sep);
+
+ return 0;
+}
+
+/* Returns 0, or length of numbers: 192,168,1,1,5,6 */
+static int try_rfc959(const char *data, size_t dlen, u_int32_t array[6],
+ char term)
+{
+ return try_number(data, dlen, array, 6, ',', term);
+}
+
+/* Grab port: number up to delimiter */
+static int get_port(const char *data, int start, size_t dlen, char delim,
+ u_int32_t array[2])
+{
+ u_int16_t port = 0;
+ int i;
+
+ for (i = start; i < dlen; i++) {
+ /* Finished? */
+ if (data[i] == delim) {
+ if (port == 0)
+ break;
+ array[0] = port >> 8;
+ array[1] = port;
+ return i + 1;
+ }
+ else if (data[i] >= '0' && data[i] <= '9')
+ port = port*10 + data[i] - '0';
+ else /* Some other crap */
+ break;
+ }
+ return 0;
+}
+
+/* Returns 0, or length of numbers: |1|132.235.1.2|6275| */
+static int try_eprt(const char *data, size_t dlen, u_int32_t array[6],
+ char term)
+{
+ char delim;
+ int length;
+
+ /* First character is delimiter, then "1" for IPv4, then
+ delimiter again. */
+ if (dlen <= 3) return 0;
+ delim = data[0];
+ if (isdigit(delim) || delim < 33 || delim > 126
+ || data[1] != '1' || data[2] != delim)
+ return 0;
+
+ DEBUGP("EPRT: Got |1|!\n");
+ /* Now we have IP address. */
+ length = try_number(data + 3, dlen - 3, array, 4, '.', delim);
+ if (length == 0)
+ return 0;
+
+ DEBUGP("EPRT: Got IP address!\n");
+ /* Start offset includes initial "|1|", and trailing delimiter */
+ return get_port(data, 3 + length + 1, dlen, delim, array+4);
+}
+
+/* Returns 0, or length of numbers: |||6446| */
+static int try_epsv_response(const char *data, size_t dlen, u_int32_t array[6],
+ char term)
+{
+ char delim;
+
+ /* Three delimiters. */
+ if (dlen <= 3) return 0;
+ delim = data[0];
+ if (isdigit(delim) || delim < 33 || delim > 126
+ || data[1] != delim || data[2] != delim)
+ return 0;
+
+ return get_port(data, 3, dlen, delim, array+4);
+}
+
+/* Return 1 for match, 0 for accept, -1 for partial. */
+static int find_pattern(const char *data, size_t dlen,
+ const char *pattern, size_t plen,
+ char skip, char term,
+ unsigned int *numoff,
+ unsigned int *numlen,
+ u_int32_t array[6],
+ int (*getnum)(const char *, size_t, u_int32_t[], char))
+{
+ size_t i;
+
+ DEBUGP("find_pattern `%s': dlen = %u\n", pattern, dlen);
+ if (dlen == 0)
+ return 0;
+
+ if (dlen <= plen) {
+ /* Short packet: try for partial? */
+ if (strnicmp(data, pattern, dlen) == 0)
+ return -1;
+ else return 0;
+ }
+
+ if (strnicmp(data, pattern, plen) != 0) {
+#if 0
+ size_t i;
+
+ DEBUGP("ftp: string mismatch\n");
+ for (i = 0; i < plen; i++) {
+ DEBUGP("ftp:char %u `%c'(%u) vs `%c'(%u)\n",
+ i, data[i], data[i],
+ pattern[i], pattern[i]);
+ }
+#endif
+ return 0;
+ }
+
+ DEBUGP("Pattern matches!\n");
+ /* Now we've found the constant string, try to skip
+ to the 'skip' character */
+ for (i = plen; data[i] != skip; i++)
+ if (i == dlen - 1) return -1;
+
+ /* Skip over the last character */
+ i++;
+
+ DEBUGP("Skipped up to `%c'!\n", skip);
+
+ *numoff = i;
+ *numlen = getnum(data + i, dlen - i, array, term);
+ if (!*numlen)
+ return -1;
+
+ DEBUGP("Match succeeded!\n");
+ return 1;
+}
+
+/* Look up to see if we're just after a \n. */
+static int find_nl_seq(u16 seq, const struct ip_ct_ftp_master *info, int dir)
+{
+ unsigned int i;
+
+ for (i = 0; i < info->seq_aft_nl_num[dir]; i++)
+ if (info->seq_aft_nl[dir][i] == seq)
+ return 1;
+ return 0;
+}
+
+/* We don't update if it's older than what we have. */
+static void update_nl_seq(u16 nl_seq, struct ip_ct_ftp_master *info, int dir)
+{
+ unsigned int i, oldest = NUM_SEQ_TO_REMEMBER;
+
+ /* Look for oldest: if we find exact match, we're done. */
+ for (i = 0; i < info->seq_aft_nl_num[dir]; i++) {
+ if (info->seq_aft_nl[dir][i] == nl_seq)
+ return;
+
+ if (oldest == info->seq_aft_nl_num[dir]
+ || before(info->seq_aft_nl[dir][i], oldest))
+ oldest = i;
+ }
+
+ if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER)
+ info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
+ else if (oldest != NUM_SEQ_TO_REMEMBER)
+ info->seq_aft_nl[dir][oldest] = nl_seq;
+}
+
+static int help(struct sk_buff **pskb,
+ struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ unsigned int dataoff, datalen;
+ struct tcphdr _tcph, *th;
+ char *fb_ptr;
+ int ret;
+ u32 seq, array[6] = { 0 };
+ int dir = CTINFO2DIR(ctinfo);
+ unsigned int matchlen, matchoff;
+ struct ip_ct_ftp_master *ct_ftp_info = &ct->help.ct_ftp_info;
+ struct ip_conntrack_expect *exp;
+ unsigned int i;
+ int found = 0, ends_in_nl;
+
+ /* Until there's been traffic both ways, don't look in packets. */
+ if (ctinfo != IP_CT_ESTABLISHED
+ && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
+ DEBUGP("ftp: Conntrackinfo = %u\n", ctinfo);
+ return NF_ACCEPT;
+ }
+
+ th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
+ sizeof(_tcph), &_tcph);
+ if (th == NULL)
+ return NF_ACCEPT;
+
+ dataoff = (*pskb)->nh.iph->ihl*4 + th->doff*4;
+ /* No data? */
+ if (dataoff >= (*pskb)->len) {
+ DEBUGP("ftp: pskblen = %u\n", (*pskb)->len);
+ return NF_ACCEPT;
+ }
+ datalen = (*pskb)->len - dataoff;
+
+ LOCK_BH(&ip_ftp_lock);
+ fb_ptr = skb_header_pointer(*pskb, dataoff,
+ (*pskb)->len - dataoff, ftp_buffer);
+ BUG_ON(fb_ptr == NULL);
+
+ ends_in_nl = (fb_ptr[datalen - 1] == '\n');
+ seq = ntohl(th->seq) + datalen;
+
+ /* Look up to see if we're just after a \n. */
+ if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) {
+ /* Now if this ends in \n, update ftp info. */
+ DEBUGP("ip_conntrack_ftp_help: wrong seq pos %s(%u) or %s(%u)\n",
+ ct_ftp_info->seq_aft_nl[0][dir]
+ old_seq_aft_nl_set ? "":"(UNSET) ", old_seq_aft_nl);
+ ret = NF_ACCEPT;
+ goto out_update_nl;
+ }
+
+ /* Initialize IP array to expected address (it's not mentioned
+ in EPSV responses) */
+ array[0] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 24) & 0xFF;
+ array[1] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 16) & 0xFF;
+ array[2] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 8) & 0xFF;
+ array[3] = ntohl(ct->tuplehash[dir].tuple.src.ip) & 0xFF;
+
+ for (i = 0; i < ARRAY_SIZE(search); i++) {
+ if (search[i].dir != dir) continue;
+
+ found = find_pattern(fb_ptr, (*pskb)->len - dataoff,
+ search[i].pattern,
+ search[i].plen,
+ search[i].skip,
+ search[i].term,
+ &matchoff, &matchlen,
+ array,
+ search[i].getnum);
+ if (found) break;
+ }
+ if (found == -1) {
+ /* We don't usually drop packets. After all, this is
+ connection tracking, not packet filtering.
+ However, it is necessary for accurate tracking in
+ this case. */
+ if (net_ratelimit())
+ printk("conntrack_ftp: partial %s %u+%u\n",
+ search[i].pattern,
+ ntohl(th->seq), datalen);
+ ret = NF_DROP;
+ goto out;
+ } else if (found == 0) { /* No match */
+ ret = NF_ACCEPT;
+ goto out_update_nl;
+ }
+
+ DEBUGP("conntrack_ftp: match `%s' (%u bytes at %u)\n",
+ fb_ptr + matchoff, matchlen, ntohl(th->seq) + matchoff);
+
+ /* Allocate expectation which will be inserted */
+ exp = ip_conntrack_expect_alloc();
+ if (exp == NULL) {
+ ret = NF_DROP;
+ goto out;
+ }
+
+ /* We refer to the reverse direction ("!dir") tuples here,
+ * because we're expecting something in the other direction.
+ * Doesn't matter unless NAT is happening. */
+ exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
+
+ if (htonl((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3])
+ != ct->tuplehash[dir].tuple.src.ip) {
+ /* Enrico Scholz's passive FTP to partially RNAT'd ftp
+ server: it really wants us to connect to a
+ different IP address. Simply don't record it for
+ NAT. */
+ DEBUGP("conntrack_ftp: NOT RECORDING: %u,%u,%u,%u != %u.%u.%u.%u\n",
+ array[0], array[1], array[2], array[3],
+ NIPQUAD(ct->tuplehash[dir].tuple.src.ip));
+
+ /* Thanks to Cristiano Lincoln Mattos
+ <lincoln@cesar.org.br> for reporting this potential
+ problem (DMZ machines opening holes to internal
+ networks, or the packet filter itself). */
+ if (!loose) {
+ ret = NF_ACCEPT;
+ ip_conntrack_expect_free(exp);
+ goto out_update_nl;
+ }
+ exp->tuple.dst.ip = htonl((array[0] << 24) | (array[1] << 16)
+ | (array[2] << 8) | array[3]);
+ }
+
+ exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
+ exp->tuple.dst.u.tcp.port = htons(array[4] << 8 | array[5]);
+ exp->tuple.src.u.tcp.port = 0; /* Don't care. */
+ exp->tuple.dst.protonum = IPPROTO_TCP;
+ exp->mask = ((struct ip_conntrack_tuple)
+ { { 0xFFFFFFFF, { 0 } },
+ { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }});
+
+ exp->expectfn = NULL;
+ exp->master = ct;
+
+ /* Now, NAT might want to mangle the packet, and register the
+ * (possibly changed) expectation itself. */
+ if (ip_nat_ftp_hook)
+ ret = ip_nat_ftp_hook(pskb, ctinfo, search[i].ftptype,
+ matchoff, matchlen, exp, &seq);
+ else {
+ /* Can't expect this? Best to drop packet now. */
+ if (ip_conntrack_expect_related(exp) != 0) {
+ ip_conntrack_expect_free(exp);
+ ret = NF_DROP;
+ } else
+ ret = NF_ACCEPT;
+ }
+
+out_update_nl:
+ /* Now if this ends in \n, update ftp info. Seq may have been
+ * adjusted by NAT code. */
+ if (ends_in_nl)
+ update_nl_seq(seq, ct_ftp_info,dir);
+ out:
+ UNLOCK_BH(&ip_ftp_lock);
+ return ret;
+}
+
+static struct ip_conntrack_helper ftp[MAX_PORTS];
+static char ftp_names[MAX_PORTS][10];
+
+/* Not __exit: called from init() */
+static void fini(void)
+{
+ int i;
+ for (i = 0; i < ports_c; i++) {
+ DEBUGP("ip_ct_ftp: unregistering helper for port %d\n",
+ ports[i]);
+ ip_conntrack_helper_unregister(&ftp[i]);
+ }
+}
+
+static int __init init(void)
+{
+ int i, ret;
+ char *tmpname;
+
+ if (ports_c == 0)
+ ports[ports_c++] = FTP_PORT;
+
+ for (i = 0; i < ports_c; i++) {
+ ftp[i].tuple.src.u.tcp.port = htons(ports[i]);
+ ftp[i].tuple.dst.protonum = IPPROTO_TCP;
+ ftp[i].mask.src.u.tcp.port = 0xFFFF;
+ ftp[i].mask.dst.protonum = 0xFF;
+ ftp[i].max_expected = 1;
+ ftp[i].timeout = 5 * 60; /* 5 minutes */
+ ftp[i].me = THIS_MODULE;
+ ftp[i].help = help;
+
+ tmpname = &ftp_names[i][0];
+ if (ports[i] == FTP_PORT)
+ sprintf(tmpname, "ftp");
+ else
+ sprintf(tmpname, "ftp-%d", ports[i]);
+ ftp[i].name = tmpname;
+
+ DEBUGP("ip_ct_ftp: registering helper for port %d\n",
+ ports[i]);
+ ret = ip_conntrack_helper_register(&ftp[i]);
+
+ if (ret) {
+ fini();
+ return ret;
+ }
+ }
+ return 0;
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c
new file mode 100644
index 00000000000..33cc7348b6e
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_irc.c
@@ -0,0 +1,313 @@
+/* IRC extension for IP connection tracking, Version 1.21
+ * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org>
+ * based on RR's ip_conntrack_ftp.c
+ *
+ * ip_conntrack_irc.c,v 1.21 2002/02/05 14:49:26 laforge Exp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ **
+ * Module load syntax:
+ * insmod ip_conntrack_irc.o ports=port1,port2,...port<MAX_PORTS>
+ * max_dcc_channels=n dcc_timeout=secs
+ *
+ * please give the ports of all IRC servers You wish to connect to.
+ * If You don't specify ports, the default will be port 6667.
+ * With max_dcc_channels you can define the maximum number of not
+ * yet answered DCC channels per IRC session (default 8).
+ * With dcc_timeout you can specify how long the system waits for
+ * an expected DCC channel (default 300 seconds).
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv4/lockhelp.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_irc.h>
+#include <linux/moduleparam.h>
+
+#define MAX_PORTS 8
+static int ports[MAX_PORTS];
+static int ports_c;
+static int max_dcc_channels = 8;
+static unsigned int dcc_timeout = 300;
+/* This is slow, but it's simple. --RR */
+static char irc_buffer[65536];
+static DECLARE_LOCK(irc_buffer_lock);
+
+unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct ip_conntrack_expect *exp);
+EXPORT_SYMBOL_GPL(ip_nat_irc_hook);
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
+MODULE_LICENSE("GPL");
+module_param_array(ports, int, &ports_c, 0400);
+MODULE_PARM_DESC(ports, "port numbers of IRC servers");
+module_param(max_dcc_channels, int, 0400);
+MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session");
+module_param(dcc_timeout, int, 0400);
+MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels");
+
+static char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " };
+#define MINMATCHLEN 5
+
+#if 0
+#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s:" format, \
+ __FILE__, __FUNCTION__ , ## args)
+#else
+#define DEBUGP(format, args...)
+#endif
+
+static int parse_dcc(char *data, char *data_end, u_int32_t *ip,
+ u_int16_t *port, char **ad_beg_p, char **ad_end_p)
+/* tries to get the ip_addr and port out of a dcc command
+ return value: -1 on failure, 0 on success
+ data pointer to first byte of DCC command data
+ data_end pointer to last byte of dcc command data
+ ip returns parsed ip of dcc command
+ port returns parsed port of dcc command
+ ad_beg_p returns pointer to first byte of addr data
+ ad_end_p returns pointer to last byte of addr data */
+{
+
+ /* at least 12: "AAAAAAAA P\1\n" */
+ while (*data++ != ' ')
+ if (data > data_end - 12)
+ return -1;
+
+ *ad_beg_p = data;
+ *ip = simple_strtoul(data, &data, 10);
+
+ /* skip blanks between ip and port */
+ while (*data == ' ') {
+ if (data >= data_end)
+ return -1;
+ data++;
+ }
+
+ *port = simple_strtoul(data, &data, 10);
+ *ad_end_p = data;
+
+ return 0;
+}
+
+static int help(struct sk_buff **pskb,
+ struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
+{
+ unsigned int dataoff;
+ struct tcphdr _tcph, *th;
+ char *data, *data_limit, *ib_ptr;
+ int dir = CTINFO2DIR(ctinfo);
+ struct ip_conntrack_expect *exp;
+ u32 seq;
+ u_int32_t dcc_ip;
+ u_int16_t dcc_port;
+ int i, ret = NF_ACCEPT;
+ char *addr_beg_p, *addr_end_p;
+
+ DEBUGP("entered\n");
+
+ /* If packet is coming from IRC server */
+ if (dir == IP_CT_DIR_REPLY)
+ return NF_ACCEPT;
+
+ /* Until there's been traffic both ways, don't look in packets. */
+ if (ctinfo != IP_CT_ESTABLISHED
+ && ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
+ DEBUGP("Conntrackinfo = %u\n", ctinfo);
+ return NF_ACCEPT;
+ }
+
+ /* Not a full tcp header? */
+ th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
+ sizeof(_tcph), &_tcph);
+ if (th == NULL)
+ return NF_ACCEPT;
+
+ /* No data? */
+ dataoff = (*pskb)->nh.iph->ihl*4 + th->doff*4;
+ if (dataoff >= (*pskb)->len)
+ return NF_ACCEPT;
+
+ LOCK_BH(&irc_buffer_lock);
+ ib_ptr = skb_header_pointer(*pskb, dataoff,
+ (*pskb)->len - dataoff, irc_buffer);
+ BUG_ON(ib_ptr == NULL);
+
+ data = ib_ptr;
+ data_limit = ib_ptr + (*pskb)->len - dataoff;
+
+ /* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24
+ * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */
+ while (data < (data_limit - (19 + MINMATCHLEN))) {
+ if (memcmp(data, "\1DCC ", 5)) {
+ data++;
+ continue;
+ }
+
+ data += 5;
+ /* we have at least (19+MINMATCHLEN)-5 bytes valid data left */
+
+ DEBUGP("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u...\n",
+ NIPQUAD(iph->saddr), ntohs(th->source),
+ NIPQUAD(iph->daddr), ntohs(th->dest));
+
+ for (i = 0; i < ARRAY_SIZE(dccprotos); i++) {
+ if (memcmp(data, dccprotos[i], strlen(dccprotos[i]))) {
+ /* no match */
+ continue;
+ }
+
+ DEBUGP("DCC %s detected\n", dccprotos[i]);
+ data += strlen(dccprotos[i]);
+ /* we have at least
+ * (19+MINMATCHLEN)-5-dccprotos[i].matchlen bytes valid
+ * data left (== 14/13 bytes) */
+ if (parse_dcc((char *)data, data_limit, &dcc_ip,
+ &dcc_port, &addr_beg_p, &addr_end_p)) {
+ /* unable to parse */
+ DEBUGP("unable to parse dcc command\n");
+ continue;
+ }
+ DEBUGP("DCC bound ip/port: %u.%u.%u.%u:%u\n",
+ HIPQUAD(dcc_ip), dcc_port);
+
+ /* dcc_ip can be the internal OR external (NAT'ed) IP
+ * Tiago Sousa <mirage@kaotik.org> */
+ if (ct->tuplehash[dir].tuple.src.ip != htonl(dcc_ip)
+ && ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip != htonl(dcc_ip)) {
+ if (net_ratelimit())
+ printk(KERN_WARNING
+ "Forged DCC command from "
+ "%u.%u.%u.%u: %u.%u.%u.%u:%u\n",
+ NIPQUAD(ct->tuplehash[dir].tuple.src.ip),
+ HIPQUAD(dcc_ip), dcc_port);
+
+ continue;
+ }
+
+ exp = ip_conntrack_expect_alloc();
+ if (exp == NULL) {
+ ret = NF_DROP;
+ goto out;
+ }
+
+ /* save position of address in dcc string,
+ * necessary for NAT */
+ DEBUGP("tcph->seq = %u\n", th->seq);
+ seq = ntohl(th->seq) + (addr_beg_p - ib_ptr);
+
+ /* We refer to the reverse direction ("!dir")
+ * tuples here, because we're expecting
+ * something in the other * direction.
+ * Doesn't matter unless NAT is happening. */
+ exp->tuple = ((struct ip_conntrack_tuple)
+ { { 0, { 0 } },
+ { ct->tuplehash[!dir].tuple.dst.ip,
+ { .tcp = { htons(dcc_port) } },
+ IPPROTO_TCP }});
+ exp->mask = ((struct ip_conntrack_tuple)
+ { { 0, { 0 } },
+ { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }});
+ exp->expectfn = NULL;
+ exp->master = ct;
+ if (ip_nat_irc_hook)
+ ret = ip_nat_irc_hook(pskb, ctinfo,
+ addr_beg_p - ib_ptr,
+ addr_end_p - addr_beg_p,
+ exp);
+ else if (ip_conntrack_expect_related(exp) != 0) {
+ ip_conntrack_expect_free(exp);
+ ret = NF_DROP;
+ }
+ goto out;
+ } /* for .. NUM_DCCPROTO */
+ } /* while data < ... */
+
+ out:
+ UNLOCK_BH(&irc_buffer_lock);
+ return ret;
+}
+
+static struct ip_conntrack_helper irc_helpers[MAX_PORTS];
+static char irc_names[MAX_PORTS][10];
+
+static void fini(void);
+
+static int __init init(void)
+{
+ int i, ret;
+ struct ip_conntrack_helper *hlpr;
+ char *tmpname;
+
+ if (max_dcc_channels < 1) {
+ printk("ip_conntrack_irc: max_dcc_channels must be a positive integer\n");
+ return -EBUSY;
+ }
+ if (dcc_timeout < 0) {
+ printk("ip_conntrack_irc: dcc_timeout must be a positive integer\n");
+ return -EBUSY;
+ }
+
+ /* If no port given, default to standard irc port */
+ if (ports_c == 0)
+ ports[ports_c++] = IRC_PORT;
+
+ for (i = 0; i < ports_c; i++) {
+ hlpr = &irc_helpers[i];
+ hlpr->tuple.src.u.tcp.port = htons(ports[i]);
+ hlpr->tuple.dst.protonum = IPPROTO_TCP;
+ hlpr->mask.src.u.tcp.port = 0xFFFF;
+ hlpr->mask.dst.protonum = 0xFF;
+ hlpr->max_expected = max_dcc_channels;
+ hlpr->timeout = dcc_timeout;
+ hlpr->me = THIS_MODULE;
+ hlpr->help = help;
+
+ tmpname = &irc_names[i][0];
+ if (ports[i] == IRC_PORT)
+ sprintf(tmpname, "irc");
+ else
+ sprintf(tmpname, "irc-%d", i);
+ hlpr->name = tmpname;
+
+ DEBUGP("port #%d: %d\n", i, ports[i]);
+
+ ret = ip_conntrack_helper_register(hlpr);
+
+ if (ret) {
+ printk("ip_conntrack_irc: ERROR registering port %d\n",
+ ports[i]);
+ fini();
+ return -EBUSY;
+ }
+ }
+ return 0;
+}
+
+/* This function is intentionally _NOT_ defined as __exit, because
+ * it is needed by the init function */
+static void fini(void)
+{
+ int i;
+ for (i = 0; i < ports_c; i++) {
+ DEBUGP("unregistering port %d\n",
+ ports[i]);
+ ip_conntrack_helper_unregister(&irc_helpers[i]);
+ }
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
new file mode 100644
index 00000000000..88c3712bd25
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
@@ -0,0 +1,75 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+
+unsigned long ip_ct_generic_timeout = 600*HZ;
+
+static int generic_pkt_to_tuple(const struct sk_buff *skb,
+ unsigned int dataoff,
+ struct ip_conntrack_tuple *tuple)
+{
+ tuple->src.u.all = 0;
+ tuple->dst.u.all = 0;
+
+ return 1;
+}
+
+static int generic_invert_tuple(struct ip_conntrack_tuple *tuple,
+ const struct ip_conntrack_tuple *orig)
+{
+ tuple->src.u.all = 0;
+ tuple->dst.u.all = 0;
+
+ return 1;
+}
+
+/* Print out the per-protocol part of the tuple. */
+static int generic_print_tuple(struct seq_file *s,
+ const struct ip_conntrack_tuple *tuple)
+{
+ return 0;
+}
+
+/* Print out the private part of the conntrack. */
+static int generic_print_conntrack(struct seq_file *s,
+ const struct ip_conntrack *state)
+{
+ return 0;
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+static int packet(struct ip_conntrack *conntrack,
+ const struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo)
+{
+ ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_generic_timeout);
+ return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found. */
+static int new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
+{
+ return 1;
+}
+
+struct ip_conntrack_protocol ip_conntrack_generic_protocol =
+{
+ .proto = 0,
+ .name = "unknown",
+ .pkt_to_tuple = generic_pkt_to_tuple,
+ .invert_tuple = generic_invert_tuple,
+ .print_tuple = generic_print_tuple,
+ .print_conntrack = generic_print_conntrack,
+ .packet = packet,
+ .new = new,
+};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
new file mode 100644
index 00000000000..602c74db325
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -0,0 +1,279 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/in.h>
+#include <linux/icmp.h>
+#include <linux/seq_file.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+
+unsigned long ip_ct_icmp_timeout = 30*HZ;
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+static int icmp_pkt_to_tuple(const struct sk_buff *skb,
+ unsigned int dataoff,
+ struct ip_conntrack_tuple *tuple)
+{
+ struct icmphdr _hdr, *hp;
+
+ hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+ if (hp == NULL)
+ return 0;
+
+ tuple->dst.u.icmp.type = hp->type;
+ tuple->src.u.icmp.id = hp->un.echo.id;
+ tuple->dst.u.icmp.code = hp->code;
+
+ return 1;
+}
+
+static int icmp_invert_tuple(struct ip_conntrack_tuple *tuple,
+ const struct ip_conntrack_tuple *orig)
+{
+ /* Add 1; spaces filled with 0. */
+ static u_int8_t invmap[]
+ = { [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
+ [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
+ [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
+ [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
+ [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
+ [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
+ [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
+ [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1};
+
+ if (orig->dst.u.icmp.type >= sizeof(invmap)
+ || !invmap[orig->dst.u.icmp.type])
+ return 0;
+
+ tuple->src.u.icmp.id = orig->src.u.icmp.id;
+ tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1;
+ tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
+ return 1;
+}
+
+/* Print out the per-protocol part of the tuple. */
+static int icmp_print_tuple(struct seq_file *s,
+ const struct ip_conntrack_tuple *tuple)
+{
+ return seq_printf(s, "type=%u code=%u id=%u ",
+ tuple->dst.u.icmp.type,
+ tuple->dst.u.icmp.code,
+ ntohs(tuple->src.u.icmp.id));
+}
+
+/* Print out the private part of the conntrack. */
+static int icmp_print_conntrack(struct seq_file *s,
+ const struct ip_conntrack *conntrack)
+{
+ return 0;
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+static int icmp_packet(struct ip_conntrack *ct,
+ const struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo)
+{
+ /* Try to delete connection immediately after all replies:
+ won't actually vanish as we still have skb, and del_timer
+ means this will only run once even if count hits zero twice
+ (theoretically possible with SMP) */
+ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
+ if (atomic_dec_and_test(&ct->proto.icmp.count)
+ && del_timer(&ct->timeout))
+ ct->timeout.function((unsigned long)ct);
+ } else {
+ atomic_inc(&ct->proto.icmp.count);
+ ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout);
+ }
+
+ return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found. */
+static int icmp_new(struct ip_conntrack *conntrack,
+ const struct sk_buff *skb)
+{
+ static u_int8_t valid_new[]
+ = { [ICMP_ECHO] = 1,
+ [ICMP_TIMESTAMP] = 1,
+ [ICMP_INFO_REQUEST] = 1,
+ [ICMP_ADDRESS] = 1 };
+
+ if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
+ || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
+ /* Can't create a new ICMP `conn' with this. */
+ DEBUGP("icmp: can't create new conn with type %u\n",
+ conntrack->tuplehash[0].tuple.dst.u.icmp.type);
+ DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
+ return 0;
+ }
+ atomic_set(&conntrack->proto.icmp.count, 0);
+ return 1;
+}
+
+static int
+icmp_error_message(struct sk_buff *skb,
+ enum ip_conntrack_info *ctinfo,
+ unsigned int hooknum)
+{
+ struct ip_conntrack_tuple innertuple, origtuple;
+ struct {
+ struct icmphdr icmp;
+ struct iphdr ip;
+ } _in, *inside;
+ struct ip_conntrack_protocol *innerproto;
+ struct ip_conntrack_tuple_hash *h;
+ int dataoff;
+
+ IP_NF_ASSERT(skb->nfct == NULL);
+
+ /* Not enough header? */
+ inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in);
+ if (inside == NULL)
+ return NF_ACCEPT;
+
+ /* Ignore ICMP's containing fragments (shouldn't happen) */
+ if (inside->ip.frag_off & htons(IP_OFFSET)) {
+ DEBUGP("icmp_error_track: fragment of proto %u\n",
+ inside->ip.protocol);
+ return NF_ACCEPT;
+ }
+
+ innerproto = ip_ct_find_proto(inside->ip.protocol);
+ dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp) + inside->ip.ihl*4;
+ /* Are they talking about one of our connections? */
+ if (!ip_ct_get_tuple(&inside->ip, skb, dataoff, &origtuple, innerproto)) {
+ DEBUGP("icmp_error: ! get_tuple p=%u", inside->ip.protocol);
+ return NF_ACCEPT;
+ }
+
+ /* Ordinarily, we'd expect the inverted tupleproto, but it's
+ been preserved inside the ICMP. */
+ if (!ip_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
+ DEBUGP("icmp_error_track: Can't invert tuple\n");
+ return NF_ACCEPT;
+ }
+
+ *ctinfo = IP_CT_RELATED;
+
+ h = ip_conntrack_find_get(&innertuple, NULL);
+ if (!h) {
+ /* Locally generated ICMPs will match inverted if they
+ haven't been SNAT'ed yet */
+ /* FIXME: NAT code has to handle half-done double NAT --RR */
+ if (hooknum == NF_IP_LOCAL_OUT)
+ h = ip_conntrack_find_get(&origtuple, NULL);
+
+ if (!h) {
+ DEBUGP("icmp_error_track: no match\n");
+ return NF_ACCEPT;
+ }
+ /* Reverse direction from that found */
+ if (DIRECTION(h) != IP_CT_DIR_REPLY)
+ *ctinfo += IP_CT_IS_REPLY;
+ } else {
+ if (DIRECTION(h) == IP_CT_DIR_REPLY)
+ *ctinfo += IP_CT_IS_REPLY;
+ }
+
+ /* Update skb to refer to this connection */
+ skb->nfct = &tuplehash_to_ctrack(h)->ct_general;
+ skb->nfctinfo = *ctinfo;
+ return -NF_ACCEPT;
+}
+
+/* Small and modified version of icmp_rcv */
+static int
+icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
+ unsigned int hooknum)
+{
+ struct icmphdr _ih, *icmph;
+
+ /* Not enough header? */
+ icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih);
+ if (icmph == NULL) {
+ if (LOG_INVALID(IPPROTO_ICMP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_icmp: short packet ");
+ return -NF_ACCEPT;
+ }
+
+ /* See ip_conntrack_proto_tcp.c */
+ if (hooknum != NF_IP_PRE_ROUTING)
+ goto checksum_skipped;
+
+ switch (skb->ip_summed) {
+ case CHECKSUM_HW:
+ if (!(u16)csum_fold(skb->csum))
+ break;
+ if (LOG_INVALID(IPPROTO_ICMP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_icmp: bad HW ICMP checksum ");
+ return -NF_ACCEPT;
+ case CHECKSUM_NONE:
+ if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) {
+ if (LOG_INVALID(IPPROTO_ICMP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_icmp: bad ICMP checksum ");
+ return -NF_ACCEPT;
+ }
+ default:
+ break;
+ }
+
+checksum_skipped:
+ /*
+ * 18 is the highest 'known' ICMP type. Anything else is a mystery
+ *
+ * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently
+ * discarded.
+ */
+ if (icmph->type > NR_ICMP_TYPES) {
+ if (LOG_INVALID(IPPROTO_ICMP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_icmp: invalid ICMP type ");
+ return -NF_ACCEPT;
+ }
+
+ /* Need to track icmp error message? */
+ if (icmph->type != ICMP_DEST_UNREACH
+ && icmph->type != ICMP_SOURCE_QUENCH
+ && icmph->type != ICMP_TIME_EXCEEDED
+ && icmph->type != ICMP_PARAMETERPROB
+ && icmph->type != ICMP_REDIRECT)
+ return NF_ACCEPT;
+
+ return icmp_error_message(skb, ctinfo, hooknum);
+}
+
+struct ip_conntrack_protocol ip_conntrack_protocol_icmp =
+{
+ .proto = IPPROTO_ICMP,
+ .name = "icmp",
+ .pkt_to_tuple = icmp_pkt_to_tuple,
+ .invert_tuple = icmp_invert_tuple,
+ .print_tuple = icmp_print_tuple,
+ .print_conntrack = icmp_print_conntrack,
+ .packet = icmp_packet,
+ .new = icmp_new,
+ .error = icmp_error,
+};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
new file mode 100644
index 00000000000..ff8c34a860f
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -0,0 +1,649 @@
+/*
+ * Connection tracking protocol helper module for SCTP.
+ *
+ * SCTP is defined in RFC 2960. References to various sections in this code
+ * are to this RFC.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Added support for proc manipulation of timeouts.
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/sctp.h>
+#include <linux/string.h>
+#include <linux/seq_file.h>
+
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/lockhelp.h>
+
+#if 0
+#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__)
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* Protects conntrack->proto.sctp */
+static DECLARE_RWLOCK(sctp_lock);
+
+/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
+ closely. They're more complex. --RR
+
+ And so for me for SCTP :D -Kiran */
+
+static const char *sctp_conntrack_names[] = {
+ "NONE",
+ "CLOSED",
+ "COOKIE_WAIT",
+ "COOKIE_ECHOED",
+ "ESTABLISHED",
+ "SHUTDOWN_SENT",
+ "SHUTDOWN_RECD",
+ "SHUTDOWN_ACK_SENT",
+};
+
+#define SECS * HZ
+#define MINS * 60 SECS
+#define HOURS * 60 MINS
+#define DAYS * 24 HOURS
+
+static unsigned long ip_ct_sctp_timeout_closed = 10 SECS;
+static unsigned long ip_ct_sctp_timeout_cookie_wait = 3 SECS;
+static unsigned long ip_ct_sctp_timeout_cookie_echoed = 3 SECS;
+static unsigned long ip_ct_sctp_timeout_established = 5 DAYS;
+static unsigned long ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000;
+static unsigned long ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000;
+static unsigned long ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS;
+
+static unsigned long * sctp_timeouts[]
+= { NULL, /* SCTP_CONNTRACK_NONE */
+ &ip_ct_sctp_timeout_closed, /* SCTP_CONNTRACK_CLOSED */
+ &ip_ct_sctp_timeout_cookie_wait, /* SCTP_CONNTRACK_COOKIE_WAIT */
+ &ip_ct_sctp_timeout_cookie_echoed, /* SCTP_CONNTRACK_COOKIE_ECHOED */
+ &ip_ct_sctp_timeout_established, /* SCTP_CONNTRACK_ESTABLISHED */
+ &ip_ct_sctp_timeout_shutdown_sent, /* SCTP_CONNTRACK_SHUTDOWN_SENT */
+ &ip_ct_sctp_timeout_shutdown_recd, /* SCTP_CONNTRACK_SHUTDOWN_RECD */
+ &ip_ct_sctp_timeout_shutdown_ack_sent /* SCTP_CONNTRACK_SHUTDOWN_ACK_SENT */
+ };
+
+#define sNO SCTP_CONNTRACK_NONE
+#define sCL SCTP_CONNTRACK_CLOSED
+#define sCW SCTP_CONNTRACK_COOKIE_WAIT
+#define sCE SCTP_CONNTRACK_COOKIE_ECHOED
+#define sES SCTP_CONNTRACK_ESTABLISHED
+#define sSS SCTP_CONNTRACK_SHUTDOWN_SENT
+#define sSR SCTP_CONNTRACK_SHUTDOWN_RECD
+#define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
+#define sIV SCTP_CONNTRACK_MAX
+
+/*
+ These are the descriptions of the states:
+
+NOTE: These state names are tantalizingly similar to the states of an
+SCTP endpoint. But the interpretation of the states is a little different,
+considering that these are the states of the connection and not of an end
+point. Please note the subtleties. -Kiran
+
+NONE - Nothing so far.
+COOKIE WAIT - We have seen an INIT chunk in the original direction, or also
+ an INIT_ACK chunk in the reply direction.
+COOKIE ECHOED - We have seen a COOKIE_ECHO chunk in the original direction.
+ESTABLISHED - We have seen a COOKIE_ACK in the reply direction.
+SHUTDOWN_SENT - We have seen a SHUTDOWN chunk in the original direction.
+SHUTDOWN_RECD - We have seen a SHUTDOWN chunk in the reply directoin.
+SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite
+ to that of the SHUTDOWN chunk.
+CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
+ the SHUTDOWN chunk. Connection is closed.
+*/
+
+/* TODO
+ - I have assumed that the first INIT is in the original direction.
+ This messes things when an INIT comes in the reply direction in CLOSED
+ state.
+ - Check the error type in the reply dir before transitioning from
+cookie echoed to closed.
+ - Sec 5.2.4 of RFC 2960
+ - Multi Homing support.
+*/
+
+/* SCTP conntrack state transitions */
+static enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
+ {
+/* ORIGINAL */
+/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
+/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA},
+/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},
+/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA},
+/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA},
+/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant have Stale cookie*/
+/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */
+/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in orig dir */
+/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL}
+ },
+ {
+/* REPLY */
+/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
+/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */
+/* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},
+/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA},
+/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA},
+/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA},
+/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in reply dir */
+/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA},
+/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL}
+ }
+};
+
+static int sctp_pkt_to_tuple(const struct sk_buff *skb,
+ unsigned int dataoff,
+ struct ip_conntrack_tuple *tuple)
+{
+ sctp_sctphdr_t _hdr, *hp;
+
+ DEBUGP(__FUNCTION__);
+ DEBUGP("\n");
+
+ /* Actually only need first 8 bytes. */
+ hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
+ if (hp == NULL)
+ return 0;
+
+ tuple->src.u.sctp.port = hp->source;
+ tuple->dst.u.sctp.port = hp->dest;
+ return 1;
+}
+
+static int sctp_invert_tuple(struct ip_conntrack_tuple *tuple,
+ const struct ip_conntrack_tuple *orig)
+{
+ DEBUGP(__FUNCTION__);
+ DEBUGP("\n");
+
+ tuple->src.u.sctp.port = orig->dst.u.sctp.port;
+ tuple->dst.u.sctp.port = orig->src.u.sctp.port;
+ return 1;
+}
+
+/* Print out the per-protocol part of the tuple. */
+static int sctp_print_tuple(struct seq_file *s,
+ const struct ip_conntrack_tuple *tuple)
+{
+ DEBUGP(__FUNCTION__);
+ DEBUGP("\n");
+
+ return seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.sctp.port),
+ ntohs(tuple->dst.u.sctp.port));
+}
+
+/* Print out the private part of the conntrack. */
+static int sctp_print_conntrack(struct seq_file *s,
+ const struct ip_conntrack *conntrack)
+{
+ enum sctp_conntrack state;
+
+ DEBUGP(__FUNCTION__);
+ DEBUGP("\n");
+
+ READ_LOCK(&sctp_lock);
+ state = conntrack->proto.sctp.state;
+ READ_UNLOCK(&sctp_lock);
+
+ return seq_printf(s, "%s ", sctp_conntrack_names[state]);
+}
+
+#define for_each_sctp_chunk(skb, sch, _sch, offset, count) \
+for (offset = skb->nh.iph->ihl * 4 + sizeof(sctp_sctphdr_t), count = 0; \
+ offset < skb->len && \
+ (sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch)); \
+ offset += (htons(sch->length) + 3) & ~3, count++)
+
+/* Some validity checks to make sure the chunks are fine */
+static int do_basic_checks(struct ip_conntrack *conntrack,
+ const struct sk_buff *skb,
+ char *map)
+{
+ u_int32_t offset, count;
+ sctp_chunkhdr_t _sch, *sch;
+ int flag;
+
+ DEBUGP(__FUNCTION__);
+ DEBUGP("\n");
+
+ flag = 0;
+
+ for_each_sctp_chunk (skb, sch, _sch, offset, count) {
+ DEBUGP("Chunk Num: %d Type: %d\n", count, sch->type);
+
+ if (sch->type == SCTP_CID_INIT
+ || sch->type == SCTP_CID_INIT_ACK
+ || sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
+ flag = 1;
+ }
+
+ /* Cookie Ack/Echo chunks not the first OR
+ Init / Init Ack / Shutdown compl chunks not the only chunks */
+ if ((sch->type == SCTP_CID_COOKIE_ACK
+ || sch->type == SCTP_CID_COOKIE_ECHO
+ || flag)
+ && count !=0 ) {
+ DEBUGP("Basic checks failed\n");
+ return 1;
+ }
+
+ if (map) {
+ set_bit(sch->type, (void *)map);
+ }
+ }
+
+ DEBUGP("Basic checks passed\n");
+ return 0;
+}
+
+static int new_state(enum ip_conntrack_dir dir,
+ enum sctp_conntrack cur_state,
+ int chunk_type)
+{
+ int i;
+
+ DEBUGP(__FUNCTION__);
+ DEBUGP("\n");
+
+ DEBUGP("Chunk type: %d\n", chunk_type);
+
+ switch (chunk_type) {
+ case SCTP_CID_INIT:
+ DEBUGP("SCTP_CID_INIT\n");
+ i = 0; break;
+ case SCTP_CID_INIT_ACK:
+ DEBUGP("SCTP_CID_INIT_ACK\n");
+ i = 1; break;
+ case SCTP_CID_ABORT:
+ DEBUGP("SCTP_CID_ABORT\n");
+ i = 2; break;
+ case SCTP_CID_SHUTDOWN:
+ DEBUGP("SCTP_CID_SHUTDOWN\n");
+ i = 3; break;
+ case SCTP_CID_SHUTDOWN_ACK:
+ DEBUGP("SCTP_CID_SHUTDOWN_ACK\n");
+ i = 4; break;
+ case SCTP_CID_ERROR:
+ DEBUGP("SCTP_CID_ERROR\n");
+ i = 5; break;
+ case SCTP_CID_COOKIE_ECHO:
+ DEBUGP("SCTP_CID_COOKIE_ECHO\n");
+ i = 6; break;
+ case SCTP_CID_COOKIE_ACK:
+ DEBUGP("SCTP_CID_COOKIE_ACK\n");
+ i = 7; break;
+ case SCTP_CID_SHUTDOWN_COMPLETE:
+ DEBUGP("SCTP_CID_SHUTDOWN_COMPLETE\n");
+ i = 8; break;
+ default:
+ /* Other chunks like DATA, SACK, HEARTBEAT and
+ its ACK do not cause a change in state */
+ DEBUGP("Unknown chunk type, Will stay in %s\n",
+ sctp_conntrack_names[cur_state]);
+ return cur_state;
+ }
+
+ DEBUGP("dir: %d cur_state: %s chunk_type: %d new_state: %s\n",
+ dir, sctp_conntrack_names[cur_state], chunk_type,
+ sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]);
+
+ return sctp_conntracks[dir][i][cur_state];
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+static int sctp_packet(struct ip_conntrack *conntrack,
+ const struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo)
+{
+ enum sctp_conntrack newconntrack, oldsctpstate;
+ struct iphdr *iph = skb->nh.iph;
+ sctp_sctphdr_t _sctph, *sh;
+ sctp_chunkhdr_t _sch, *sch;
+ u_int32_t offset, count;
+ char map[256 / sizeof (char)] = {0};
+
+ DEBUGP(__FUNCTION__);
+ DEBUGP("\n");
+
+ sh = skb_header_pointer(skb, iph->ihl * 4, sizeof(_sctph), &_sctph);
+ if (sh == NULL)
+ return -1;
+
+ if (do_basic_checks(conntrack, skb, map) != 0)
+ return -1;
+
+ /* Check the verification tag (Sec 8.5) */
+ if (!test_bit(SCTP_CID_INIT, (void *)map)
+ && !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, (void *)map)
+ && !test_bit(SCTP_CID_COOKIE_ECHO, (void *)map)
+ && !test_bit(SCTP_CID_ABORT, (void *)map)
+ && !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map)
+ && (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
+ DEBUGP("Verification tag check failed\n");
+ return -1;
+ }
+
+ oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX;
+ for_each_sctp_chunk (skb, sch, _sch, offset, count) {
+ WRITE_LOCK(&sctp_lock);
+
+ /* Special cases of Verification tag check (Sec 8.5.1) */
+ if (sch->type == SCTP_CID_INIT) {
+ /* Sec 8.5.1 (A) */
+ if (sh->vtag != 0) {
+ WRITE_UNLOCK(&sctp_lock);
+ return -1;
+ }
+ } else if (sch->type == SCTP_CID_ABORT) {
+ /* Sec 8.5.1 (B) */
+ if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
+ && !(sh->vtag == conntrack->proto.sctp.vtag
+ [1 - CTINFO2DIR(ctinfo)])) {
+ WRITE_UNLOCK(&sctp_lock);
+ return -1;
+ }
+ } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
+ /* Sec 8.5.1 (C) */
+ if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
+ && !(sh->vtag == conntrack->proto.sctp.vtag
+ [1 - CTINFO2DIR(ctinfo)]
+ && (sch->flags & 1))) {
+ WRITE_UNLOCK(&sctp_lock);
+ return -1;
+ }
+ } else if (sch->type == SCTP_CID_COOKIE_ECHO) {
+ /* Sec 8.5.1 (D) */
+ if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
+ WRITE_UNLOCK(&sctp_lock);
+ return -1;
+ }
+ }
+
+ oldsctpstate = conntrack->proto.sctp.state;
+ newconntrack = new_state(CTINFO2DIR(ctinfo), oldsctpstate, sch->type);
+
+ /* Invalid */
+ if (newconntrack == SCTP_CONNTRACK_MAX) {
+ DEBUGP("ip_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n",
+ CTINFO2DIR(ctinfo), sch->type, oldsctpstate);
+ WRITE_UNLOCK(&sctp_lock);
+ return -1;
+ }
+
+ /* If it is an INIT or an INIT ACK note down the vtag */
+ if (sch->type == SCTP_CID_INIT
+ || sch->type == SCTP_CID_INIT_ACK) {
+ sctp_inithdr_t _inithdr, *ih;
+
+ ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
+ sizeof(_inithdr), &_inithdr);
+ if (ih == NULL) {
+ WRITE_UNLOCK(&sctp_lock);
+ return -1;
+ }
+ DEBUGP("Setting vtag %x for dir %d\n",
+ ih->init_tag, !CTINFO2DIR(ctinfo));
+ conntrack->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag;
+ }
+
+ conntrack->proto.sctp.state = newconntrack;
+ WRITE_UNLOCK(&sctp_lock);
+ }
+
+ ip_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]);
+
+ if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED
+ && CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY
+ && newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
+ DEBUGP("Setting assured bit\n");
+ set_bit(IPS_ASSURED_BIT, &conntrack->status);
+ }
+
+ return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found. */
+static int sctp_new(struct ip_conntrack *conntrack,
+ const struct sk_buff *skb)
+{
+ enum sctp_conntrack newconntrack;
+ struct iphdr *iph = skb->nh.iph;
+ sctp_sctphdr_t _sctph, *sh;
+ sctp_chunkhdr_t _sch, *sch;
+ u_int32_t offset, count;
+ char map[256 / sizeof (char)] = {0};
+
+ DEBUGP(__FUNCTION__);
+ DEBUGP("\n");
+
+ sh = skb_header_pointer(skb, iph->ihl * 4, sizeof(_sctph), &_sctph);
+ if (sh == NULL)
+ return 0;
+
+ if (do_basic_checks(conntrack, skb, map) != 0)
+ return 0;
+
+ /* If an OOTB packet has any of these chunks discard (Sec 8.4) */
+ if ((test_bit (SCTP_CID_ABORT, (void *)map))
+ || (test_bit (SCTP_CID_SHUTDOWN_COMPLETE, (void *)map))
+ || (test_bit (SCTP_CID_COOKIE_ACK, (void *)map))) {
+ return 0;
+ }
+
+ newconntrack = SCTP_CONNTRACK_MAX;
+ for_each_sctp_chunk (skb, sch, _sch, offset, count) {
+ /* Don't need lock here: this conntrack not in circulation yet */
+ newconntrack = new_state (IP_CT_DIR_ORIGINAL,
+ SCTP_CONNTRACK_NONE, sch->type);
+
+ /* Invalid: delete conntrack */
+ if (newconntrack == SCTP_CONNTRACK_MAX) {
+ DEBUGP("ip_conntrack_sctp: invalid new deleting.\n");
+ return 0;
+ }
+
+ /* Copy the vtag into the state info */
+ if (sch->type == SCTP_CID_INIT) {
+ if (sh->vtag == 0) {
+ sctp_inithdr_t _inithdr, *ih;
+
+ ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
+ sizeof(_inithdr), &_inithdr);
+ if (ih == NULL)
+ return 0;
+
+ DEBUGP("Setting vtag %x for new conn\n",
+ ih->init_tag);
+
+ conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] =
+ ih->init_tag;
+ } else {
+ /* Sec 8.5.1 (A) */
+ return 0;
+ }
+ }
+ /* If it is a shutdown ack OOTB packet, we expect a return
+ shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
+ else {
+ DEBUGP("Setting vtag %x for new conn OOTB\n",
+ sh->vtag);
+ conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
+ }
+
+ conntrack->proto.sctp.state = newconntrack;
+ }
+
+ return 1;
+}
+
+static struct ip_conntrack_protocol ip_conntrack_protocol_sctp = {
+ .proto = IPPROTO_SCTP,
+ .name = "sctp",
+ .pkt_to_tuple = sctp_pkt_to_tuple,
+ .invert_tuple = sctp_invert_tuple,
+ .print_tuple = sctp_print_tuple,
+ .print_conntrack = sctp_print_conntrack,
+ .packet = sctp_packet,
+ .new = sctp_new,
+ .destroy = NULL,
+ .me = THIS_MODULE
+};
+
+#ifdef CONFIG_SYSCTL
+static ctl_table ip_ct_sysctl_table[] = {
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED,
+ .procname = "ip_conntrack_sctp_timeout_closed",
+ .data = &ip_ct_sctp_timeout_closed,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT,
+ .procname = "ip_conntrack_sctp_timeout_cookie_wait",
+ .data = &ip_ct_sctp_timeout_cookie_wait,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED,
+ .procname = "ip_conntrack_sctp_timeout_cookie_echoed",
+ .data = &ip_ct_sctp_timeout_cookie_echoed,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED,
+ .procname = "ip_conntrack_sctp_timeout_established",
+ .data = &ip_ct_sctp_timeout_established,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT,
+ .procname = "ip_conntrack_sctp_timeout_shutdown_sent",
+ .data = &ip_ct_sctp_timeout_shutdown_sent,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD,
+ .procname = "ip_conntrack_sctp_timeout_shutdown_recd",
+ .data = &ip_ct_sctp_timeout_shutdown_recd,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT,
+ .procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent",
+ .data = &ip_ct_sctp_timeout_shutdown_ack_sent,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ { .ctl_name = 0 }
+};
+
+static ctl_table ip_ct_netfilter_table[] = {
+ {
+ .ctl_name = NET_IPV4_NETFILTER,
+ .procname = "netfilter",
+ .mode = 0555,
+ .child = ip_ct_sysctl_table,
+ },
+ { .ctl_name = 0 }
+};
+
+static ctl_table ip_ct_ipv4_table[] = {
+ {
+ .ctl_name = NET_IPV4,
+ .procname = "ipv4",
+ .mode = 0555,
+ .child = ip_ct_netfilter_table,
+ },
+ { .ctl_name = 0 }
+};
+
+static ctl_table ip_ct_net_table[] = {
+ {
+ .ctl_name = CTL_NET,
+ .procname = "net",
+ .mode = 0555,
+ .child = ip_ct_ipv4_table,
+ },
+ { .ctl_name = 0 }
+};
+
+static struct ctl_table_header *ip_ct_sysctl_header;
+#endif
+
+static int __init init(void)
+{
+ int ret;
+
+ ret = ip_conntrack_protocol_register(&ip_conntrack_protocol_sctp);
+ if (ret) {
+ printk("ip_conntrack_proto_sctp: protocol register failed\n");
+ goto out;
+ }
+
+#ifdef CONFIG_SYSCTL
+ ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table, 0);
+ if (ip_ct_sysctl_header == NULL) {
+ ret = -ENOMEM;
+ printk("ip_conntrack_proto_sctp: can't register to sysctl.\n");
+ goto cleanup;
+ }
+#endif
+
+ return ret;
+
+#ifdef CONFIG_SYSCTL
+ cleanup:
+ ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp);
+#endif
+ out:
+ DEBUGP("SCTP conntrack module loading %s\n",
+ ret ? "failed": "succeeded");
+ return ret;
+}
+
+static void __exit fini(void)
+{
+ ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp);
+#ifdef CONFIG_SYSCTL
+ unregister_sysctl_table(ip_ct_sysctl_header);
+#endif
+ DEBUGP("SCTP conntrack module unloaded\n");
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Kiran Kumar Immidi");
+MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP");
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
new file mode 100644
index 00000000000..e800b16fc92
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -0,0 +1,1098 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>:
+ * - Real stateful connection tracking
+ * - Modified state transitions table
+ * - Window scaling support added
+ * - SACK support added
+ *
+ * Willy Tarreau:
+ * - State table bugfixes
+ * - More robust state changes
+ * - Tuning timer parameters
+ *
+ * version 2.2
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/spinlock.h>
+
+#include <net/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/lockhelp.h>
+
+#if 0
+#define DEBUGP printk
+#define DEBUGP_VARS
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* Protects conntrack->proto.tcp */
+static DECLARE_RWLOCK(tcp_lock);
+
+/* "Be conservative in what you do,
+ be liberal in what you accept from others."
+ If it's non-zero, we mark only out of window RST segments as INVALID. */
+int ip_ct_tcp_be_liberal = 0;
+
+/* When connection is picked up from the middle, how many packets are required
+ to pass in each direction when we assume we are in sync - if any side uses
+ window scaling, we lost the game.
+ If it is set to zero, we disable picking up already established
+ connections. */
+int ip_ct_tcp_loose = 3;
+
+/* Max number of the retransmitted packets without receiving an (acceptable)
+ ACK from the destination. If this number is reached, a shorter timer
+ will be started. */
+int ip_ct_tcp_max_retrans = 3;
+
+ /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
+ closely. They're more complex. --RR */
+
+static const char *tcp_conntrack_names[] = {
+ "NONE",
+ "SYN_SENT",
+ "SYN_RECV",
+ "ESTABLISHED",
+ "FIN_WAIT",
+ "CLOSE_WAIT",
+ "LAST_ACK",
+ "TIME_WAIT",
+ "CLOSE",
+ "LISTEN"
+};
+
+#define SECS * HZ
+#define MINS * 60 SECS
+#define HOURS * 60 MINS
+#define DAYS * 24 HOURS
+
+unsigned long ip_ct_tcp_timeout_syn_sent = 2 MINS;
+unsigned long ip_ct_tcp_timeout_syn_recv = 60 SECS;
+unsigned long ip_ct_tcp_timeout_established = 5 DAYS;
+unsigned long ip_ct_tcp_timeout_fin_wait = 2 MINS;
+unsigned long ip_ct_tcp_timeout_close_wait = 60 SECS;
+unsigned long ip_ct_tcp_timeout_last_ack = 30 SECS;
+unsigned long ip_ct_tcp_timeout_time_wait = 2 MINS;
+unsigned long ip_ct_tcp_timeout_close = 10 SECS;
+
+/* RFC1122 says the R2 limit should be at least 100 seconds.
+ Linux uses 15 packets as limit, which corresponds
+ to ~13-30min depending on RTO. */
+unsigned long ip_ct_tcp_timeout_max_retrans = 5 MINS;
+
+static unsigned long * tcp_timeouts[]
+= { NULL, /* TCP_CONNTRACK_NONE */
+ &ip_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */
+ &ip_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */
+ &ip_ct_tcp_timeout_established, /* TCP_CONNTRACK_ESTABLISHED, */
+ &ip_ct_tcp_timeout_fin_wait, /* TCP_CONNTRACK_FIN_WAIT, */
+ &ip_ct_tcp_timeout_close_wait, /* TCP_CONNTRACK_CLOSE_WAIT, */
+ &ip_ct_tcp_timeout_last_ack, /* TCP_CONNTRACK_LAST_ACK, */
+ &ip_ct_tcp_timeout_time_wait, /* TCP_CONNTRACK_TIME_WAIT, */
+ &ip_ct_tcp_timeout_close, /* TCP_CONNTRACK_CLOSE, */
+ NULL, /* TCP_CONNTRACK_LISTEN */
+ };
+
+#define sNO TCP_CONNTRACK_NONE
+#define sSS TCP_CONNTRACK_SYN_SENT
+#define sSR TCP_CONNTRACK_SYN_RECV
+#define sES TCP_CONNTRACK_ESTABLISHED
+#define sFW TCP_CONNTRACK_FIN_WAIT
+#define sCW TCP_CONNTRACK_CLOSE_WAIT
+#define sLA TCP_CONNTRACK_LAST_ACK
+#define sTW TCP_CONNTRACK_TIME_WAIT
+#define sCL TCP_CONNTRACK_CLOSE
+#define sLI TCP_CONNTRACK_LISTEN
+#define sIV TCP_CONNTRACK_MAX
+#define sIG TCP_CONNTRACK_IGNORE
+
+/* What TCP flags are set from RST/SYN/FIN/ACK. */
+enum tcp_bit_set {
+ TCP_SYN_SET,
+ TCP_SYNACK_SET,
+ TCP_FIN_SET,
+ TCP_ACK_SET,
+ TCP_RST_SET,
+ TCP_NONE_SET,
+};
+
+/*
+ * The TCP state transition table needs a few words...
+ *
+ * We are the man in the middle. All the packets go through us
+ * but might get lost in transit to the destination.
+ * It is assumed that the destinations can't receive segments
+ * we haven't seen.
+ *
+ * The checked segment is in window, but our windows are *not*
+ * equivalent with the ones of the sender/receiver. We always
+ * try to guess the state of the current sender.
+ *
+ * The meaning of the states are:
+ *
+ * NONE: initial state
+ * SYN_SENT: SYN-only packet seen
+ * SYN_RECV: SYN-ACK packet seen
+ * ESTABLISHED: ACK packet seen
+ * FIN_WAIT: FIN packet seen
+ * CLOSE_WAIT: ACK seen (after FIN)
+ * LAST_ACK: FIN seen (after FIN)
+ * TIME_WAIT: last ACK seen
+ * CLOSE: closed connection
+ *
+ * LISTEN state is not used.
+ *
+ * Packets marked as IGNORED (sIG):
+ * if they may be either invalid or valid
+ * and the receiver may send back a connection
+ * closing RST or a SYN/ACK.
+ *
+ * Packets marked as INVALID (sIV):
+ * if they are invalid
+ * or we do not support the request (simultaneous open)
+ */
+static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
+ {
+/* ORIGINAL */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
+/*
+ * sNO -> sSS Initialize a new connection
+ * sSS -> sSS Retransmitted SYN
+ * sSR -> sIG Late retransmitted SYN?
+ * sES -> sIG Error: SYNs in window outside the SYN_SENT state
+ * are errors. Receiver will reply with RST
+ * and close the connection.
+ * Or we are not in sync and hold a dead connection.
+ * sFW -> sIG
+ * sCW -> sIG
+ * sLA -> sIG
+ * sTW -> sSS Reopened connection (RFC 1122).
+ * sCL -> sSS
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
+/*
+ * A SYN/ACK from the client is always invalid:
+ * - either it tries to set up a simultaneous open, which is
+ * not supported;
+ * - or the firewall has just been inserted between the two hosts
+ * during the session set-up. The SYN will be retransmitted
+ * by the true client (or it'll time out).
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
+/*
+ * sNO -> sIV Too late and no reason to do anything...
+ * sSS -> sIV Client migth not send FIN in this state:
+ * we enforce waiting for a SYN/ACK reply first.
+ * sSR -> sFW Close started.
+ * sES -> sFW
+ * sFW -> sLA FIN seen in both directions, waiting for
+ * the last ACK.
+ * Migth be a retransmitted FIN as well...
+ * sCW -> sLA
+ * sLA -> sLA Retransmitted FIN. Remain in the same state.
+ * sTW -> sTW
+ * sCL -> sCL
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+/*
+ * sNO -> sES Assumed.
+ * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
+ * sSR -> sES Established state is reached.
+ * sES -> sES :-)
+ * sFW -> sCW Normal close request answered by ACK.
+ * sCW -> sCW
+ * sLA -> sTW Last ACK detected.
+ * sTW -> sTW Retransmitted last ACK. Remain in the same state.
+ * sCL -> sCL
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
+/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
+ },
+ {
+/* REPLY */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
+/*
+ * sNO -> sIV Never reached.
+ * sSS -> sIV Simultaneous open, not supported
+ * sSR -> sIV Simultaneous open, not supported.
+ * sES -> sIV Server may not initiate a connection.
+ * sFW -> sIV
+ * sCW -> sIV
+ * sLA -> sIV
+ * sTW -> sIV Reopened connection, but server may not do it.
+ * sCL -> sIV
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
+/*
+ * sSS -> sSR Standard open.
+ * sSR -> sSR Retransmitted SYN/ACK.
+ * sES -> sIG Late retransmitted SYN/ACK?
+ * sFW -> sIG Might be SYN/ACK answering ignored SYN
+ * sCW -> sIG
+ * sLA -> sIG
+ * sTW -> sIG
+ * sCL -> sIG
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
+/*
+ * sSS -> sIV Server might not send FIN in this state.
+ * sSR -> sFW Close started.
+ * sES -> sFW
+ * sFW -> sLA FIN seen in both directions.
+ * sCW -> sLA
+ * sLA -> sLA Retransmitted FIN.
+ * sTW -> sTW
+ * sCL -> sCL
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*ack*/ { sIV, sIV, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+/*
+ * sSS -> sIV Might be a half-open connection.
+ * sSR -> sSR Might answer late resent SYN.
+ * sES -> sES :-)
+ * sFW -> sCW Normal close request answered by ACK.
+ * sCW -> sCW
+ * sLA -> sTW Last ACK detected.
+ * sTW -> sTW Retransmitted last ACK.
+ * sCL -> sCL
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
+/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
+ }
+};
+
+static int tcp_pkt_to_tuple(const struct sk_buff *skb,
+ unsigned int dataoff,
+ struct ip_conntrack_tuple *tuple)
+{
+ struct tcphdr _hdr, *hp;
+
+ /* Actually only need first 8 bytes. */
+ hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
+ if (hp == NULL)
+ return 0;
+
+ tuple->src.u.tcp.port = hp->source;
+ tuple->dst.u.tcp.port = hp->dest;
+
+ return 1;
+}
+
+static int tcp_invert_tuple(struct ip_conntrack_tuple *tuple,
+ const struct ip_conntrack_tuple *orig)
+{
+ tuple->src.u.tcp.port = orig->dst.u.tcp.port;
+ tuple->dst.u.tcp.port = orig->src.u.tcp.port;
+ return 1;
+}
+
+/* Print out the per-protocol part of the tuple. */
+static int tcp_print_tuple(struct seq_file *s,
+ const struct ip_conntrack_tuple *tuple)
+{
+ return seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.tcp.port),
+ ntohs(tuple->dst.u.tcp.port));
+}
+
+/* Print out the private part of the conntrack. */
+static int tcp_print_conntrack(struct seq_file *s,
+ const struct ip_conntrack *conntrack)
+{
+ enum tcp_conntrack state;
+
+ READ_LOCK(&tcp_lock);
+ state = conntrack->proto.tcp.state;
+ READ_UNLOCK(&tcp_lock);
+
+ return seq_printf(s, "%s ", tcp_conntrack_names[state]);
+}
+
+static unsigned int get_conntrack_index(const struct tcphdr *tcph)
+{
+ if (tcph->rst) return TCP_RST_SET;
+ else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
+ else if (tcph->fin) return TCP_FIN_SET;
+ else if (tcph->ack) return TCP_ACK_SET;
+ else return TCP_NONE_SET;
+}
+
+/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
+ in IP Filter' by Guido van Rooij.
+
+ http://www.nluug.nl/events/sane2000/papers.html
+ http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
+
+ The boundaries and the conditions are changed according to RFC793:
+ the packet must intersect the window (i.e. segments may be
+ after the right or before the left edge) and thus receivers may ACK
+ segments after the right edge of the window.
+
+ td_maxend = max(sack + max(win,1)) seen in reply packets
+ td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
+ td_maxwin += seq + len - sender.td_maxend
+ if seq + len > sender.td_maxend
+ td_end = max(seq + len) seen in sent packets
+
+ I. Upper bound for valid data: seq <= sender.td_maxend
+ II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin
+ III. Upper bound for valid ack: sack <= receiver.td_end
+ IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW
+
+ where sack is the highest right edge of sack block found in the packet.
+
+ The upper bound limit for a valid ack is not ignored -
+ we doesn't have to deal with fragments.
+*/
+
+static inline __u32 segment_seq_plus_len(__u32 seq,
+ size_t len,
+ struct iphdr *iph,
+ struct tcphdr *tcph)
+{
+ return (seq + len - (iph->ihl + tcph->doff)*4
+ + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
+}
+
+/* Fixme: what about big packets? */
+#define MAXACKWINCONST 66000
+#define MAXACKWINDOW(sender) \
+ ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin \
+ : MAXACKWINCONST)
+
+/*
+ * Simplified tcp_parse_options routine from tcp_input.c
+ */
+static void tcp_options(const struct sk_buff *skb,
+ struct iphdr *iph,
+ struct tcphdr *tcph,
+ struct ip_ct_tcp_state *state)
+{
+ unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
+ unsigned char *ptr;
+ int length = (tcph->doff*4) - sizeof(struct tcphdr);
+
+ if (!length)
+ return;
+
+ ptr = skb_header_pointer(skb,
+ (iph->ihl * 4) + sizeof(struct tcphdr),
+ length, buff);
+ BUG_ON(ptr == NULL);
+
+ state->td_scale =
+ state->flags = 0;
+
+ while (length > 0) {
+ int opcode=*ptr++;
+ int opsize;
+
+ switch (opcode) {
+ case TCPOPT_EOL:
+ return;
+ case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
+ length--;
+ continue;
+ default:
+ opsize=*ptr++;
+ if (opsize < 2) /* "silly options" */
+ return;
+ if (opsize > length)
+ break; /* don't parse partial options */
+
+ if (opcode == TCPOPT_SACK_PERM
+ && opsize == TCPOLEN_SACK_PERM)
+ state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
+ else if (opcode == TCPOPT_WINDOW
+ && opsize == TCPOLEN_WINDOW) {
+ state->td_scale = *(u_int8_t *)ptr;
+
+ if (state->td_scale > 14) {
+ /* See RFC1323 */
+ state->td_scale = 14;
+ }
+ state->flags |=
+ IP_CT_TCP_FLAG_WINDOW_SCALE;
+ }
+ ptr += opsize - 2;
+ length -= opsize;
+ }
+ }
+}
+
+static void tcp_sack(const struct sk_buff *skb,
+ struct iphdr *iph,
+ struct tcphdr *tcph,
+ __u32 *sack)
+{
+ unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
+ unsigned char *ptr;
+ int length = (tcph->doff*4) - sizeof(struct tcphdr);
+ __u32 tmp;
+
+ if (!length)
+ return;
+
+ ptr = skb_header_pointer(skb,
+ (iph->ihl * 4) + sizeof(struct tcphdr),
+ length, buff);
+ BUG_ON(ptr == NULL);
+
+ /* Fast path for timestamp-only option */
+ if (length == TCPOLEN_TSTAMP_ALIGNED*4
+ && *(__u32 *)ptr ==
+ __constant_ntohl((TCPOPT_NOP << 24)
+ | (TCPOPT_NOP << 16)
+ | (TCPOPT_TIMESTAMP << 8)
+ | TCPOLEN_TIMESTAMP))
+ return;
+
+ while (length > 0) {
+ int opcode=*ptr++;
+ int opsize, i;
+
+ switch (opcode) {
+ case TCPOPT_EOL:
+ return;
+ case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
+ length--;
+ continue;
+ default:
+ opsize=*ptr++;
+ if (opsize < 2) /* "silly options" */
+ return;
+ if (opsize > length)
+ break; /* don't parse partial options */
+
+ if (opcode == TCPOPT_SACK
+ && opsize >= (TCPOLEN_SACK_BASE
+ + TCPOLEN_SACK_PERBLOCK)
+ && !((opsize - TCPOLEN_SACK_BASE)
+ % TCPOLEN_SACK_PERBLOCK)) {
+ for (i = 0;
+ i < (opsize - TCPOLEN_SACK_BASE);
+ i += TCPOLEN_SACK_PERBLOCK) {
+ tmp = ntohl(*((u_int32_t *)(ptr+i)+1));
+
+ if (after(tmp, *sack))
+ *sack = tmp;
+ }
+ return;
+ }
+ ptr += opsize - 2;
+ length -= opsize;
+ }
+ }
+}
+
+static int tcp_in_window(struct ip_ct_tcp *state,
+ enum ip_conntrack_dir dir,
+ unsigned int index,
+ const struct sk_buff *skb,
+ struct iphdr *iph,
+ struct tcphdr *tcph)
+{
+ struct ip_ct_tcp_state *sender = &state->seen[dir];
+ struct ip_ct_tcp_state *receiver = &state->seen[!dir];
+ __u32 seq, ack, sack, end, win, swin;
+ int res;
+
+ /*
+ * Get the required data from the packet.
+ */
+ seq = ntohl(tcph->seq);
+ ack = sack = ntohl(tcph->ack_seq);
+ win = ntohs(tcph->window);
+ end = segment_seq_plus_len(seq, skb->len, iph, tcph);
+
+ if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
+ tcp_sack(skb, iph, tcph, &sack);
+
+ DEBUGP("tcp_in_window: START\n");
+ DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
+ "seq=%u ack=%u sack=%u win=%u end=%u\n",
+ NIPQUAD(iph->saddr), ntohs(tcph->source),
+ NIPQUAD(iph->daddr), ntohs(tcph->dest),
+ seq, ack, sack, win, end);
+ DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
+ "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+ sender->td_end, sender->td_maxend, sender->td_maxwin,
+ sender->td_scale,
+ receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+ receiver->td_scale);
+
+ if (sender->td_end == 0) {
+ /*
+ * Initialize sender data.
+ */
+ if (tcph->syn && tcph->ack) {
+ /*
+ * Outgoing SYN-ACK in reply to a SYN.
+ */
+ sender->td_end =
+ sender->td_maxend = end;
+ sender->td_maxwin = (win == 0 ? 1 : win);
+
+ tcp_options(skb, iph, tcph, sender);
+ /*
+ * RFC 1323:
+ * Both sides must send the Window Scale option
+ * to enable window scaling in either direction.
+ */
+ if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
+ && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
+ sender->td_scale =
+ receiver->td_scale = 0;
+ } else {
+ /*
+ * We are in the middle of a connection,
+ * its history is lost for us.
+ * Let's try to use the data from the packet.
+ */
+ sender->td_end = end;
+ sender->td_maxwin = (win == 0 ? 1 : win);
+ sender->td_maxend = end + sender->td_maxwin;
+ }
+ } else if (((state->state == TCP_CONNTRACK_SYN_SENT
+ && dir == IP_CT_DIR_ORIGINAL)
+ || (state->state == TCP_CONNTRACK_SYN_RECV
+ && dir == IP_CT_DIR_REPLY))
+ && after(end, sender->td_end)) {
+ /*
+ * RFC 793: "if a TCP is reinitialized ... then it need
+ * not wait at all; it must only be sure to use sequence
+ * numbers larger than those recently used."
+ */
+ sender->td_end =
+ sender->td_maxend = end;
+ sender->td_maxwin = (win == 0 ? 1 : win);
+
+ tcp_options(skb, iph, tcph, sender);
+ }
+
+ if (!(tcph->ack)) {
+ /*
+ * If there is no ACK, just pretend it was set and OK.
+ */
+ ack = sack = receiver->td_end;
+ } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
+ (TCP_FLAG_ACK|TCP_FLAG_RST))
+ && (ack == 0)) {
+ /*
+ * Broken TCP stacks, that set ACK in RST packets as well
+ * with zero ack value.
+ */
+ ack = sack = receiver->td_end;
+ }
+
+ if (seq == end
+ && (!tcph->rst
+ || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
+ /*
+ * Packets contains no data: we assume it is valid
+ * and check the ack value only.
+ * However RST segments are always validated by their
+ * SEQ number, except when seq == 0 (reset sent answering
+ * SYN.
+ */
+ seq = end = sender->td_end;
+
+ DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
+ "seq=%u ack=%u sack =%u win=%u end=%u\n",
+ NIPQUAD(iph->saddr), ntohs(tcph->source),
+ NIPQUAD(iph->daddr), ntohs(tcph->dest),
+ seq, ack, sack, win, end);
+ DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
+ "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+ sender->td_end, sender->td_maxend, sender->td_maxwin,
+ sender->td_scale,
+ receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+ receiver->td_scale);
+
+ DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
+ before(seq, sender->td_maxend + 1),
+ after(end, sender->td_end - receiver->td_maxwin - 1),
+ before(sack, receiver->td_end + 1),
+ after(ack, receiver->td_end - MAXACKWINDOW(sender)));
+
+ if (sender->loose || receiver->loose ||
+ (before(seq, sender->td_maxend + 1) &&
+ after(end, sender->td_end - receiver->td_maxwin - 1) &&
+ before(sack, receiver->td_end + 1) &&
+ after(ack, receiver->td_end - MAXACKWINDOW(sender)))) {
+ /*
+ * Take into account window scaling (RFC 1323).
+ */
+ if (!tcph->syn)
+ win <<= sender->td_scale;
+
+ /*
+ * Update sender data.
+ */
+ swin = win + (sack - ack);
+ if (sender->td_maxwin < swin)
+ sender->td_maxwin = swin;
+ if (after(end, sender->td_end))
+ sender->td_end = end;
+ /*
+ * Update receiver data.
+ */
+ if (after(end, sender->td_maxend))
+ receiver->td_maxwin += end - sender->td_maxend;
+ if (after(sack + win, receiver->td_maxend - 1)) {
+ receiver->td_maxend = sack + win;
+ if (win == 0)
+ receiver->td_maxend++;
+ }
+
+ /*
+ * Check retransmissions.
+ */
+ if (index == TCP_ACK_SET) {
+ if (state->last_dir == dir
+ && state->last_seq == seq
+ && state->last_ack == ack
+ && state->last_end == end)
+ state->retrans++;
+ else {
+ state->last_dir = dir;
+ state->last_seq = seq;
+ state->last_ack = ack;
+ state->last_end = end;
+ state->retrans = 0;
+ }
+ }
+ /*
+ * Close the window of disabled window tracking :-)
+ */
+ if (sender->loose)
+ sender->loose--;
+
+ res = 1;
+ } else {
+ if (LOG_INVALID(IPPROTO_TCP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_tcp: %s ",
+ before(seq, sender->td_maxend + 1) ?
+ after(end, sender->td_end - receiver->td_maxwin - 1) ?
+ before(sack, receiver->td_end + 1) ?
+ after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
+ : "ACK is under the lower bound (possible overly delayed ACK)"
+ : "ACK is over the upper bound (ACKed data not seen yet)"
+ : "SEQ is under the lower bound (already ACKed data retransmitted)"
+ : "SEQ is over the upper bound (over the window of the receiver)");
+
+ res = ip_ct_tcp_be_liberal;
+ }
+
+ DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
+ "receiver end=%u maxend=%u maxwin=%u\n",
+ res, sender->td_end, sender->td_maxend, sender->td_maxwin,
+ receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
+
+ return res;
+}
+
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+/* Update sender->td_end after NAT successfully mangled the packet */
+void ip_conntrack_tcp_update(struct sk_buff *skb,
+ struct ip_conntrack *conntrack,
+ enum ip_conntrack_dir dir)
+{
+ struct iphdr *iph = skb->nh.iph;
+ struct tcphdr *tcph = (void *)skb->nh.iph + skb->nh.iph->ihl*4;
+ __u32 end;
+#ifdef DEBUGP_VARS
+ struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir];
+ struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir];
+#endif
+
+ end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, iph, tcph);
+
+ WRITE_LOCK(&tcp_lock);
+ /*
+ * We have to worry for the ack in the reply packet only...
+ */
+ if (after(end, conntrack->proto.tcp.seen[dir].td_end))
+ conntrack->proto.tcp.seen[dir].td_end = end;
+ conntrack->proto.tcp.last_end = end;
+ WRITE_UNLOCK(&tcp_lock);
+ DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
+ "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+ sender->td_end, sender->td_maxend, sender->td_maxwin,
+ sender->td_scale,
+ receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+ receiver->td_scale);
+}
+
+#endif
+
+#define TH_FIN 0x01
+#define TH_SYN 0x02
+#define TH_RST 0x04
+#define TH_PUSH 0x08
+#define TH_ACK 0x10
+#define TH_URG 0x20
+#define TH_ECE 0x40
+#define TH_CWR 0x80
+
+/* table of valid flag combinations - ECE and CWR are always valid */
+static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
+{
+ [TH_SYN] = 1,
+ [TH_SYN|TH_ACK] = 1,
+ [TH_RST] = 1,
+ [TH_RST|TH_ACK] = 1,
+ [TH_RST|TH_ACK|TH_PUSH] = 1,
+ [TH_FIN|TH_ACK] = 1,
+ [TH_ACK] = 1,
+ [TH_ACK|TH_PUSH] = 1,
+ [TH_ACK|TH_URG] = 1,
+ [TH_ACK|TH_URG|TH_PUSH] = 1,
+ [TH_FIN|TH_ACK|TH_PUSH] = 1,
+ [TH_FIN|TH_ACK|TH_URG] = 1,
+ [TH_FIN|TH_ACK|TH_URG|TH_PUSH] = 1,
+};
+
+/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
+static int tcp_error(struct sk_buff *skb,
+ enum ip_conntrack_info *ctinfo,
+ unsigned int hooknum)
+{
+ struct iphdr *iph = skb->nh.iph;
+ struct tcphdr _tcph, *th;
+ unsigned int tcplen = skb->len - iph->ihl * 4;
+ u_int8_t tcpflags;
+
+ /* Smaller that minimal TCP header? */
+ th = skb_header_pointer(skb, iph->ihl * 4,
+ sizeof(_tcph), &_tcph);
+ if (th == NULL) {
+ if (LOG_INVALID(IPPROTO_TCP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_tcp: short packet ");
+ return -NF_ACCEPT;
+ }
+
+ /* Not whole TCP header or malformed packet */
+ if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
+ if (LOG_INVALID(IPPROTO_TCP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_tcp: truncated/malformed packet ");
+ return -NF_ACCEPT;
+ }
+
+ /* Checksum invalid? Ignore.
+ * We skip checking packets on the outgoing path
+ * because the semantic of CHECKSUM_HW is different there
+ * and moreover root might send raw packets.
+ */
+ /* FIXME: Source route IP option packets --RR */
+ if (hooknum == NF_IP_PRE_ROUTING
+ && csum_tcpudp_magic(iph->saddr, iph->daddr, tcplen, IPPROTO_TCP,
+ skb->ip_summed == CHECKSUM_HW ? skb->csum
+ : skb_checksum(skb, iph->ihl*4, tcplen, 0))) {
+ if (LOG_INVALID(IPPROTO_TCP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_tcp: bad TCP checksum ");
+ return -NF_ACCEPT;
+ }
+
+ /* Check TCP flags. */
+ tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR));
+ if (!tcp_valid_flags[tcpflags]) {
+ if (LOG_INVALID(IPPROTO_TCP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_tcp: invalid TCP flag combination ");
+ return -NF_ACCEPT;
+ }
+
+ return NF_ACCEPT;
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+static int tcp_packet(struct ip_conntrack *conntrack,
+ const struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo)
+{
+ enum tcp_conntrack new_state, old_state;
+ enum ip_conntrack_dir dir;
+ struct iphdr *iph = skb->nh.iph;
+ struct tcphdr *th, _tcph;
+ unsigned long timeout;
+ unsigned int index;
+
+ th = skb_header_pointer(skb, iph->ihl * 4,
+ sizeof(_tcph), &_tcph);
+ BUG_ON(th == NULL);
+
+ WRITE_LOCK(&tcp_lock);
+ old_state = conntrack->proto.tcp.state;
+ dir = CTINFO2DIR(ctinfo);
+ index = get_conntrack_index(th);
+ new_state = tcp_conntracks[dir][index][old_state];
+
+ switch (new_state) {
+ case TCP_CONNTRACK_IGNORE:
+ /* Either SYN in ORIGINAL
+ * or SYN/ACK in REPLY. */
+ if (index == TCP_SYNACK_SET
+ && conntrack->proto.tcp.last_index == TCP_SYN_SET
+ && conntrack->proto.tcp.last_dir != dir
+ && ntohl(th->ack_seq) ==
+ conntrack->proto.tcp.last_end) {
+ /* This SYN/ACK acknowledges a SYN that we earlier
+ * ignored as invalid. This means that the client and
+ * the server are both in sync, while the firewall is
+ * not. We kill this session and block the SYN/ACK so
+ * that the client cannot but retransmit its SYN and
+ * thus initiate a clean new session.
+ */
+ WRITE_UNLOCK(&tcp_lock);
+ if (LOG_INVALID(IPPROTO_TCP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_tcp: killing out of sync session ");
+ if (del_timer(&conntrack->timeout))
+ conntrack->timeout.function((unsigned long)
+ conntrack);
+ return -NF_DROP;
+ }
+ conntrack->proto.tcp.last_index = index;
+ conntrack->proto.tcp.last_dir = dir;
+ conntrack->proto.tcp.last_seq = ntohl(th->seq);
+ conntrack->proto.tcp.last_end =
+ segment_seq_plus_len(ntohl(th->seq), skb->len, iph, th);
+
+ WRITE_UNLOCK(&tcp_lock);
+ if (LOG_INVALID(IPPROTO_TCP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_tcp: invalid packet ignored ");
+ return NF_ACCEPT;
+ case TCP_CONNTRACK_MAX:
+ /* Invalid packet */
+ DEBUGP("ip_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
+ dir, get_conntrack_index(th),
+ old_state);
+ WRITE_UNLOCK(&tcp_lock);
+ if (LOG_INVALID(IPPROTO_TCP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_tcp: invalid state ");
+ return -NF_ACCEPT;
+ case TCP_CONNTRACK_SYN_SENT:
+ if (old_state < TCP_CONNTRACK_TIME_WAIT)
+ break;
+ if ((conntrack->proto.tcp.seen[dir].flags &
+ IP_CT_TCP_FLAG_CLOSE_INIT)
+ || after(ntohl(th->seq),
+ conntrack->proto.tcp.seen[dir].td_end)) {
+ /* Attempt to reopen a closed connection.
+ * Delete this connection and look up again. */
+ WRITE_UNLOCK(&tcp_lock);
+ if (del_timer(&conntrack->timeout))
+ conntrack->timeout.function((unsigned long)
+ conntrack);
+ return -NF_REPEAT;
+ } else {
+ WRITE_UNLOCK(&tcp_lock);
+ if (LOG_INVALID(IPPROTO_TCP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_tcp: invalid SYN");
+ return -NF_ACCEPT;
+ }
+ case TCP_CONNTRACK_CLOSE:
+ if (index == TCP_RST_SET
+ && test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
+ && conntrack->proto.tcp.last_index == TCP_SYN_SET
+ && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
+ /* RST sent to invalid SYN we had let trough
+ * SYN was in window then, tear down connection.
+ * We skip window checking, because packet might ACK
+ * segments we ignored in the SYN. */
+ goto in_window;
+ }
+ /* Just fall trough */
+ default:
+ /* Keep compilers happy. */
+ break;
+ }
+
+ if (!tcp_in_window(&conntrack->proto.tcp, dir, index,
+ skb, iph, th)) {
+ WRITE_UNLOCK(&tcp_lock);
+ return -NF_ACCEPT;
+ }
+ in_window:
+ /* From now on we have got in-window packets */
+ conntrack->proto.tcp.last_index = index;
+
+ DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
+ "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
+ NIPQUAD(iph->saddr), ntohs(th->source),
+ NIPQUAD(iph->daddr), ntohs(th->dest),
+ (th->syn ? 1 : 0), (th->ack ? 1 : 0),
+ (th->fin ? 1 : 0), (th->rst ? 1 : 0),
+ old_state, new_state);
+
+ conntrack->proto.tcp.state = new_state;
+ if (old_state != new_state
+ && (new_state == TCP_CONNTRACK_FIN_WAIT
+ || new_state == TCP_CONNTRACK_CLOSE))
+ conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
+ timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans
+ && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans
+ ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
+ WRITE_UNLOCK(&tcp_lock);
+
+ if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
+ /* If only reply is a RST, we can consider ourselves not to
+ have an established connection: this is a fairly common
+ problem case, so we can delete the conntrack
+ immediately. --RR */
+ if (th->rst) {
+ if (del_timer(&conntrack->timeout))
+ conntrack->timeout.function((unsigned long)
+ conntrack);
+ return NF_ACCEPT;
+ }
+ } else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
+ && (old_state == TCP_CONNTRACK_SYN_RECV
+ || old_state == TCP_CONNTRACK_ESTABLISHED)
+ && new_state == TCP_CONNTRACK_ESTABLISHED) {
+ /* Set ASSURED if we see see valid ack in ESTABLISHED
+ after SYN_RECV or a valid answer for a picked up
+ connection. */
+ set_bit(IPS_ASSURED_BIT, &conntrack->status);
+ }
+ ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
+
+ return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found. */
+static int tcp_new(struct ip_conntrack *conntrack,
+ const struct sk_buff *skb)
+{
+ enum tcp_conntrack new_state;
+ struct iphdr *iph = skb->nh.iph;
+ struct tcphdr *th, _tcph;
+#ifdef DEBUGP_VARS
+ struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0];
+ struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1];
+#endif
+
+ th = skb_header_pointer(skb, iph->ihl * 4,
+ sizeof(_tcph), &_tcph);
+ BUG_ON(th == NULL);
+
+ /* Don't need lock here: this conntrack not in circulation yet */
+ new_state
+ = tcp_conntracks[0][get_conntrack_index(th)]
+ [TCP_CONNTRACK_NONE];
+
+ /* Invalid: delete conntrack */
+ if (new_state >= TCP_CONNTRACK_MAX) {
+ DEBUGP("ip_ct_tcp: invalid new deleting.\n");
+ return 0;
+ }
+
+ if (new_state == TCP_CONNTRACK_SYN_SENT) {
+ /* SYN packet */
+ conntrack->proto.tcp.seen[0].td_end =
+ segment_seq_plus_len(ntohl(th->seq), skb->len,
+ iph, th);
+ conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
+ if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
+ conntrack->proto.tcp.seen[0].td_maxwin = 1;
+ conntrack->proto.tcp.seen[0].td_maxend =
+ conntrack->proto.tcp.seen[0].td_end;
+
+ tcp_options(skb, iph, th, &conntrack->proto.tcp.seen[0]);
+ conntrack->proto.tcp.seen[1].flags = 0;
+ conntrack->proto.tcp.seen[0].loose =
+ conntrack->proto.tcp.seen[1].loose = 0;
+ } else if (ip_ct_tcp_loose == 0) {
+ /* Don't try to pick up connections. */
+ return 0;
+ } else {
+ /*
+ * We are in the middle of a connection,
+ * its history is lost for us.
+ * Let's try to use the data from the packet.
+ */
+ conntrack->proto.tcp.seen[0].td_end =
+ segment_seq_plus_len(ntohl(th->seq), skb->len,
+ iph, th);
+ conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
+ if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
+ conntrack->proto.tcp.seen[0].td_maxwin = 1;
+ conntrack->proto.tcp.seen[0].td_maxend =
+ conntrack->proto.tcp.seen[0].td_end +
+ conntrack->proto.tcp.seen[0].td_maxwin;
+ conntrack->proto.tcp.seen[0].td_scale = 0;
+
+ /* We assume SACK. Should we assume window scaling too? */
+ conntrack->proto.tcp.seen[0].flags =
+ conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM;
+ conntrack->proto.tcp.seen[0].loose =
+ conntrack->proto.tcp.seen[1].loose = ip_ct_tcp_loose;
+ }
+
+ conntrack->proto.tcp.seen[1].td_end = 0;
+ conntrack->proto.tcp.seen[1].td_maxend = 0;
+ conntrack->proto.tcp.seen[1].td_maxwin = 1;
+ conntrack->proto.tcp.seen[1].td_scale = 0;
+
+ /* tcp_packet will set them */
+ conntrack->proto.tcp.state = TCP_CONNTRACK_NONE;
+ conntrack->proto.tcp.last_index = TCP_NONE_SET;
+
+ DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
+ "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+ sender->td_end, sender->td_maxend, sender->td_maxwin,
+ sender->td_scale,
+ receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+ receiver->td_scale);
+ return 1;
+}
+
+struct ip_conntrack_protocol ip_conntrack_protocol_tcp =
+{
+ .proto = IPPROTO_TCP,
+ .name = "tcp",
+ .pkt_to_tuple = tcp_pkt_to_tuple,
+ .invert_tuple = tcp_invert_tuple,
+ .print_tuple = tcp_print_tuple,
+ .print_conntrack = tcp_print_conntrack,
+ .packet = tcp_packet,
+ .new = tcp_new,
+ .error = tcp_error,
+};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
new file mode 100644
index 00000000000..5bc28a22462
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -0,0 +1,146 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/seq_file.h>
+#include <net/checksum.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+
+unsigned long ip_ct_udp_timeout = 30*HZ;
+unsigned long ip_ct_udp_timeout_stream = 180*HZ;
+
+static int udp_pkt_to_tuple(const struct sk_buff *skb,
+ unsigned int dataoff,
+ struct ip_conntrack_tuple *tuple)
+{
+ struct udphdr _hdr, *hp;
+
+ /* Actually only need first 8 bytes. */
+ hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+ if (hp == NULL)
+ return 0;
+
+ tuple->src.u.udp.port = hp->source;
+ tuple->dst.u.udp.port = hp->dest;
+
+ return 1;
+}
+
+static int udp_invert_tuple(struct ip_conntrack_tuple *tuple,
+ const struct ip_conntrack_tuple *orig)
+{
+ tuple->src.u.udp.port = orig->dst.u.udp.port;
+ tuple->dst.u.udp.port = orig->src.u.udp.port;
+ return 1;
+}
+
+/* Print out the per-protocol part of the tuple. */
+static int udp_print_tuple(struct seq_file *s,
+ const struct ip_conntrack_tuple *tuple)
+{
+ return seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.udp.port),
+ ntohs(tuple->dst.u.udp.port));
+}
+
+/* Print out the private part of the conntrack. */
+static int udp_print_conntrack(struct seq_file *s,
+ const struct ip_conntrack *conntrack)
+{
+ return 0;
+}
+
+/* Returns verdict for packet, and may modify conntracktype */
+static int udp_packet(struct ip_conntrack *conntrack,
+ const struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo)
+{
+ /* If we've seen traffic both ways, this is some kind of UDP
+ stream. Extend timeout. */
+ if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
+ ip_ct_refresh_acct(conntrack, ctinfo, skb,
+ ip_ct_udp_timeout_stream);
+ /* Also, more likely to be important, and not a probe */
+ set_bit(IPS_ASSURED_BIT, &conntrack->status);
+ } else
+ ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout);
+
+ return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found. */
+static int udp_new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
+{
+ return 1;
+}
+
+static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
+ unsigned int hooknum)
+{
+ struct iphdr *iph = skb->nh.iph;
+ unsigned int udplen = skb->len - iph->ihl * 4;
+ struct udphdr _hdr, *hdr;
+
+ /* Header is too small? */
+ hdr = skb_header_pointer(skb, iph->ihl*4, sizeof(_hdr), &_hdr);
+ if (hdr == NULL) {
+ if (LOG_INVALID(IPPROTO_UDP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_udp: short packet ");
+ return -NF_ACCEPT;
+ }
+
+ /* Truncated/malformed packets */
+ if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
+ if (LOG_INVALID(IPPROTO_UDP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_udp: truncated/malformed packet ");
+ return -NF_ACCEPT;
+ }
+
+ /* Packet with no checksum */
+ if (!hdr->check)
+ return NF_ACCEPT;
+
+ /* Checksum invalid? Ignore.
+ * We skip checking packets on the outgoing path
+ * because the semantic of CHECKSUM_HW is different there
+ * and moreover root might send raw packets.
+ * FIXME: Source route IP option packets --RR */
+ if (hooknum == NF_IP_PRE_ROUTING
+ && csum_tcpudp_magic(iph->saddr, iph->daddr, udplen, IPPROTO_UDP,
+ skb->ip_summed == CHECKSUM_HW ? skb->csum
+ : skb_checksum(skb, iph->ihl*4, udplen, 0))) {
+ if (LOG_INVALID(IPPROTO_UDP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_udp: bad UDP checksum ");
+ return -NF_ACCEPT;
+ }
+
+ return NF_ACCEPT;
+}
+
+struct ip_conntrack_protocol ip_conntrack_protocol_udp =
+{
+ .proto = IPPROTO_UDP,
+ .name = "udp",
+ .pkt_to_tuple = udp_pkt_to_tuple,
+ .invert_tuple = udp_invert_tuple,
+ .print_tuple = udp_print_tuple,
+ .print_conntrack = udp_print_conntrack,
+ .packet = udp_packet,
+ .new = udp_new,
+ .error = udp_error,
+};
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
new file mode 100644
index 00000000000..80a7bde2a57
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -0,0 +1,961 @@
+/* This file contains all the functions required for the standalone
+ ip_conntrack module.
+
+ These are not required by the compatibility layer.
+*/
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/percpu.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+#include <net/checksum.h>
+#include <net/ip.h>
+
+#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
+#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)
+
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/listhelp.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+MODULE_LICENSE("GPL");
+
+extern atomic_t ip_conntrack_count;
+DECLARE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
+
+static int kill_proto(struct ip_conntrack *i, void *data)
+{
+ return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum ==
+ *((u_int8_t *) data));
+}
+
+#ifdef CONFIG_PROC_FS
+static int
+print_tuple(struct seq_file *s, const struct ip_conntrack_tuple *tuple,
+ struct ip_conntrack_protocol *proto)
+{
+ seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ",
+ NIPQUAD(tuple->src.ip), NIPQUAD(tuple->dst.ip));
+ return proto->print_tuple(s, tuple);
+}
+
+#ifdef CONFIG_IP_NF_CT_ACCT
+static unsigned int
+seq_print_counters(struct seq_file *s,
+ const struct ip_conntrack_counter *counter)
+{
+ return seq_printf(s, "packets=%llu bytes=%llu ",
+ (unsigned long long)counter->packets,
+ (unsigned long long)counter->bytes);
+}
+#else
+#define seq_print_counters(x, y) 0
+#endif
+
+struct ct_iter_state {
+ unsigned int bucket;
+};
+
+static struct list_head *ct_get_first(struct seq_file *seq)
+{
+ struct ct_iter_state *st = seq->private;
+
+ for (st->bucket = 0;
+ st->bucket < ip_conntrack_htable_size;
+ st->bucket++) {
+ if (!list_empty(&ip_conntrack_hash[st->bucket]))
+ return ip_conntrack_hash[st->bucket].next;
+ }
+ return NULL;
+}
+
+static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head)
+{
+ struct ct_iter_state *st = seq->private;
+
+ head = head->next;
+ while (head == &ip_conntrack_hash[st->bucket]) {
+ if (++st->bucket >= ip_conntrack_htable_size)
+ return NULL;
+ head = ip_conntrack_hash[st->bucket].next;
+ }
+ return head;
+}
+
+static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos)
+{
+ struct list_head *head = ct_get_first(seq);
+
+ if (head)
+ while (pos && (head = ct_get_next(seq, head)))
+ pos--;
+ return pos ? NULL : head;
+}
+
+static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ READ_LOCK(&ip_conntrack_lock);
+ return ct_get_idx(seq, *pos);
+}
+
+static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ (*pos)++;
+ return ct_get_next(s, v);
+}
+
+static void ct_seq_stop(struct seq_file *s, void *v)
+{
+ READ_UNLOCK(&ip_conntrack_lock);
+}
+
+static int ct_seq_show(struct seq_file *s, void *v)
+{
+ const struct ip_conntrack_tuple_hash *hash = v;
+ const struct ip_conntrack *conntrack = tuplehash_to_ctrack(hash);
+ struct ip_conntrack_protocol *proto;
+
+ MUST_BE_READ_LOCKED(&ip_conntrack_lock);
+ IP_NF_ASSERT(conntrack);
+
+ /* we only want to print DIR_ORIGINAL */
+ if (DIRECTION(hash))
+ return 0;
+
+ proto = ip_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
+ .tuple.dst.protonum);
+ IP_NF_ASSERT(proto);
+
+ if (seq_printf(s, "%-8s %u %ld ",
+ proto->name,
+ conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum,
+ timer_pending(&conntrack->timeout)
+ ? (long)(conntrack->timeout.expires - jiffies)/HZ
+ : 0) != 0)
+ return -ENOSPC;
+
+ if (proto->print_conntrack(s, conntrack))
+ return -ENOSPC;
+
+ if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ proto))
+ return -ENOSPC;
+
+ if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL]))
+ return -ENOSPC;
+
+ if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)))
+ if (seq_printf(s, "[UNREPLIED] "))
+ return -ENOSPC;
+
+ if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple,
+ proto))
+ return -ENOSPC;
+
+ if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY]))
+ return -ENOSPC;
+
+ if (test_bit(IPS_ASSURED_BIT, &conntrack->status))
+ if (seq_printf(s, "[ASSURED] "))
+ return -ENOSPC;
+
+#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
+ if (seq_printf(s, "mark=%lu ", conntrack->mark))
+ return -ENOSPC;
+#endif
+
+ if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use)))
+ return -ENOSPC;
+
+ return 0;
+}
+
+static struct seq_operations ct_seq_ops = {
+ .start = ct_seq_start,
+ .next = ct_seq_next,
+ .stop = ct_seq_stop,
+ .show = ct_seq_show
+};
+
+static int ct_open(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq;
+ struct ct_iter_state *st;
+ int ret;
+
+ st = kmalloc(sizeof(struct ct_iter_state), GFP_KERNEL);
+ if (st == NULL)
+ return -ENOMEM;
+ ret = seq_open(file, &ct_seq_ops);
+ if (ret)
+ goto out_free;
+ seq = file->private_data;
+ seq->private = st;
+ memset(st, 0, sizeof(struct ct_iter_state));
+ return ret;
+out_free:
+ kfree(st);
+ return ret;
+}
+
+static struct file_operations ct_file_ops = {
+ .owner = THIS_MODULE,
+ .open = ct_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+/* expects */
+static void *exp_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct list_head *e = &ip_conntrack_expect_list;
+ loff_t i;
+
+ /* strange seq_file api calls stop even if we fail,
+ * thus we need to grab lock since stop unlocks */
+ READ_LOCK(&ip_conntrack_lock);
+
+ if (list_empty(e))
+ return NULL;
+
+ for (i = 0; i <= *pos; i++) {
+ e = e->next;
+ if (e == &ip_conntrack_expect_list)
+ return NULL;
+ }
+ return e;
+}
+
+static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct list_head *e = v;
+
+ e = e->next;
+
+ if (e == &ip_conntrack_expect_list)
+ return NULL;
+
+ return e;
+}
+
+static void exp_seq_stop(struct seq_file *s, void *v)
+{
+ READ_UNLOCK(&ip_conntrack_lock);
+}
+
+static int exp_seq_show(struct seq_file *s, void *v)
+{
+ struct ip_conntrack_expect *expect = v;
+
+ if (expect->timeout.function)
+ seq_printf(s, "%ld ", timer_pending(&expect->timeout)
+ ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
+ else
+ seq_printf(s, "- ");
+
+ seq_printf(s, "proto=%u ", expect->tuple.dst.protonum);
+
+ print_tuple(s, &expect->tuple,
+ ip_ct_find_proto(expect->tuple.dst.protonum));
+ return seq_putc(s, '\n');
+}
+
+static struct seq_operations exp_seq_ops = {
+ .start = exp_seq_start,
+ .next = exp_seq_next,
+ .stop = exp_seq_stop,
+ .show = exp_seq_show
+};
+
+static int exp_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &exp_seq_ops);
+}
+
+static struct file_operations exp_file_ops = {
+ .owner = THIS_MODULE,
+ .open = exp_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
+static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ int cpu;
+
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
+
+ for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
+ if (!cpu_possible(cpu))
+ continue;
+ *pos = cpu+1;
+ return &per_cpu(ip_conntrack_stat, cpu);
+ }
+
+ return NULL;
+}
+
+static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ int cpu;
+
+ for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
+ if (!cpu_possible(cpu))
+ continue;
+ *pos = cpu+1;
+ return &per_cpu(ip_conntrack_stat, cpu);
+ }
+
+ return NULL;
+}
+
+static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+static int ct_cpu_seq_show(struct seq_file *seq, void *v)
+{
+ unsigned int nr_conntracks = atomic_read(&ip_conntrack_count);
+ struct ip_conntrack_stat *st = v;
+
+ if (v == SEQ_START_TOKEN) {
+ seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n");
+ return 0;
+ }
+
+ seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
+ "%08x %08x %08x %08x %08x %08x %08x %08x \n",
+ nr_conntracks,
+ st->searched,
+ st->found,
+ st->new,
+ st->invalid,
+ st->ignore,
+ st->delete,
+ st->delete_list,
+ st->insert,
+ st->insert_failed,
+ st->drop,
+ st->early_drop,
+ st->error,
+
+ st->expect_new,
+ st->expect_create,
+ st->expect_delete
+ );
+ return 0;
+}
+
+static struct seq_operations ct_cpu_seq_ops = {
+ .start = ct_cpu_seq_start,
+ .next = ct_cpu_seq_next,
+ .stop = ct_cpu_seq_stop,
+ .show = ct_cpu_seq_show,
+};
+
+static int ct_cpu_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &ct_cpu_seq_ops);
+}
+
+static struct file_operations ct_cpu_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = ct_cpu_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+#endif
+
+static unsigned int ip_confirm(unsigned int hooknum,
+ struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct ip_conntrack *ct;
+ enum ip_conntrack_info ctinfo;
+
+ /* This is where we call the helper: as the packet goes out. */
+ ct = ip_conntrack_get(*pskb, &ctinfo);
+ if (ct && ct->helper) {
+ unsigned int ret;
+ ret = ct->helper->help(pskb, ct, ctinfo);
+ if (ret != NF_ACCEPT)
+ return ret;
+ }
+
+ /* We've seen it coming out the other side: confirm it */
+ return ip_conntrack_confirm(pskb);
+}
+
+static unsigned int ip_conntrack_defrag(unsigned int hooknum,
+ struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+#if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE)
+ /* Previously seen (loopback)? Ignore. Do this before
+ fragment check. */
+ if ((*pskb)->nfct)
+ return NF_ACCEPT;
+#endif
+
+ /* Gather fragments. */
+ if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+ *pskb = ip_ct_gather_frags(*pskb,
+ hooknum == NF_IP_PRE_ROUTING ?
+ IP_DEFRAG_CONNTRACK_IN :
+ IP_DEFRAG_CONNTRACK_OUT);
+ if (!*pskb)
+ return NF_STOLEN;
+ }
+ return NF_ACCEPT;
+}
+
+static unsigned int ip_refrag(unsigned int hooknum,
+ struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct rtable *rt = (struct rtable *)(*pskb)->dst;
+
+ /* We've seen it coming out the other side: confirm */
+ if (ip_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT)
+ return NF_DROP;
+
+ /* Local packets are never produced too large for their
+ interface. We degfragment them at LOCAL_OUT, however,
+ so we have to refragment them here. */
+ if ((*pskb)->len > dst_mtu(&rt->u.dst) &&
+ !skb_shinfo(*pskb)->tso_size) {
+ /* No hook can be after us, so this should be OK. */
+ ip_fragment(*pskb, okfn);
+ return NF_STOLEN;
+ }
+ return NF_ACCEPT;
+}
+
+static unsigned int ip_conntrack_local(unsigned int hooknum,
+ struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ /* root is playing with raw sockets. */
+ if ((*pskb)->len < sizeof(struct iphdr)
+ || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+ if (net_ratelimit())
+ printk("ipt_hook: happy cracking.\n");
+ return NF_ACCEPT;
+ }
+ return ip_conntrack_in(hooknum, pskb, in, out, okfn);
+}
+
+/* Connection tracking may drop packets, but never alters them, so
+ make it the first hook. */
+static struct nf_hook_ops ip_conntrack_defrag_ops = {
+ .hook = ip_conntrack_defrag,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_PRE_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
+};
+
+static struct nf_hook_ops ip_conntrack_in_ops = {
+ .hook = ip_conntrack_in,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_PRE_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK,
+};
+
+static struct nf_hook_ops ip_conntrack_defrag_local_out_ops = {
+ .hook = ip_conntrack_defrag,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_LOCAL_OUT,
+ .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
+};
+
+static struct nf_hook_ops ip_conntrack_local_out_ops = {
+ .hook = ip_conntrack_local,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_LOCAL_OUT,
+ .priority = NF_IP_PRI_CONNTRACK,
+};
+
+/* Refragmenter; last chance. */
+static struct nf_hook_ops ip_conntrack_out_ops = {
+ .hook = ip_refrag,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_POST_ROUTING,
+ .priority = NF_IP_PRI_LAST,
+};
+
+static struct nf_hook_ops ip_conntrack_local_in_ops = {
+ .hook = ip_confirm,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_LOCAL_IN,
+ .priority = NF_IP_PRI_LAST-1,
+};
+
+/* Sysctl support */
+
+#ifdef CONFIG_SYSCTL
+
+/* From ip_conntrack_core.c */
+extern int ip_conntrack_max;
+extern unsigned int ip_conntrack_htable_size;
+
+/* From ip_conntrack_proto_tcp.c */
+extern unsigned long ip_ct_tcp_timeout_syn_sent;
+extern unsigned long ip_ct_tcp_timeout_syn_recv;
+extern unsigned long ip_ct_tcp_timeout_established;
+extern unsigned long ip_ct_tcp_timeout_fin_wait;
+extern unsigned long ip_ct_tcp_timeout_close_wait;
+extern unsigned long ip_ct_tcp_timeout_last_ack;
+extern unsigned long ip_ct_tcp_timeout_time_wait;
+extern unsigned long ip_ct_tcp_timeout_close;
+extern unsigned long ip_ct_tcp_timeout_max_retrans;
+extern int ip_ct_tcp_loose;
+extern int ip_ct_tcp_be_liberal;
+extern int ip_ct_tcp_max_retrans;
+
+/* From ip_conntrack_proto_udp.c */
+extern unsigned long ip_ct_udp_timeout;
+extern unsigned long ip_ct_udp_timeout_stream;
+
+/* From ip_conntrack_proto_icmp.c */
+extern unsigned long ip_ct_icmp_timeout;
+
+/* From ip_conntrack_proto_icmp.c */
+extern unsigned long ip_ct_generic_timeout;
+
+/* Log invalid packets of a given protocol */
+static int log_invalid_proto_min = 0;
+static int log_invalid_proto_max = 255;
+
+static struct ctl_table_header *ip_ct_sysctl_header;
+
+static ctl_table ip_ct_sysctl_table[] = {
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_MAX,
+ .procname = "ip_conntrack_max",
+ .data = &ip_conntrack_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT,
+ .procname = "ip_conntrack_count",
+ .data = &ip_conntrack_count,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_BUCKETS,
+ .procname = "ip_conntrack_buckets",
+ .data = &ip_conntrack_htable_size,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT,
+ .procname = "ip_conntrack_tcp_timeout_syn_sent",
+ .data = &ip_ct_tcp_timeout_syn_sent,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV,
+ .procname = "ip_conntrack_tcp_timeout_syn_recv",
+ .data = &ip_ct_tcp_timeout_syn_recv,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED,
+ .procname = "ip_conntrack_tcp_timeout_established",
+ .data = &ip_ct_tcp_timeout_established,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT,
+ .procname = "ip_conntrack_tcp_timeout_fin_wait",
+ .data = &ip_ct_tcp_timeout_fin_wait,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT,
+ .procname = "ip_conntrack_tcp_timeout_close_wait",
+ .data = &ip_ct_tcp_timeout_close_wait,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK,
+ .procname = "ip_conntrack_tcp_timeout_last_ack",
+ .data = &ip_ct_tcp_timeout_last_ack,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT,
+ .procname = "ip_conntrack_tcp_timeout_time_wait",
+ .data = &ip_ct_tcp_timeout_time_wait,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE,
+ .procname = "ip_conntrack_tcp_timeout_close",
+ .data = &ip_ct_tcp_timeout_close,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT,
+ .procname = "ip_conntrack_udp_timeout",
+ .data = &ip_ct_udp_timeout,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM,
+ .procname = "ip_conntrack_udp_timeout_stream",
+ .data = &ip_ct_udp_timeout_stream,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT,
+ .procname = "ip_conntrack_icmp_timeout",
+ .data = &ip_ct_icmp_timeout,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT,
+ .procname = "ip_conntrack_generic_timeout",
+ .data = &ip_ct_generic_timeout,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID,
+ .procname = "ip_conntrack_log_invalid",
+ .data = &ip_ct_log_invalid,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &log_invalid_proto_min,
+ .extra2 = &log_invalid_proto_max,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS,
+ .procname = "ip_conntrack_tcp_timeout_max_retrans",
+ .data = &ip_ct_tcp_timeout_max_retrans,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_LOOSE,
+ .procname = "ip_conntrack_tcp_loose",
+ .data = &ip_ct_tcp_loose,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL,
+ .procname = "ip_conntrack_tcp_be_liberal",
+ .data = &ip_ct_tcp_be_liberal,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS,
+ .procname = "ip_conntrack_tcp_max_retrans",
+ .data = &ip_ct_tcp_max_retrans,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ { .ctl_name = 0 }
+};
+
+#define NET_IP_CONNTRACK_MAX 2089
+
+static ctl_table ip_ct_netfilter_table[] = {
+ {
+ .ctl_name = NET_IPV4_NETFILTER,
+ .procname = "netfilter",
+ .mode = 0555,
+ .child = ip_ct_sysctl_table,
+ },
+ {
+ .ctl_name = NET_IP_CONNTRACK_MAX,
+ .procname = "ip_conntrack_max",
+ .data = &ip_conntrack_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ { .ctl_name = 0 }
+};
+
+static ctl_table ip_ct_ipv4_table[] = {
+ {
+ .ctl_name = NET_IPV4,
+ .procname = "ipv4",
+ .mode = 0555,
+ .child = ip_ct_netfilter_table,
+ },
+ { .ctl_name = 0 }
+};
+
+static ctl_table ip_ct_net_table[] = {
+ {
+ .ctl_name = CTL_NET,
+ .procname = "net",
+ .mode = 0555,
+ .child = ip_ct_ipv4_table,
+ },
+ { .ctl_name = 0 }
+};
+
+EXPORT_SYMBOL(ip_ct_log_invalid);
+#endif /* CONFIG_SYSCTL */
+
+static int init_or_cleanup(int init)
+{
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *proc, *proc_exp, *proc_stat;
+#endif
+ int ret = 0;
+
+ if (!init) goto cleanup;
+
+ ret = ip_conntrack_init();
+ if (ret < 0)
+ goto cleanup_nothing;
+
+#ifdef CONFIG_PROC_FS
+ ret = -ENOMEM;
+ proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops);
+ if (!proc) goto cleanup_init;
+
+ proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440,
+ &exp_file_ops);
+ if (!proc_exp) goto cleanup_proc;
+
+ proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat);
+ if (!proc_stat)
+ goto cleanup_proc_exp;
+
+ proc_stat->proc_fops = &ct_cpu_seq_fops;
+ proc_stat->owner = THIS_MODULE;
+#endif
+
+ ret = nf_register_hook(&ip_conntrack_defrag_ops);
+ if (ret < 0) {
+ printk("ip_conntrack: can't register pre-routing defrag hook.\n");
+ goto cleanup_proc_stat;
+ }
+ ret = nf_register_hook(&ip_conntrack_defrag_local_out_ops);
+ if (ret < 0) {
+ printk("ip_conntrack: can't register local_out defrag hook.\n");
+ goto cleanup_defragops;
+ }
+ ret = nf_register_hook(&ip_conntrack_in_ops);
+ if (ret < 0) {
+ printk("ip_conntrack: can't register pre-routing hook.\n");
+ goto cleanup_defraglocalops;
+ }
+ ret = nf_register_hook(&ip_conntrack_local_out_ops);
+ if (ret < 0) {
+ printk("ip_conntrack: can't register local out hook.\n");
+ goto cleanup_inops;
+ }
+ ret = nf_register_hook(&ip_conntrack_out_ops);
+ if (ret < 0) {
+ printk("ip_conntrack: can't register post-routing hook.\n");
+ goto cleanup_inandlocalops;
+ }
+ ret = nf_register_hook(&ip_conntrack_local_in_ops);
+ if (ret < 0) {
+ printk("ip_conntrack: can't register local in hook.\n");
+ goto cleanup_inoutandlocalops;
+ }
+#ifdef CONFIG_SYSCTL
+ ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table, 0);
+ if (ip_ct_sysctl_header == NULL) {
+ printk("ip_conntrack: can't register to sysctl.\n");
+ ret = -ENOMEM;
+ goto cleanup_localinops;
+ }
+#endif
+
+ return ret;
+
+ cleanup:
+#ifdef CONFIG_SYSCTL
+ unregister_sysctl_table(ip_ct_sysctl_header);
+ cleanup_localinops:
+#endif
+ nf_unregister_hook(&ip_conntrack_local_in_ops);
+ cleanup_inoutandlocalops:
+ nf_unregister_hook(&ip_conntrack_out_ops);
+ cleanup_inandlocalops:
+ nf_unregister_hook(&ip_conntrack_local_out_ops);
+ cleanup_inops:
+ nf_unregister_hook(&ip_conntrack_in_ops);
+ cleanup_defraglocalops:
+ nf_unregister_hook(&ip_conntrack_defrag_local_out_ops);
+ cleanup_defragops:
+ nf_unregister_hook(&ip_conntrack_defrag_ops);
+ cleanup_proc_stat:
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry("ip_conntrack", proc_net_stat);
+ cleanup_proc_exp:
+ proc_net_remove("ip_conntrack_expect");
+ cleanup_proc:
+ proc_net_remove("ip_conntrack");
+ cleanup_init:
+#endif /* CONFIG_PROC_FS */
+ ip_conntrack_cleanup();
+ cleanup_nothing:
+ return ret;
+}
+
+/* FIXME: Allow NULL functions and sub in pointers to generic for
+ them. --RR */
+int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto)
+{
+ int ret = 0;
+
+ WRITE_LOCK(&ip_conntrack_lock);
+ if (ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) {
+ ret = -EBUSY;
+ goto out;
+ }
+ ip_ct_protos[proto->proto] = proto;
+ out:
+ WRITE_UNLOCK(&ip_conntrack_lock);
+ return ret;
+}
+
+void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
+{
+ WRITE_LOCK(&ip_conntrack_lock);
+ ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol;
+ WRITE_UNLOCK(&ip_conntrack_lock);
+
+ /* Somebody could be still looking at the proto in bh. */
+ synchronize_net();
+
+ /* Remove all contrack entries for this protocol */
+ ip_ct_iterate_cleanup(kill_proto, &proto->proto);
+}
+
+static int __init init(void)
+{
+ return init_or_cleanup(1);
+}
+
+static void __exit fini(void)
+{
+ init_or_cleanup(0);
+}
+
+module_init(init);
+module_exit(fini);
+
+/* Some modules need us, but don't depend directly on any symbol.
+ They should call this. */
+void need_ip_conntrack(void)
+{
+}
+
+EXPORT_SYMBOL(ip_conntrack_protocol_register);
+EXPORT_SYMBOL(ip_conntrack_protocol_unregister);
+EXPORT_SYMBOL(ip_ct_get_tuple);
+EXPORT_SYMBOL(invert_tuplepr);
+EXPORT_SYMBOL(ip_conntrack_alter_reply);
+EXPORT_SYMBOL(ip_conntrack_destroyed);
+EXPORT_SYMBOL(need_ip_conntrack);
+EXPORT_SYMBOL(ip_conntrack_helper_register);
+EXPORT_SYMBOL(ip_conntrack_helper_unregister);
+EXPORT_SYMBOL(ip_ct_iterate_cleanup);
+EXPORT_SYMBOL(ip_ct_refresh_acct);
+EXPORT_SYMBOL(ip_ct_protos);
+EXPORT_SYMBOL(ip_ct_find_proto);
+EXPORT_SYMBOL(ip_conntrack_expect_alloc);
+EXPORT_SYMBOL(ip_conntrack_expect_free);
+EXPORT_SYMBOL(ip_conntrack_expect_related);
+EXPORT_SYMBOL(ip_conntrack_unexpect_related);
+EXPORT_SYMBOL(ip_conntrack_tuple_taken);
+EXPORT_SYMBOL(ip_ct_gather_frags);
+EXPORT_SYMBOL(ip_conntrack_htable_size);
+EXPORT_SYMBOL(ip_conntrack_lock);
+EXPORT_SYMBOL(ip_conntrack_hash);
+EXPORT_SYMBOL(ip_conntrack_untracked);
+EXPORT_SYMBOL_GPL(ip_conntrack_find_get);
+EXPORT_SYMBOL_GPL(ip_conntrack_put);
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+EXPORT_SYMBOL(ip_conntrack_tcp_update);
+#endif
diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c
new file mode 100644
index 00000000000..992fac3e36e
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_tftp.c
@@ -0,0 +1,159 @@
+/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Version: 0.0.7
+ *
+ * Thu 21 Mar 2002 Harald Welte <laforge@gnumonks.org>
+ * - port to newnat API
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_tftp.h>
+#include <linux/moduleparam.h>
+
+MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
+MODULE_DESCRIPTION("tftp connection tracking helper");
+MODULE_LICENSE("GPL");
+
+#define MAX_PORTS 8
+static int ports[MAX_PORTS];
+static int ports_c;
+module_param_array(ports, int, &ports_c, 0400);
+MODULE_PARM_DESC(ports, "port numbers of tftp servers");
+
+#if 0
+#define DEBUGP(format, args...) printk("%s:%s:" format, \
+ __FILE__, __FUNCTION__ , ## args)
+#else
+#define DEBUGP(format, args...)
+#endif
+
+unsigned int (*ip_nat_tftp_hook)(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ struct ip_conntrack_expect *exp);
+EXPORT_SYMBOL_GPL(ip_nat_tftp_hook);
+
+static int tftp_help(struct sk_buff **pskb,
+ struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ struct tftphdr _tftph, *tfh;
+ struct ip_conntrack_expect *exp;
+ unsigned int ret = NF_ACCEPT;
+
+ tfh = skb_header_pointer(*pskb,
+ (*pskb)->nh.iph->ihl*4+sizeof(struct udphdr),
+ sizeof(_tftph), &_tftph);
+ if (tfh == NULL)
+ return NF_ACCEPT;
+
+ switch (ntohs(tfh->opcode)) {
+ /* RRQ and WRQ works the same way */
+ case TFTP_OPCODE_READ:
+ case TFTP_OPCODE_WRITE:
+ DEBUGP("");
+ DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+
+ exp = ip_conntrack_expect_alloc();
+ if (exp == NULL)
+ return NF_DROP;
+
+ exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+ exp->mask.src.ip = 0xffffffff;
+ exp->mask.dst.ip = 0xffffffff;
+ exp->mask.dst.u.udp.port = 0xffff;
+ exp->mask.dst.protonum = 0xff;
+ exp->expectfn = NULL;
+ exp->master = ct;
+
+ DEBUGP("expect: ");
+ DUMP_TUPLE(&exp->tuple);
+ DUMP_TUPLE(&exp->mask);
+ if (ip_nat_tftp_hook)
+ ret = ip_nat_tftp_hook(pskb, ctinfo, exp);
+ else if (ip_conntrack_expect_related(exp) != 0) {
+ ip_conntrack_expect_free(exp);
+ ret = NF_DROP;
+ }
+ break;
+ case TFTP_OPCODE_DATA:
+ case TFTP_OPCODE_ACK:
+ DEBUGP("Data/ACK opcode\n");
+ break;
+ case TFTP_OPCODE_ERROR:
+ DEBUGP("Error opcode\n");
+ break;
+ default:
+ DEBUGP("Unknown opcode\n");
+ }
+ return NF_ACCEPT;
+}
+
+static struct ip_conntrack_helper tftp[MAX_PORTS];
+static char tftp_names[MAX_PORTS][10];
+
+static void fini(void)
+{
+ int i;
+
+ for (i = 0 ; i < ports_c; i++) {
+ DEBUGP("unregistering helper for port %d\n",
+ ports[i]);
+ ip_conntrack_helper_unregister(&tftp[i]);
+ }
+}
+
+static int __init init(void)
+{
+ int i, ret;
+ char *tmpname;
+
+ if (ports_c == 0)
+ ports[ports_c++] = TFTP_PORT;
+
+ for (i = 0; i < ports_c; i++) {
+ /* Create helper structure */
+ memset(&tftp[i], 0, sizeof(struct ip_conntrack_helper));
+
+ tftp[i].tuple.dst.protonum = IPPROTO_UDP;
+ tftp[i].tuple.src.u.udp.port = htons(ports[i]);
+ tftp[i].mask.dst.protonum = 0xFF;
+ tftp[i].mask.src.u.udp.port = 0xFFFF;
+ tftp[i].max_expected = 1;
+ tftp[i].timeout = 5 * 60; /* 5 minutes */
+ tftp[i].me = THIS_MODULE;
+ tftp[i].help = tftp_help;
+
+ tmpname = &tftp_names[i][0];
+ if (ports[i] == TFTP_PORT)
+ sprintf(tmpname, "tftp");
+ else
+ sprintf(tmpname, "tftp-%d", i);
+ tftp[i].name = tmpname;
+
+ DEBUGP("port #%d: %d\n", i, ports[i]);
+
+ ret=ip_conntrack_helper_register(&tftp[i]);
+ if (ret) {
+ printk("ERROR registering helper for port %d\n",
+ ports[i]);
+ fini();
+ return(ret);
+ }
+ }
+ return(0);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_amanda.c b/net/ipv4/netfilter/ip_nat_amanda.c
new file mode 100644
index 00000000000..da1f412583e
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_amanda.c
@@ -0,0 +1,88 @@
+/* Amanda extension for TCP NAT alteration.
+ * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
+ * based on a copy of HW's ip_nat_irc.c as well as other modules
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Module load syntax:
+ * insmod ip_nat_amanda.o
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_amanda.h>
+
+
+MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
+MODULE_DESCRIPTION("Amanda NAT helper");
+MODULE_LICENSE("GPL");
+
+static unsigned int help(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct ip_conntrack_expect *exp)
+{
+ char buffer[sizeof("65535")];
+ u_int16_t port;
+ unsigned int ret;
+
+ /* Connection comes from client. */
+ exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+ exp->dir = IP_CT_DIR_ORIGINAL;
+
+ /* When you see the packet, we need to NAT it the same as the
+ * this one (ie. same IP: it will be TCP and master is UDP). */
+ exp->expectfn = ip_nat_follow_master;
+
+ /* Try to get same port: if not, try to change it. */
+ for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
+ exp->tuple.dst.u.tcp.port = htons(port);
+ if (ip_conntrack_expect_related(exp) == 0)
+ break;
+ }
+
+ if (port == 0) {
+ ip_conntrack_expect_free(exp);
+ return NF_DROP;
+ }
+
+ sprintf(buffer, "%u", port);
+ ret = ip_nat_mangle_udp_packet(pskb, exp->master, ctinfo,
+ matchoff, matchlen,
+ buffer, strlen(buffer));
+ if (ret != NF_ACCEPT)
+ ip_conntrack_unexpect_related(exp);
+ return ret;
+}
+
+static void __exit fini(void)
+{
+ ip_nat_amanda_hook = NULL;
+ /* Make sure noone calls it, meanwhile. */
+ synchronize_net();
+}
+
+static int __init init(void)
+{
+ BUG_ON(ip_nat_amanda_hook);
+ ip_nat_amanda_hook = help;
+ return 0;
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
new file mode 100644
index 00000000000..162ceacfc29
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -0,0 +1,556 @@
+/* NAT for netfilter; shared with compatibility layer. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/vmalloc.h>
+#include <net/checksum.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/tcp.h> /* For tcp_prot in getorigdst */
+#include <linux/icmp.h>
+#include <linux/udp.h>
+#include <linux/jhash.h>
+
+#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
+#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)
+
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_protocol.h>
+#include <linux/netfilter_ipv4/ip_nat_core.h>
+#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/listhelp.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+DECLARE_RWLOCK(ip_nat_lock);
+
+/* Calculated at init based on memory size */
+static unsigned int ip_nat_htable_size;
+
+static struct list_head *bysource;
+struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
+
+
+/* We keep an extra hash for each conntrack, for fast searching. */
+static inline unsigned int
+hash_by_src(const struct ip_conntrack_tuple *tuple)
+{
+ /* Original src, to ensure we map it consistently if poss. */
+ return jhash_3words(tuple->src.ip, tuple->src.u.all,
+ tuple->dst.protonum, 0) % ip_nat_htable_size;
+}
+
+/* Noone using conntrack by the time this called. */
+static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
+{
+ if (!(conn->status & IPS_NAT_DONE_MASK))
+ return;
+
+ WRITE_LOCK(&ip_nat_lock);
+ list_del(&conn->nat.info.bysource);
+ WRITE_UNLOCK(&ip_nat_lock);
+}
+
+/* We do checksum mangling, so if they were wrong before they're still
+ * wrong. Also works for incomplete packets (eg. ICMP dest
+ * unreachables.) */
+u_int16_t
+ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
+{
+ u_int32_t diffs[] = { oldvalinv, newval };
+ return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
+ oldcheck^0xFFFF));
+}
+
+/* Is this tuple already taken? (not by us) */
+int
+ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
+ const struct ip_conntrack *ignored_conntrack)
+{
+ /* Conntrack tracking doesn't keep track of outgoing tuples; only
+ incoming ones. NAT means they don't have a fixed mapping,
+ so we invert the tuple and look for the incoming reply.
+
+ We could keep a separate hash if this proves too slow. */
+ struct ip_conntrack_tuple reply;
+
+ invert_tuplepr(&reply, tuple);
+ return ip_conntrack_tuple_taken(&reply, ignored_conntrack);
+}
+
+/* If we source map this tuple so reply looks like reply_tuple, will
+ * that meet the constraints of range. */
+static int
+in_range(const struct ip_conntrack_tuple *tuple,
+ const struct ip_nat_range *range)
+{
+ struct ip_nat_protocol *proto = ip_nat_find_proto(tuple->dst.protonum);
+
+ /* If we are supposed to map IPs, then we must be in the
+ range specified, otherwise let this drag us onto a new src IP. */
+ if (range->flags & IP_NAT_RANGE_MAP_IPS) {
+ if (ntohl(tuple->src.ip) < ntohl(range->min_ip)
+ || ntohl(tuple->src.ip) > ntohl(range->max_ip))
+ return 0;
+ }
+
+ if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
+ || proto->in_range(tuple, IP_NAT_MANIP_SRC,
+ &range->min, &range->max))
+ return 1;
+
+ return 0;
+}
+
+static inline int
+same_src(const struct ip_conntrack *ct,
+ const struct ip_conntrack_tuple *tuple)
+{
+ return (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum
+ == tuple->dst.protonum
+ && ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip
+ == tuple->src.ip
+ && ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all
+ == tuple->src.u.all);
+}
+
+/* Only called for SRC manip */
+static int
+find_appropriate_src(const struct ip_conntrack_tuple *tuple,
+ struct ip_conntrack_tuple *result,
+ const struct ip_nat_range *range)
+{
+ unsigned int h = hash_by_src(tuple);
+ struct ip_conntrack *ct;
+
+ READ_LOCK(&ip_nat_lock);
+ list_for_each_entry(ct, &bysource[h], nat.info.bysource) {
+ if (same_src(ct, tuple)) {
+ /* Copy source part from reply tuple. */
+ invert_tuplepr(result,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ result->dst = tuple->dst;
+
+ if (in_range(result, range)) {
+ READ_UNLOCK(&ip_nat_lock);
+ return 1;
+ }
+ }
+ }
+ READ_UNLOCK(&ip_nat_lock);
+ return 0;
+}
+
+/* For [FUTURE] fragmentation handling, we want the least-used
+ src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus
+ if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
+ 1-65535, we don't do pro-rata allocation based on ports; we choose
+ the ip with the lowest src-ip/dst-ip/proto usage.
+*/
+static void
+find_best_ips_proto(struct ip_conntrack_tuple *tuple,
+ const struct ip_nat_range *range,
+ const struct ip_conntrack *conntrack,
+ enum ip_nat_manip_type maniptype)
+{
+ u_int32_t *var_ipp;
+ /* Host order */
+ u_int32_t minip, maxip, j;
+
+ /* No IP mapping? Do nothing. */
+ if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
+ return;
+
+ if (maniptype == IP_NAT_MANIP_SRC)
+ var_ipp = &tuple->src.ip;
+ else
+ var_ipp = &tuple->dst.ip;
+
+ /* Fast path: only one choice. */
+ if (range->min_ip == range->max_ip) {
+ *var_ipp = range->min_ip;
+ return;
+ }
+
+ /* Hashing source and destination IPs gives a fairly even
+ * spread in practice (if there are a small number of IPs
+ * involved, there usually aren't that many connections
+ * anyway). The consistency means that servers see the same
+ * client coming from the same IP (some Internet Banking sites
+ * like this), even across reboots. */
+ minip = ntohl(range->min_ip);
+ maxip = ntohl(range->max_ip);
+ j = jhash_2words(tuple->src.ip, tuple->dst.ip, 0);
+ *var_ipp = htonl(minip + j % (maxip - minip + 1));
+}
+
+/* Manipulate the tuple into the range given. For NF_IP_POST_ROUTING,
+ * we change the source to map into the range. For NF_IP_PRE_ROUTING
+ * and NF_IP_LOCAL_OUT, we change the destination to map into the
+ * range. It might not be possible to get a unique tuple, but we try.
+ * At worst (or if we race), we will end up with a final duplicate in
+ * __ip_conntrack_confirm and drop the packet. */
+static void
+get_unique_tuple(struct ip_conntrack_tuple *tuple,
+ const struct ip_conntrack_tuple *orig_tuple,
+ const struct ip_nat_range *range,
+ struct ip_conntrack *conntrack,
+ enum ip_nat_manip_type maniptype)
+{
+ struct ip_nat_protocol *proto
+ = ip_nat_find_proto(orig_tuple->dst.protonum);
+
+ /* 1) If this srcip/proto/src-proto-part is currently mapped,
+ and that same mapping gives a unique tuple within the given
+ range, use that.
+
+ This is only required for source (ie. NAT/masq) mappings.
+ So far, we don't do local source mappings, so multiple
+ manips not an issue. */
+ if (maniptype == IP_NAT_MANIP_SRC) {
+ if (find_appropriate_src(orig_tuple, tuple, range)) {
+ DEBUGP("get_unique_tuple: Found current src map\n");
+ if (!ip_nat_used_tuple(tuple, conntrack))
+ return;
+ }
+ }
+
+ /* 2) Select the least-used IP/proto combination in the given
+ range. */
+ *tuple = *orig_tuple;
+ find_best_ips_proto(tuple, range, conntrack, maniptype);
+
+ /* 3) The per-protocol part of the manip is made to map into
+ the range to make a unique tuple. */
+
+ /* Only bother mapping if it's not already in range and unique */
+ if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
+ || proto->in_range(tuple, maniptype, &range->min, &range->max))
+ && !ip_nat_used_tuple(tuple, conntrack))
+ return;
+
+ /* Last change: get protocol to try to obtain unique tuple. */
+ proto->unique_tuple(tuple, range, maniptype, conntrack);
+}
+
+unsigned int
+ip_nat_setup_info(struct ip_conntrack *conntrack,
+ const struct ip_nat_range *range,
+ unsigned int hooknum)
+{
+ struct ip_conntrack_tuple curr_tuple, new_tuple;
+ struct ip_nat_info *info = &conntrack->nat.info;
+ int have_to_hash = !(conntrack->status & IPS_NAT_DONE_MASK);
+ enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
+
+ IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
+ || hooknum == NF_IP_POST_ROUTING
+ || hooknum == NF_IP_LOCAL_IN
+ || hooknum == NF_IP_LOCAL_OUT);
+ BUG_ON(ip_nat_initialized(conntrack, maniptype));
+
+ /* What we've got will look like inverse of reply. Normally
+ this is what is in the conntrack, except for prior
+ manipulations (future optimization: if num_manips == 0,
+ orig_tp =
+ conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
+ invert_tuplepr(&curr_tuple,
+ &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple);
+
+ get_unique_tuple(&new_tuple, &curr_tuple, range, conntrack, maniptype);
+
+ if (!ip_ct_tuple_equal(&new_tuple, &curr_tuple)) {
+ struct ip_conntrack_tuple reply;
+
+ /* Alter conntrack table so will recognize replies. */
+ invert_tuplepr(&reply, &new_tuple);
+ ip_conntrack_alter_reply(conntrack, &reply);
+
+ /* Non-atomic: we own this at the moment. */
+ if (maniptype == IP_NAT_MANIP_SRC)
+ conntrack->status |= IPS_SRC_NAT;
+ else
+ conntrack->status |= IPS_DST_NAT;
+ }
+
+ /* Place in source hash if this is the first time. */
+ if (have_to_hash) {
+ unsigned int srchash
+ = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
+ .tuple);
+ WRITE_LOCK(&ip_nat_lock);
+ list_add(&info->bysource, &bysource[srchash]);
+ WRITE_UNLOCK(&ip_nat_lock);
+ }
+
+ /* It's done. */
+ if (maniptype == IP_NAT_MANIP_DST)
+ set_bit(IPS_DST_NAT_DONE_BIT, &conntrack->status);
+ else
+ set_bit(IPS_SRC_NAT_DONE_BIT, &conntrack->status);
+
+ return NF_ACCEPT;
+}
+
+/* Returns true if succeeded. */
+static int
+manip_pkt(u_int16_t proto,
+ struct sk_buff **pskb,
+ unsigned int iphdroff,
+ const struct ip_conntrack_tuple *target,
+ enum ip_nat_manip_type maniptype)
+{
+ struct iphdr *iph;
+
+ (*pskb)->nfcache |= NFC_ALTERED;
+ if (!skb_ip_make_writable(pskb, iphdroff + sizeof(*iph)))
+ return 0;
+
+ iph = (void *)(*pskb)->data + iphdroff;
+
+ /* Manipulate protcol part. */
+ if (!ip_nat_find_proto(proto)->manip_pkt(pskb, iphdroff,
+ target, maniptype))
+ return 0;
+
+ iph = (void *)(*pskb)->data + iphdroff;
+
+ if (maniptype == IP_NAT_MANIP_SRC) {
+ iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip,
+ iph->check);
+ iph->saddr = target->src.ip;
+ } else {
+ iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip,
+ iph->check);
+ iph->daddr = target->dst.ip;
+ }
+ return 1;
+}
+
+/* Do packet manipulations according to ip_nat_setup_info. */
+unsigned int nat_packet(struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int hooknum,
+ struct sk_buff **pskb)
+{
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ unsigned long statusbit;
+ enum ip_nat_manip_type mtype = HOOK2MANIP(hooknum);
+
+ if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)
+ && (hooknum == NF_IP_POST_ROUTING || hooknum == NF_IP_LOCAL_IN)) {
+ DEBUGP("ip_nat_core: adjusting sequence number\n");
+ /* future: put this in a l4-proto specific function,
+ * and call this function here. */
+ if (!ip_nat_seq_adjust(pskb, ct, ctinfo))
+ return NF_DROP;
+ }
+
+ if (mtype == IP_NAT_MANIP_SRC)
+ statusbit = IPS_SRC_NAT;
+ else
+ statusbit = IPS_DST_NAT;
+
+ /* Invert if this is reply dir. */
+ if (dir == IP_CT_DIR_REPLY)
+ statusbit ^= IPS_NAT_MASK;
+
+ /* Non-atomic: these bits don't change. */
+ if (ct->status & statusbit) {
+ struct ip_conntrack_tuple target;
+
+ /* We are aiming to look like inverse of other direction. */
+ invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+
+ if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype))
+ return NF_DROP;
+ }
+ return NF_ACCEPT;
+}
+
+/* Dir is direction ICMP is coming from (opposite to packet it contains) */
+int icmp_reply_translation(struct sk_buff **pskb,
+ struct ip_conntrack *ct,
+ enum ip_nat_manip_type manip,
+ enum ip_conntrack_dir dir)
+{
+ struct {
+ struct icmphdr icmp;
+ struct iphdr ip;
+ } *inside;
+ struct ip_conntrack_tuple inner, target;
+ int hdrlen = (*pskb)->nh.iph->ihl * 4;
+
+ if (!skb_ip_make_writable(pskb, hdrlen + sizeof(*inside)))
+ return 0;
+
+ inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+
+ /* We're actually going to mangle it beyond trivial checksum
+ adjustment, so make sure the current checksum is correct. */
+ if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) {
+ hdrlen = (*pskb)->nh.iph->ihl * 4;
+ if ((u16)csum_fold(skb_checksum(*pskb, hdrlen,
+ (*pskb)->len - hdrlen, 0)))
+ return 0;
+ }
+
+ /* Must be RELATED */
+ IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
+ (*pskb)->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);
+
+ /* Redirects on non-null nats must be dropped, else they'll
+ start talking to each other without our translation, and be
+ confused... --RR */
+ if (inside->icmp.type == ICMP_REDIRECT) {
+ /* If NAT isn't finished, assume it and drop. */
+ if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
+ return 0;
+
+ if (ct->status & IPS_NAT_MASK)
+ return 0;
+ }
+
+ DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n",
+ *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
+
+ if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 +
+ sizeof(struct icmphdr) + inside->ip.ihl*4,
+ &inner, ip_ct_find_proto(inside->ip.protocol)))
+ return 0;
+
+ /* Change inner back to look like incoming packet. We do the
+ opposite manip on this hook to normal, because it might not
+ pass all hooks (locally-generated ICMP). Consider incoming
+ packet: PREROUTING (DST manip), routing produces ICMP, goes
+ through POSTROUTING (which must correct the DST manip). */
+ if (!manip_pkt(inside->ip.protocol, pskb,
+ (*pskb)->nh.iph->ihl*4
+ + sizeof(inside->icmp),
+ &ct->tuplehash[!dir].tuple,
+ !manip))
+ return 0;
+
+ /* Reloading "inside" here since manip_pkt inner. */
+ inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+ inside->icmp.checksum = 0;
+ inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
+ (*pskb)->len - hdrlen,
+ 0));
+
+ /* Change outer to look the reply to an incoming packet
+ * (proto 0 means don't invert per-proto part). */
+
+ /* Obviously, we need to NAT destination IP, but source IP
+ should be NAT'ed only if it is from a NAT'd host.
+
+ Explanation: some people use NAT for anonymizing. Also,
+ CERT recommends dropping all packets from private IP
+ addresses (although ICMP errors from internal links with
+ such addresses are not too uncommon, as Alan Cox points
+ out) */
+ if (manip != IP_NAT_MANIP_SRC
+ || ((*pskb)->nh.iph->saddr == ct->tuplehash[dir].tuple.src.ip)) {
+ invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+ if (!manip_pkt(0, pskb, 0, &target, manip))
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Protocol registration. */
+int ip_nat_protocol_register(struct ip_nat_protocol *proto)
+{
+ int ret = 0;
+
+ WRITE_LOCK(&ip_nat_lock);
+ if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
+ ret = -EBUSY;
+ goto out;
+ }
+ ip_nat_protos[proto->protonum] = proto;
+ out:
+ WRITE_UNLOCK(&ip_nat_lock);
+ return ret;
+}
+
+/* Noone stores the protocol anywhere; simply delete it. */
+void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
+{
+ WRITE_LOCK(&ip_nat_lock);
+ ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol;
+ WRITE_UNLOCK(&ip_nat_lock);
+
+ /* Someone could be still looking at the proto in a bh. */
+ synchronize_net();
+}
+
+int __init ip_nat_init(void)
+{
+ size_t i;
+
+ /* Leave them the same for the moment. */
+ ip_nat_htable_size = ip_conntrack_htable_size;
+
+ /* One vmalloc for both hash tables */
+ bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size);
+ if (!bysource)
+ return -ENOMEM;
+
+ /* Sew in builtin protocols. */
+ WRITE_LOCK(&ip_nat_lock);
+ for (i = 0; i < MAX_IP_NAT_PROTO; i++)
+ ip_nat_protos[i] = &ip_nat_unknown_protocol;
+ ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp;
+ ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp;
+ ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp;
+ WRITE_UNLOCK(&ip_nat_lock);
+
+ for (i = 0; i < ip_nat_htable_size; i++) {
+ INIT_LIST_HEAD(&bysource[i]);
+ }
+
+ /* FIXME: Man, this is a hack. <SIGH> */
+ IP_NF_ASSERT(ip_conntrack_destroyed == NULL);
+ ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;
+
+ /* Initialize fake conntrack so that NAT will skip it */
+ ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
+ return 0;
+}
+
+/* Clear NAT section of all conntracks, in case we're loaded again. */
+static int clean_nat(struct ip_conntrack *i, void *data)
+{
+ memset(&i->nat, 0, sizeof(i->nat));
+ i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
+ return 0;
+}
+
+/* Not __exit: called from ip_nat_standalone.c:init_or_cleanup() --RR */
+void ip_nat_cleanup(void)
+{
+ ip_ct_iterate_cleanup(&clean_nat, NULL);
+ ip_conntrack_destroyed = NULL;
+ vfree(bysource);
+}
diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c
new file mode 100644
index 00000000000..c6000e794ad
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_ftp.c
@@ -0,0 +1,183 @@
+/* FTP extension for TCP NAT alteration. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/moduleparam.h>
+#include <net/tcp.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
+MODULE_DESCRIPTION("ftp NAT helper");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* FIXME: Time out? --RR */
+
+static int
+mangle_rfc959_packet(struct sk_buff **pskb,
+ u_int32_t newip,
+ u_int16_t port,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo,
+ u32 *seq)
+{
+ char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")];
+
+ sprintf(buffer, "%u,%u,%u,%u,%u,%u",
+ NIPQUAD(newip), port>>8, port&0xFF);
+
+ DEBUGP("calling ip_nat_mangle_tcp_packet\n");
+
+ *seq += strlen(buffer) - matchlen;
+ return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
+ matchlen, buffer, strlen(buffer));
+}
+
+/* |1|132.235.1.2|6275| */
+static int
+mangle_eprt_packet(struct sk_buff **pskb,
+ u_int32_t newip,
+ u_int16_t port,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo,
+ u32 *seq)
+{
+ char buffer[sizeof("|1|255.255.255.255|65535|")];
+
+ sprintf(buffer, "|1|%u.%u.%u.%u|%u|", NIPQUAD(newip), port);
+
+ DEBUGP("calling ip_nat_mangle_tcp_packet\n");
+
+ *seq += strlen(buffer) - matchlen;
+ return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
+ matchlen, buffer, strlen(buffer));
+}
+
+/* |1|132.235.1.2|6275| */
+static int
+mangle_epsv_packet(struct sk_buff **pskb,
+ u_int32_t newip,
+ u_int16_t port,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo,
+ u32 *seq)
+{
+ char buffer[sizeof("|||65535|")];
+
+ sprintf(buffer, "|||%u|", port);
+
+ DEBUGP("calling ip_nat_mangle_tcp_packet\n");
+
+ *seq += strlen(buffer) - matchlen;
+ return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
+ matchlen, buffer, strlen(buffer));
+}
+
+static int (*mangle[])(struct sk_buff **, u_int32_t, u_int16_t,
+ unsigned int,
+ unsigned int,
+ struct ip_conntrack *,
+ enum ip_conntrack_info,
+ u32 *seq)
+= { [IP_CT_FTP_PORT] = mangle_rfc959_packet,
+ [IP_CT_FTP_PASV] = mangle_rfc959_packet,
+ [IP_CT_FTP_EPRT] = mangle_eprt_packet,
+ [IP_CT_FTP_EPSV] = mangle_epsv_packet
+};
+
+/* So, this packet has hit the connection tracking matching code.
+ Mangle it, and change the expectation to match the new version. */
+static unsigned int ip_nat_ftp(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ enum ip_ct_ftp_type type,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct ip_conntrack_expect *exp,
+ u32 *seq)
+{
+ u_int32_t newip;
+ u_int16_t port;
+ int dir = CTINFO2DIR(ctinfo);
+ struct ip_conntrack *ct = exp->master;
+
+ DEBUGP("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen);
+
+ /* Connection will come from wherever this packet goes, hence !dir */
+ newip = ct->tuplehash[!dir].tuple.dst.ip;
+ exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+ exp->dir = !dir;
+
+ /* When you see the packet, we need to NAT it the same as the
+ * this one. */
+ exp->expectfn = ip_nat_follow_master;
+
+ /* Try to get same port: if not, try to change it. */
+ for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
+ exp->tuple.dst.u.tcp.port = htons(port);
+ if (ip_conntrack_expect_related(exp) == 0)
+ break;
+ }
+
+ if (port == 0) {
+ ip_conntrack_expect_free(exp);
+ return NF_DROP;
+ }
+
+ if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo,
+ seq)) {
+ ip_conntrack_unexpect_related(exp);
+ return NF_DROP;
+ }
+ return NF_ACCEPT;
+}
+
+static void __exit fini(void)
+{
+ ip_nat_ftp_hook = NULL;
+ /* Make sure noone calls it, meanwhile. */
+ synchronize_net();
+}
+
+static int __init init(void)
+{
+ BUG_ON(ip_nat_ftp_hook);
+ ip_nat_ftp_hook = ip_nat_ftp;
+ return 0;
+}
+
+/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
+static int warn_set(const char *val, struct kernel_param *kp)
+{
+ printk(KERN_INFO __stringify(KBUILD_MODNAME)
+ ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
+ return 0;
+}
+module_param_call(ports, warn_set, NULL, NULL, 0);
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
new file mode 100644
index 00000000000..1637b96d8c0
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -0,0 +1,430 @@
+/* ip_nat_helper.c - generic support functions for NAT helpers
+ *
+ * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
+ * (C) 2003-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 14 Jan 2002 Harald Welte <laforge@gnumonks.org>:
+ * - add support for SACK adjustment
+ * 14 Mar 2002 Harald Welte <laforge@gnumonks.org>:
+ * - merge SACK support into newnat API
+ * 16 Aug 2002 Brian J. Murrell <netfilter@interlinx.bc.ca>:
+ * - make ip_nat_resize_packet more generic (TCP and UDP)
+ * - add ip_nat_mangle_udp_packet
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/checksum.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+
+#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
+#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)
+
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_protocol.h>
+#include <linux/netfilter_ipv4/ip_nat_core.h>
+#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/netfilter_ipv4/listhelp.h>
+
+#if 0
+#define DEBUGP printk
+#define DUMP_OFFSET(x) printk("offset_before=%d, offset_after=%d, correction_pos=%u\n", x->offset_before, x->offset_after, x->correction_pos);
+#else
+#define DEBUGP(format, args...)
+#define DUMP_OFFSET(x)
+#endif
+
+static DECLARE_LOCK(ip_nat_seqofs_lock);
+
+/* Setup TCP sequence correction given this change at this sequence */
+static inline void
+adjust_tcp_sequence(u32 seq,
+ int sizediff,
+ struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ int dir;
+ struct ip_nat_seq *this_way, *other_way;
+
+ DEBUGP("ip_nat_resize_packet: old_size = %u, new_size = %u\n",
+ (*skb)->len, new_size);
+
+ dir = CTINFO2DIR(ctinfo);
+
+ this_way = &ct->nat.info.seq[dir];
+ other_way = &ct->nat.info.seq[!dir];
+
+ DEBUGP("ip_nat_resize_packet: Seq_offset before: ");
+ DUMP_OFFSET(this_way);
+
+ LOCK_BH(&ip_nat_seqofs_lock);
+
+ /* SYN adjust. If it's uninitialized, or this is after last
+ * correction, record it: we don't handle more than one
+ * adjustment in the window, but do deal with common case of a
+ * retransmit */
+ if (this_way->offset_before == this_way->offset_after
+ || before(this_way->correction_pos, seq)) {
+ this_way->correction_pos = seq;
+ this_way->offset_before = this_way->offset_after;
+ this_way->offset_after += sizediff;
+ }
+ UNLOCK_BH(&ip_nat_seqofs_lock);
+
+ DEBUGP("ip_nat_resize_packet: Seq_offset after: ");
+ DUMP_OFFSET(this_way);
+}
+
+/* Frobs data inside this packet, which is linear. */
+static void mangle_contents(struct sk_buff *skb,
+ unsigned int dataoff,
+ unsigned int match_offset,
+ unsigned int match_len,
+ const char *rep_buffer,
+ unsigned int rep_len)
+{
+ unsigned char *data;
+
+ BUG_ON(skb_is_nonlinear(skb));
+ data = (unsigned char *)skb->nh.iph + dataoff;
+
+ /* move post-replacement */
+ memmove(data + match_offset + rep_len,
+ data + match_offset + match_len,
+ skb->tail - (data + match_offset + match_len));
+
+ /* insert data from buffer */
+ memcpy(data + match_offset, rep_buffer, rep_len);
+
+ /* update skb info */
+ if (rep_len > match_len) {
+ DEBUGP("ip_nat_mangle_packet: Extending packet by "
+ "%u from %u bytes\n", rep_len - match_len,
+ skb->len);
+ skb_put(skb, rep_len - match_len);
+ } else {
+ DEBUGP("ip_nat_mangle_packet: Shrinking packet from "
+ "%u from %u bytes\n", match_len - rep_len,
+ skb->len);
+ __skb_trim(skb, skb->len + rep_len - match_len);
+ }
+
+ /* fix IP hdr checksum information */
+ skb->nh.iph->tot_len = htons(skb->len);
+ ip_send_check(skb->nh.iph);
+}
+
+/* Unusual, but possible case. */
+static int enlarge_skb(struct sk_buff **pskb, unsigned int extra)
+{
+ struct sk_buff *nskb;
+
+ if ((*pskb)->len + extra > 65535)
+ return 0;
+
+ nskb = skb_copy_expand(*pskb, skb_headroom(*pskb), extra, GFP_ATOMIC);
+ if (!nskb)
+ return 0;
+
+ /* Transfer socket to new skb. */
+ if ((*pskb)->sk)
+ skb_set_owner_w(nskb, (*pskb)->sk);
+#ifdef CONFIG_NETFILTER_DEBUG
+ nskb->nf_debug = (*pskb)->nf_debug;
+#endif
+ kfree_skb(*pskb);
+ *pskb = nskb;
+ return 1;
+}
+
+/* Generic function for mangling variable-length address changes inside
+ * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
+ * command in FTP).
+ *
+ * Takes care about all the nasty sequence number changes, checksumming,
+ * skb enlargement, ...
+ *
+ * */
+int
+ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
+ struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int match_offset,
+ unsigned int match_len,
+ const char *rep_buffer,
+ unsigned int rep_len)
+{
+ struct iphdr *iph;
+ struct tcphdr *tcph;
+ int datalen;
+
+ if (!skb_ip_make_writable(pskb, (*pskb)->len))
+ return 0;
+
+ if (rep_len > match_len
+ && rep_len - match_len > skb_tailroom(*pskb)
+ && !enlarge_skb(pskb, rep_len - match_len))
+ return 0;
+
+ SKB_LINEAR_ASSERT(*pskb);
+
+ iph = (*pskb)->nh.iph;
+ tcph = (void *)iph + iph->ihl*4;
+
+ mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
+ match_offset, match_len, rep_buffer, rep_len);
+
+ datalen = (*pskb)->len - iph->ihl*4;
+ tcph->check = 0;
+ tcph->check = tcp_v4_check(tcph, datalen, iph->saddr, iph->daddr,
+ csum_partial((char *)tcph, datalen, 0));
+
+ if (rep_len != match_len) {
+ set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
+ adjust_tcp_sequence(ntohl(tcph->seq),
+ (int)rep_len - (int)match_len,
+ ct, ctinfo);
+ /* Tell TCP window tracking about seq change */
+ ip_conntrack_tcp_update(*pskb, ct, CTINFO2DIR(ctinfo));
+ }
+ return 1;
+}
+
+/* Generic function for mangling variable-length address changes inside
+ * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
+ * command in the Amanda protocol)
+ *
+ * Takes care about all the nasty sequence number changes, checksumming,
+ * skb enlargement, ...
+ *
+ * XXX - This function could be merged with ip_nat_mangle_tcp_packet which
+ * should be fairly easy to do.
+ */
+int
+ip_nat_mangle_udp_packet(struct sk_buff **pskb,
+ struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int match_offset,
+ unsigned int match_len,
+ const char *rep_buffer,
+ unsigned int rep_len)
+{
+ struct iphdr *iph;
+ struct udphdr *udph;
+
+ /* UDP helpers might accidentally mangle the wrong packet */
+ iph = (*pskb)->nh.iph;
+ if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) +
+ match_offset + match_len)
+ return 0;
+
+ if (!skb_ip_make_writable(pskb, (*pskb)->len))
+ return 0;
+
+ if (rep_len > match_len
+ && rep_len - match_len > skb_tailroom(*pskb)
+ && !enlarge_skb(pskb, rep_len - match_len))
+ return 0;
+
+ iph = (*pskb)->nh.iph;
+ udph = (void *)iph + iph->ihl*4;
+ mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
+ match_offset, match_len, rep_buffer, rep_len);
+
+ /* update the length of the UDP packet */
+ udph->len = htons((*pskb)->len - iph->ihl*4);
+
+ /* fix udp checksum if udp checksum was previously calculated */
+ if (udph->check) {
+ int datalen = (*pskb)->len - iph->ihl * 4;
+ udph->check = 0;
+ udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
+ datalen, IPPROTO_UDP,
+ csum_partial((char *)udph,
+ datalen, 0));
+ }
+
+ return 1;
+}
+
+/* Adjust one found SACK option including checksum correction */
+static void
+sack_adjust(struct sk_buff *skb,
+ struct tcphdr *tcph,
+ unsigned int sackoff,
+ unsigned int sackend,
+ struct ip_nat_seq *natseq)
+{
+ while (sackoff < sackend) {
+ struct tcp_sack_block *sack;
+ u_int32_t new_start_seq, new_end_seq;
+
+ sack = (void *)skb->data + sackoff;
+ if (after(ntohl(sack->start_seq) - natseq->offset_before,
+ natseq->correction_pos))
+ new_start_seq = ntohl(sack->start_seq)
+ - natseq->offset_after;
+ else
+ new_start_seq = ntohl(sack->start_seq)
+ - natseq->offset_before;
+ new_start_seq = htonl(new_start_seq);
+
+ if (after(ntohl(sack->end_seq) - natseq->offset_before,
+ natseq->correction_pos))
+ new_end_seq = ntohl(sack->end_seq)
+ - natseq->offset_after;
+ else
+ new_end_seq = ntohl(sack->end_seq)
+ - natseq->offset_before;
+ new_end_seq = htonl(new_end_seq);
+
+ DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
+ ntohl(sack->start_seq), new_start_seq,
+ ntohl(sack->end_seq), new_end_seq);
+
+ tcph->check =
+ ip_nat_cheat_check(~sack->start_seq, new_start_seq,
+ ip_nat_cheat_check(~sack->end_seq,
+ new_end_seq,
+ tcph->check));
+ sack->start_seq = new_start_seq;
+ sack->end_seq = new_end_seq;
+ sackoff += sizeof(*sack);
+ }
+}
+
+/* TCP SACK sequence number adjustment */
+static inline unsigned int
+ip_nat_sack_adjust(struct sk_buff **pskb,
+ struct tcphdr *tcph,
+ struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ unsigned int dir, optoff, optend;
+
+ optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr);
+ optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4;
+
+ if (!skb_ip_make_writable(pskb, optend))
+ return 0;
+
+ dir = CTINFO2DIR(ctinfo);
+
+ while (optoff < optend) {
+ /* Usually: option, length. */
+ unsigned char *op = (*pskb)->data + optoff;
+
+ switch (op[0]) {
+ case TCPOPT_EOL:
+ return 1;
+ case TCPOPT_NOP:
+ optoff++;
+ continue;
+ default:
+ /* no partial options */
+ if (optoff + 1 == optend
+ || optoff + op[1] > optend
+ || op[1] < 2)
+ return 0;
+ if (op[0] == TCPOPT_SACK
+ && op[1] >= 2+TCPOLEN_SACK_PERBLOCK
+ && ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
+ sack_adjust(*pskb, tcph, optoff+2,
+ optoff+op[1],
+ &ct->nat.info.seq[!dir]);
+ optoff += op[1];
+ }
+ }
+ return 1;
+}
+
+/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */
+int
+ip_nat_seq_adjust(struct sk_buff **pskb,
+ struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ struct tcphdr *tcph;
+ int dir, newseq, newack;
+ struct ip_nat_seq *this_way, *other_way;
+
+ dir = CTINFO2DIR(ctinfo);
+
+ this_way = &ct->nat.info.seq[dir];
+ other_way = &ct->nat.info.seq[!dir];
+
+ if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
+ return 0;
+
+ tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+ if (after(ntohl(tcph->seq), this_way->correction_pos))
+ newseq = ntohl(tcph->seq) + this_way->offset_after;
+ else
+ newseq = ntohl(tcph->seq) + this_way->offset_before;
+ newseq = htonl(newseq);
+
+ if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
+ other_way->correction_pos))
+ newack = ntohl(tcph->ack_seq) - other_way->offset_after;
+ else
+ newack = ntohl(tcph->ack_seq) - other_way->offset_before;
+ newack = htonl(newack);
+
+ tcph->check = ip_nat_cheat_check(~tcph->seq, newseq,
+ ip_nat_cheat_check(~tcph->ack_seq,
+ newack,
+ tcph->check));
+
+ DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
+ ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
+ ntohl(newack));
+
+ tcph->seq = newseq;
+ tcph->ack_seq = newack;
+
+ if (!ip_nat_sack_adjust(pskb, tcph, ct, ctinfo))
+ return 0;
+
+ ip_conntrack_tcp_update(*pskb, ct, dir);
+
+ return 1;
+}
+
+/* Setup NAT on this expected conntrack so it follows master. */
+/* If we fail to get a free NAT slot, we'll get dropped on confirm */
+void ip_nat_follow_master(struct ip_conntrack *ct,
+ struct ip_conntrack_expect *exp)
+{
+ struct ip_nat_range range;
+
+ /* This must be a fresh one. */
+ BUG_ON(ct->status & IPS_NAT_DONE_MASK);
+
+ /* Change src to where master sends to */
+ range.flags = IP_NAT_RANGE_MAP_IPS;
+ range.min_ip = range.max_ip
+ = ct->master->tuplehash[!exp->dir].tuple.dst.ip;
+ /* hook doesn't matter, but it has to do source manip */
+ ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
+
+ /* For DST manip, map port here to where it's expected. */
+ range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+ range.min = range.max = exp->saved_proto;
+ range.min_ip = range.max_ip
+ = ct->master->tuplehash[!exp->dir].tuple.src.ip;
+ /* hook doesn't matter, but it has to do destination manip */
+ ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
+}
diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c
new file mode 100644
index 00000000000..9c1ca3381d5
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_irc.c
@@ -0,0 +1,125 @@
+/* IRC extension for TCP NAT alteration.
+ * (C) 2000-2001 by Harald Welte <laforge@gnumonks.org>
+ * (C) 2004 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
+ * based on a copy of RR's ip_nat_ftp.c
+ *
+ * ip_nat_irc.c,v 1.16 2001/12/06 07:42:10 laforge Exp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/kernel.h>
+#include <net/tcp.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/ip_conntrack_irc.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/moduleparam.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("IRC (DCC) NAT helper");
+MODULE_LICENSE("GPL");
+
+static unsigned int help(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct ip_conntrack_expect *exp)
+{
+ u_int16_t port;
+ unsigned int ret;
+
+ /* "4294967296 65635 " */
+ char buffer[18];
+
+ DEBUGP("IRC_NAT: info (seq %u + %u) in %u\n",
+ expect->seq, exp_irc_info->len,
+ ntohl(tcph->seq));
+
+ /* Reply comes from server. */
+ exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+ exp->dir = IP_CT_DIR_REPLY;
+
+ /* When you see the packet, we need to NAT it the same as the
+ * this one. */
+ exp->expectfn = ip_nat_follow_master;
+
+ /* Try to get same port: if not, try to change it. */
+ for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
+ exp->tuple.dst.u.tcp.port = htons(port);
+ if (ip_conntrack_expect_related(exp) == 0)
+ break;
+ }
+
+ if (port == 0) {
+ ip_conntrack_expect_free(exp);
+ return NF_DROP;
+ }
+
+ /* strlen("\1DCC CHAT chat AAAAAAAA P\1\n")=27
+ * strlen("\1DCC SCHAT chat AAAAAAAA P\1\n")=28
+ * strlen("\1DCC SEND F AAAAAAAA P S\1\n")=26
+ * strlen("\1DCC MOVE F AAAAAAAA P S\1\n")=26
+ * strlen("\1DCC TSEND F AAAAAAAA P S\1\n")=27
+ * AAAAAAAAA: bound addr (1.0.0.0==16777216, min 8 digits,
+ * 255.255.255.255==4294967296, 10 digits)
+ * P: bound port (min 1 d, max 5d (65635))
+ * F: filename (min 1 d )
+ * S: size (min 1 d )
+ * 0x01, \n: terminators
+ */
+
+ /* AAA = "us", ie. where server normally talks to. */
+ sprintf(buffer, "%u %u",
+ ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip),
+ port);
+ DEBUGP("ip_nat_irc: Inserting '%s' == %u.%u.%u.%u, port %u\n",
+ buffer, NIPQUAD(exp->tuple.src.ip), port);
+
+ ret = ip_nat_mangle_tcp_packet(pskb, exp->master, ctinfo,
+ matchoff, matchlen, buffer,
+ strlen(buffer));
+ if (ret != NF_ACCEPT)
+ ip_conntrack_unexpect_related(exp);
+ return ret;
+}
+
+static void __exit fini(void)
+{
+ ip_nat_irc_hook = NULL;
+ /* Make sure noone calls it, meanwhile. */
+ synchronize_net();
+}
+
+static int __init init(void)
+{
+ BUG_ON(ip_nat_irc_hook);
+ ip_nat_irc_hook = help;
+ return 0;
+}
+
+/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
+static int warn_set(const char *val, struct kernel_param *kp)
+{
+ printk(KERN_INFO __stringify(KBUILD_MODNAME)
+ ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
+ return 0;
+}
+module_param_call(ports, warn_set, NULL, NULL, 0);
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c
new file mode 100644
index 00000000000..a558cf0eee8
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c
@@ -0,0 +1,115 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/netfilter.h>
+#include <linux/ip.h>
+#include <linux/icmp.h>
+#include <linux/if.h>
+
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_core.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/ip_nat_protocol.h>
+
+static int
+icmp_in_range(const struct ip_conntrack_tuple *tuple,
+ enum ip_nat_manip_type maniptype,
+ const union ip_conntrack_manip_proto *min,
+ const union ip_conntrack_manip_proto *max)
+{
+ return (tuple->src.u.icmp.id >= min->icmp.id
+ && tuple->src.u.icmp.id <= max->icmp.id);
+}
+
+static int
+icmp_unique_tuple(struct ip_conntrack_tuple *tuple,
+ const struct ip_nat_range *range,
+ enum ip_nat_manip_type maniptype,
+ const struct ip_conntrack *conntrack)
+{
+ static u_int16_t id;
+ unsigned int range_size
+ = (unsigned int)range->max.icmp.id - range->min.icmp.id + 1;
+ unsigned int i;
+
+ /* If no range specified... */
+ if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED))
+ range_size = 0xFFFF;
+
+ for (i = 0; i < range_size; i++, id++) {
+ tuple->src.u.icmp.id = range->min.icmp.id + (id % range_size);
+ if (!ip_nat_used_tuple(tuple, conntrack))
+ return 1;
+ }
+ return 0;
+}
+
+static int
+icmp_manip_pkt(struct sk_buff **pskb,
+ unsigned int iphdroff,
+ const struct ip_conntrack_tuple *tuple,
+ enum ip_nat_manip_type maniptype)
+{
+ struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+ struct icmphdr *hdr;
+ unsigned int hdroff = iphdroff + iph->ihl*4;
+
+ if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr)))
+ return 0;
+
+ hdr = (struct icmphdr *)((*pskb)->data + hdroff);
+
+ hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF,
+ tuple->src.u.icmp.id,
+ hdr->checksum);
+ hdr->un.echo.id = tuple->src.u.icmp.id;
+ return 1;
+}
+
+static unsigned int
+icmp_print(char *buffer,
+ const struct ip_conntrack_tuple *match,
+ const struct ip_conntrack_tuple *mask)
+{
+ unsigned int len = 0;
+
+ if (mask->src.u.icmp.id)
+ len += sprintf(buffer + len, "id=%u ",
+ ntohs(match->src.u.icmp.id));
+
+ if (mask->dst.u.icmp.type)
+ len += sprintf(buffer + len, "type=%u ",
+ ntohs(match->dst.u.icmp.type));
+
+ if (mask->dst.u.icmp.code)
+ len += sprintf(buffer + len, "code=%u ",
+ ntohs(match->dst.u.icmp.code));
+
+ return len;
+}
+
+static unsigned int
+icmp_print_range(char *buffer, const struct ip_nat_range *range)
+{
+ if (range->min.icmp.id != 0 || range->max.icmp.id != 0xFFFF)
+ return sprintf(buffer, "id %u-%u ",
+ ntohs(range->min.icmp.id),
+ ntohs(range->max.icmp.id));
+ else return 0;
+}
+
+struct ip_nat_protocol ip_nat_protocol_icmp
+= { "ICMP", IPPROTO_ICMP,
+ icmp_manip_pkt,
+ icmp_in_range,
+ icmp_unique_tuple,
+ icmp_print,
+ icmp_print_range
+};
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
new file mode 100644
index 00000000000..a91cfceff27
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c
@@ -0,0 +1,178 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/netfilter.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/if.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/ip_nat_protocol.h>
+#include <linux/netfilter_ipv4/ip_nat_core.h>
+
+static int
+tcp_in_range(const struct ip_conntrack_tuple *tuple,
+ enum ip_nat_manip_type maniptype,
+ const union ip_conntrack_manip_proto *min,
+ const union ip_conntrack_manip_proto *max)
+{
+ u_int16_t port;
+
+ if (maniptype == IP_NAT_MANIP_SRC)
+ port = tuple->src.u.tcp.port;
+ else
+ port = tuple->dst.u.tcp.port;
+
+ return ntohs(port) >= ntohs(min->tcp.port)
+ && ntohs(port) <= ntohs(max->tcp.port);
+}
+
+static int
+tcp_unique_tuple(struct ip_conntrack_tuple *tuple,
+ const struct ip_nat_range *range,
+ enum ip_nat_manip_type maniptype,
+ const struct ip_conntrack *conntrack)
+{
+ static u_int16_t port, *portptr;
+ unsigned int range_size, min, i;
+
+ if (maniptype == IP_NAT_MANIP_SRC)
+ portptr = &tuple->src.u.tcp.port;
+ else
+ portptr = &tuple->dst.u.tcp.port;
+
+ /* If no range specified... */
+ if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
+ /* If it's dst rewrite, can't change port */
+ if (maniptype == IP_NAT_MANIP_DST)
+ return 0;
+
+ /* Map privileged onto privileged. */
+ if (ntohs(*portptr) < 1024) {
+ /* Loose convention: >> 512 is credential passing */
+ if (ntohs(*portptr)<512) {
+ min = 1;
+ range_size = 511 - min + 1;
+ } else {
+ min = 600;
+ range_size = 1023 - min + 1;
+ }
+ } else {
+ min = 1024;
+ range_size = 65535 - 1024 + 1;
+ }
+ } else {
+ min = ntohs(range->min.tcp.port);
+ range_size = ntohs(range->max.tcp.port) - min + 1;
+ }
+
+ for (i = 0; i < range_size; i++, port++) {
+ *portptr = htons(min + port % range_size);
+ if (!ip_nat_used_tuple(tuple, conntrack)) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static int
+tcp_manip_pkt(struct sk_buff **pskb,
+ unsigned int iphdroff,
+ const struct ip_conntrack_tuple *tuple,
+ enum ip_nat_manip_type maniptype)
+{
+ struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+ struct tcphdr *hdr;
+ unsigned int hdroff = iphdroff + iph->ihl*4;
+ u32 oldip, newip;
+ u16 *portptr, newport, oldport;
+ int hdrsize = 8; /* TCP connection tracking guarantees this much */
+
+ /* this could be a inner header returned in icmp packet; in such
+ cases we cannot update the checksum field since it is outside of
+ the 8 bytes of transport layer headers we are guaranteed */
+ if ((*pskb)->len >= hdroff + sizeof(struct tcphdr))
+ hdrsize = sizeof(struct tcphdr);
+
+ if (!skb_ip_make_writable(pskb, hdroff + hdrsize))
+ return 0;
+
+ iph = (struct iphdr *)((*pskb)->data + iphdroff);
+ hdr = (struct tcphdr *)((*pskb)->data + hdroff);
+
+ if (maniptype == IP_NAT_MANIP_SRC) {
+ /* Get rid of src ip and src pt */
+ oldip = iph->saddr;
+ newip = tuple->src.ip;
+ newport = tuple->src.u.tcp.port;
+ portptr = &hdr->source;
+ } else {
+ /* Get rid of dst ip and dst pt */
+ oldip = iph->daddr;
+ newip = tuple->dst.ip;
+ newport = tuple->dst.u.tcp.port;
+ portptr = &hdr->dest;
+ }
+
+ oldport = *portptr;
+ *portptr = newport;
+
+ if (hdrsize < sizeof(*hdr))
+ return 1;
+
+ hdr->check = ip_nat_cheat_check(~oldip, newip,
+ ip_nat_cheat_check(oldport ^ 0xFFFF,
+ newport,
+ hdr->check));
+ return 1;
+}
+
+static unsigned int
+tcp_print(char *buffer,
+ const struct ip_conntrack_tuple *match,
+ const struct ip_conntrack_tuple *mask)
+{
+ unsigned int len = 0;
+
+ if (mask->src.u.tcp.port)
+ len += sprintf(buffer + len, "srcpt=%u ",
+ ntohs(match->src.u.tcp.port));
+
+
+ if (mask->dst.u.tcp.port)
+ len += sprintf(buffer + len, "dstpt=%u ",
+ ntohs(match->dst.u.tcp.port));
+
+ return len;
+}
+
+static unsigned int
+tcp_print_range(char *buffer, const struct ip_nat_range *range)
+{
+ if (range->min.tcp.port != 0 || range->max.tcp.port != 0xFFFF) {
+ if (range->min.tcp.port == range->max.tcp.port)
+ return sprintf(buffer, "port %u ",
+ ntohs(range->min.tcp.port));
+ else
+ return sprintf(buffer, "ports %u-%u ",
+ ntohs(range->min.tcp.port),
+ ntohs(range->max.tcp.port));
+ }
+ else return 0;
+}
+
+struct ip_nat_protocol ip_nat_protocol_tcp
+= { "TCP", IPPROTO_TCP,
+ tcp_manip_pkt,
+ tcp_in_range,
+ tcp_unique_tuple,
+ tcp_print,
+ tcp_print_range
+};
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
new file mode 100644
index 00000000000..c669e3b5f5d
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_proto_udp.c
@@ -0,0 +1,165 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/netfilter.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/if.h>
+
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_core.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/ip_nat_protocol.h>
+
+static int
+udp_in_range(const struct ip_conntrack_tuple *tuple,
+ enum ip_nat_manip_type maniptype,
+ const union ip_conntrack_manip_proto *min,
+ const union ip_conntrack_manip_proto *max)
+{
+ u_int16_t port;
+
+ if (maniptype == IP_NAT_MANIP_SRC)
+ port = tuple->src.u.udp.port;
+ else
+ port = tuple->dst.u.udp.port;
+
+ return ntohs(port) >= ntohs(min->udp.port)
+ && ntohs(port) <= ntohs(max->udp.port);
+}
+
+static int
+udp_unique_tuple(struct ip_conntrack_tuple *tuple,
+ const struct ip_nat_range *range,
+ enum ip_nat_manip_type maniptype,
+ const struct ip_conntrack *conntrack)
+{
+ static u_int16_t port, *portptr;
+ unsigned int range_size, min, i;
+
+ if (maniptype == IP_NAT_MANIP_SRC)
+ portptr = &tuple->src.u.udp.port;
+ else
+ portptr = &tuple->dst.u.udp.port;
+
+ /* If no range specified... */
+ if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
+ /* If it's dst rewrite, can't change port */
+ if (maniptype == IP_NAT_MANIP_DST)
+ return 0;
+
+ if (ntohs(*portptr) < 1024) {
+ /* Loose convention: >> 512 is credential passing */
+ if (ntohs(*portptr)<512) {
+ min = 1;
+ range_size = 511 - min + 1;
+ } else {
+ min = 600;
+ range_size = 1023 - min + 1;
+ }
+ } else {
+ min = 1024;
+ range_size = 65535 - 1024 + 1;
+ }
+ } else {
+ min = ntohs(range->min.udp.port);
+ range_size = ntohs(range->max.udp.port) - min + 1;
+ }
+
+ for (i = 0; i < range_size; i++, port++) {
+ *portptr = htons(min + port % range_size);
+ if (!ip_nat_used_tuple(tuple, conntrack))
+ return 1;
+ }
+ return 0;
+}
+
+static int
+udp_manip_pkt(struct sk_buff **pskb,
+ unsigned int iphdroff,
+ const struct ip_conntrack_tuple *tuple,
+ enum ip_nat_manip_type maniptype)
+{
+ struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+ struct udphdr *hdr;
+ unsigned int hdroff = iphdroff + iph->ihl*4;
+ u32 oldip, newip;
+ u16 *portptr, newport;
+
+ if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr)))
+ return 0;
+
+ iph = (struct iphdr *)((*pskb)->data + iphdroff);
+ hdr = (struct udphdr *)((*pskb)->data + hdroff);
+
+ if (maniptype == IP_NAT_MANIP_SRC) {
+ /* Get rid of src ip and src pt */
+ oldip = iph->saddr;
+ newip = tuple->src.ip;
+ newport = tuple->src.u.udp.port;
+ portptr = &hdr->source;
+ } else {
+ /* Get rid of dst ip and dst pt */
+ oldip = iph->daddr;
+ newip = tuple->dst.ip;
+ newport = tuple->dst.u.udp.port;
+ portptr = &hdr->dest;
+ }
+ if (hdr->check) /* 0 is a special case meaning no checksum */
+ hdr->check = ip_nat_cheat_check(~oldip, newip,
+ ip_nat_cheat_check(*portptr ^ 0xFFFF,
+ newport,
+ hdr->check));
+ *portptr = newport;
+ return 1;
+}
+
+static unsigned int
+udp_print(char *buffer,
+ const struct ip_conntrack_tuple *match,
+ const struct ip_conntrack_tuple *mask)
+{
+ unsigned int len = 0;
+
+ if (mask->src.u.udp.port)
+ len += sprintf(buffer + len, "srcpt=%u ",
+ ntohs(match->src.u.udp.port));
+
+
+ if (mask->dst.u.udp.port)
+ len += sprintf(buffer + len, "dstpt=%u ",
+ ntohs(match->dst.u.udp.port));
+
+ return len;
+}
+
+static unsigned int
+udp_print_range(char *buffer, const struct ip_nat_range *range)
+{
+ if (range->min.udp.port != 0 || range->max.udp.port != 0xFFFF) {
+ if (range->min.udp.port == range->max.udp.port)
+ return sprintf(buffer, "port %u ",
+ ntohs(range->min.udp.port));
+ else
+ return sprintf(buffer, "ports %u-%u ",
+ ntohs(range->min.udp.port),
+ ntohs(range->max.udp.port));
+ }
+ else return 0;
+}
+
+struct ip_nat_protocol ip_nat_protocol_udp
+= { "UDP", IPPROTO_UDP,
+ udp_manip_pkt,
+ udp_in_range,
+ udp_unique_tuple,
+ udp_print,
+ udp_print_range
+};
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c
new file mode 100644
index 00000000000..f5525bd58d1
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c
@@ -0,0 +1,70 @@
+/* The "unknown" protocol. This is what is used for protocols we
+ * don't understand. It's returned by ip_ct_find_proto().
+ */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/netfilter.h>
+#include <linux/if.h>
+
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/ip_nat_protocol.h>
+
+static int unknown_in_range(const struct ip_conntrack_tuple *tuple,
+ enum ip_nat_manip_type manip_type,
+ const union ip_conntrack_manip_proto *min,
+ const union ip_conntrack_manip_proto *max)
+{
+ return 1;
+}
+
+static int unknown_unique_tuple(struct ip_conntrack_tuple *tuple,
+ const struct ip_nat_range *range,
+ enum ip_nat_manip_type maniptype,
+ const struct ip_conntrack *conntrack)
+{
+ /* Sorry: we can't help you; if it's not unique, we can't frob
+ anything. */
+ return 0;
+}
+
+static int
+unknown_manip_pkt(struct sk_buff **pskb,
+ unsigned int iphdroff,
+ const struct ip_conntrack_tuple *tuple,
+ enum ip_nat_manip_type maniptype)
+{
+ return 1;
+}
+
+static unsigned int
+unknown_print(char *buffer,
+ const struct ip_conntrack_tuple *match,
+ const struct ip_conntrack_tuple *mask)
+{
+ return 0;
+}
+
+static unsigned int
+unknown_print_range(char *buffer, const struct ip_nat_range *range)
+{
+ return 0;
+}
+
+struct ip_nat_protocol ip_nat_unknown_protocol = {
+ "unknown", 0,
+ unknown_manip_pkt,
+ unknown_in_range,
+ unknown_unique_tuple,
+ unknown_print,
+ unknown_print_range
+};
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
new file mode 100644
index 00000000000..581f097f5a2
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -0,0 +1,319 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Everything about the rules for NAT. */
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <net/checksum.h>
+#include <net/route.h>
+#include <linux/bitops.h>
+
+#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
+#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_core.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/listhelp.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+#define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT))
+
+static struct
+{
+ struct ipt_replace repl;
+ struct ipt_standard entries[3];
+ struct ipt_error term;
+} nat_initial_table __initdata
+= { { "nat", NAT_VALID_HOOKS, 4,
+ sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
+ { [NF_IP_PRE_ROUTING] = 0,
+ [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard),
+ [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 },
+ { [NF_IP_PRE_ROUTING] = 0,
+ [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard),
+ [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 },
+ 0, NULL, { } },
+ {
+ /* PRE_ROUTING */
+ { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+ 0,
+ sizeof(struct ipt_entry),
+ sizeof(struct ipt_standard),
+ 0, { 0, 0 }, { } },
+ { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
+ -NF_ACCEPT - 1 } },
+ /* POST_ROUTING */
+ { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+ 0,
+ sizeof(struct ipt_entry),
+ sizeof(struct ipt_standard),
+ 0, { 0, 0 }, { } },
+ { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
+ -NF_ACCEPT - 1 } },
+ /* LOCAL_OUT */
+ { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+ 0,
+ sizeof(struct ipt_entry),
+ sizeof(struct ipt_standard),
+ 0, { 0, 0 }, { } },
+ { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
+ -NF_ACCEPT - 1 } }
+ },
+ /* ERROR */
+ { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+ 0,
+ sizeof(struct ipt_entry),
+ sizeof(struct ipt_error),
+ 0, { 0, 0 }, { } },
+ { { { { IPT_ALIGN(sizeof(struct ipt_error_target)), IPT_ERROR_TARGET } },
+ { } },
+ "ERROR"
+ }
+ }
+};
+
+static struct ipt_table nat_table = {
+ .name = "nat",
+ .valid_hooks = NAT_VALID_HOOKS,
+ .lock = RW_LOCK_UNLOCKED,
+ .me = THIS_MODULE,
+};
+
+/* Source NAT */
+static unsigned int ipt_snat_target(struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ unsigned int hooknum,
+ const void *targinfo,
+ void *userinfo)
+{
+ struct ip_conntrack *ct;
+ enum ip_conntrack_info ctinfo;
+ const struct ip_nat_multi_range_compat *mr = targinfo;
+
+ IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
+
+ ct = ip_conntrack_get(*pskb, &ctinfo);
+
+ /* Connection must be valid and new. */
+ IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
+ || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
+ IP_NF_ASSERT(out);
+
+ return ip_nat_setup_info(ct, &mr->range[0], hooknum);
+}
+
+/* Before 2.6.11 we did implicit source NAT if required. Warn about change. */
+static void warn_if_extra_mangle(u32 dstip, u32 srcip)
+{
+ static int warned = 0;
+ struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
+ struct rtable *rt;
+
+ if (ip_route_output_key(&rt, &fl) != 0)
+ return;
+
+ if (rt->rt_src != srcip && !warned) {
+ printk("NAT: no longer support implicit source local NAT\n");
+ printk("NAT: packet src %u.%u.%u.%u -> dst %u.%u.%u.%u\n",
+ NIPQUAD(srcip), NIPQUAD(dstip));
+ warned = 1;
+ }
+ ip_rt_put(rt);
+}
+
+static unsigned int ipt_dnat_target(struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ unsigned int hooknum,
+ const void *targinfo,
+ void *userinfo)
+{
+ struct ip_conntrack *ct;
+ enum ip_conntrack_info ctinfo;
+ const struct ip_nat_multi_range_compat *mr = targinfo;
+
+ IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
+ || hooknum == NF_IP_LOCAL_OUT);
+
+ ct = ip_conntrack_get(*pskb, &ctinfo);
+
+ /* Connection must be valid and new. */
+ IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+
+ if (hooknum == NF_IP_LOCAL_OUT
+ && mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
+ warn_if_extra_mangle((*pskb)->nh.iph->daddr,
+ mr->range[0].min_ip);
+
+ return ip_nat_setup_info(ct, &mr->range[0], hooknum);
+}
+
+static int ipt_snat_checkentry(const char *tablename,
+ const struct ipt_entry *e,
+ void *targinfo,
+ unsigned int targinfosize,
+ unsigned int hook_mask)
+{
+ struct ip_nat_multi_range_compat *mr = targinfo;
+
+ /* Must be a valid range */
+ if (mr->rangesize != 1) {
+ printk("SNAT: multiple ranges no longer supported\n");
+ return 0;
+ }
+
+ if (targinfosize != IPT_ALIGN(sizeof(struct ip_nat_multi_range_compat))) {
+ DEBUGP("SNAT: Target size %u wrong for %u ranges\n",
+ targinfosize, mr->rangesize);
+ return 0;
+ }
+
+ /* Only allow these for NAT. */
+ if (strcmp(tablename, "nat") != 0) {
+ DEBUGP("SNAT: wrong table %s\n", tablename);
+ return 0;
+ }
+
+ if (hook_mask & ~(1 << NF_IP_POST_ROUTING)) {
+ DEBUGP("SNAT: hook mask 0x%x bad\n", hook_mask);
+ return 0;
+ }
+ return 1;
+}
+
+static int ipt_dnat_checkentry(const char *tablename,
+ const struct ipt_entry *e,
+ void *targinfo,
+ unsigned int targinfosize,
+ unsigned int hook_mask)
+{
+ struct ip_nat_multi_range_compat *mr = targinfo;
+
+ /* Must be a valid range */
+ if (mr->rangesize != 1) {
+ printk("DNAT: multiple ranges no longer supported\n");
+ return 0;
+ }
+
+ if (targinfosize != IPT_ALIGN(sizeof(struct ip_nat_multi_range_compat))) {
+ DEBUGP("DNAT: Target size %u wrong for %u ranges\n",
+ targinfosize, mr->rangesize);
+ return 0;
+ }
+
+ /* Only allow these for NAT. */
+ if (strcmp(tablename, "nat") != 0) {
+ DEBUGP("DNAT: wrong table %s\n", tablename);
+ return 0;
+ }
+
+ if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT))) {
+ DEBUGP("DNAT: hook mask 0x%x bad\n", hook_mask);
+ return 0;
+ }
+
+ return 1;
+}
+
+inline unsigned int
+alloc_null_binding(struct ip_conntrack *conntrack,
+ struct ip_nat_info *info,
+ unsigned int hooknum)
+{
+ /* Force range to this IP; let proto decide mapping for
+ per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
+ Use reply in case it's already been mangled (eg local packet).
+ */
+ u_int32_t ip
+ = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
+ ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip
+ : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
+ struct ip_nat_range range
+ = { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } };
+
+ DEBUGP("Allocating NULL binding for %p (%u.%u.%u.%u)\n", conntrack,
+ NIPQUAD(ip));
+ return ip_nat_setup_info(conntrack, &range, hooknum);
+}
+
+int ip_nat_rule_find(struct sk_buff **pskb,
+ unsigned int hooknum,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct ip_conntrack *ct,
+ struct ip_nat_info *info)
+{
+ int ret;
+
+ ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL);
+
+ if (ret == NF_ACCEPT) {
+ if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
+ /* NUL mapping */
+ ret = alloc_null_binding(ct, info, hooknum);
+ }
+ return ret;
+}
+
+static struct ipt_target ipt_snat_reg = {
+ .name = "SNAT",
+ .target = ipt_snat_target,
+ .checkentry = ipt_snat_checkentry,
+};
+
+static struct ipt_target ipt_dnat_reg = {
+ .name = "DNAT",
+ .target = ipt_dnat_target,
+ .checkentry = ipt_dnat_checkentry,
+};
+
+int __init ip_nat_rule_init(void)
+{
+ int ret;
+
+ ret = ipt_register_table(&nat_table, &nat_initial_table.repl);
+ if (ret != 0)
+ return ret;
+ ret = ipt_register_target(&ipt_snat_reg);
+ if (ret != 0)
+ goto unregister_table;
+
+ ret = ipt_register_target(&ipt_dnat_reg);
+ if (ret != 0)
+ goto unregister_snat;
+
+ return ret;
+
+ unregister_snat:
+ ipt_unregister_target(&ipt_snat_reg);
+ unregister_table:
+ ipt_unregister_table(&nat_table);
+
+ return ret;
+}
+
+void ip_nat_rule_cleanup(void)
+{
+ ipt_unregister_target(&ipt_dnat_reg);
+ ipt_unregister_target(&ipt_snat_reg);
+ ipt_unregister_table(&nat_table);
+}
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
new file mode 100644
index 00000000000..2a48b6e635a
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c
@@ -0,0 +1,1347 @@
+/*
+ * ip_nat_snmp_basic.c
+ *
+ * Basic SNMP Application Layer Gateway
+ *
+ * This IP NAT module is intended for use with SNMP network
+ * discovery and monitoring applications where target networks use
+ * conflicting private address realms.
+ *
+ * Static NAT is used to remap the networks from the view of the network
+ * management system at the IP layer, and this module remaps some application
+ * layer addresses to match.
+ *
+ * The simplest form of ALG is performed, where only tagged IP addresses
+ * are modified. The module does not need to be MIB aware and only scans
+ * messages at the ASN.1/BER level.
+ *
+ * Currently, only SNMPv1 and SNMPv2 are supported.
+ *
+ * More information on ALG and associated issues can be found in
+ * RFC 2962
+ *
+ * The ASB.1/BER parsing code is derived from the gxsnmp package by Gregory
+ * McLean & Jochen Friedrich, stripped down for use in the kernel.
+ *
+ * Copyright (c) 2000 RP Internet (www.rpi.net.au).
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: James Morris <jmorris@intercode.com.au>
+ *
+ * Updates:
+ * 2000-08-06: Convert to new helper API (Harald Welte).
+ *
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+#include <net/udp.h>
+#include <asm/uaccess.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
+MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway");
+
+#define SNMP_PORT 161
+#define SNMP_TRAP_PORT 162
+#define NOCT1(n) (u_int8_t )((n) & 0xff)
+
+static int debug;
+static DEFINE_SPINLOCK(snmp_lock);
+
+/*
+ * Application layer address mapping mimics the NAT mapping, but
+ * only for the first octet in this case (a more flexible system
+ * can be implemented if needed).
+ */
+struct oct1_map
+{
+ u_int8_t from;
+ u_int8_t to;
+};
+
+
+/*****************************************************************************
+ *
+ * Basic ASN.1 decoding routines (gxsnmp author Dirk Wisse)
+ *
+ *****************************************************************************/
+
+/* Class */
+#define ASN1_UNI 0 /* Universal */
+#define ASN1_APL 1 /* Application */
+#define ASN1_CTX 2 /* Context */
+#define ASN1_PRV 3 /* Private */
+
+/* Tag */
+#define ASN1_EOC 0 /* End Of Contents */
+#define ASN1_BOL 1 /* Boolean */
+#define ASN1_INT 2 /* Integer */
+#define ASN1_BTS 3 /* Bit String */
+#define ASN1_OTS 4 /* Octet String */
+#define ASN1_NUL 5 /* Null */
+#define ASN1_OJI 6 /* Object Identifier */
+#define ASN1_OJD 7 /* Object Description */
+#define ASN1_EXT 8 /* External */
+#define ASN1_SEQ 16 /* Sequence */
+#define ASN1_SET 17 /* Set */
+#define ASN1_NUMSTR 18 /* Numerical String */
+#define ASN1_PRNSTR 19 /* Printable String */
+#define ASN1_TEXSTR 20 /* Teletext String */
+#define ASN1_VIDSTR 21 /* Video String */
+#define ASN1_IA5STR 22 /* IA5 String */
+#define ASN1_UNITIM 23 /* Universal Time */
+#define ASN1_GENTIM 24 /* General Time */
+#define ASN1_GRASTR 25 /* Graphical String */
+#define ASN1_VISSTR 26 /* Visible String */
+#define ASN1_GENSTR 27 /* General String */
+
+/* Primitive / Constructed methods*/
+#define ASN1_PRI 0 /* Primitive */
+#define ASN1_CON 1 /* Constructed */
+
+/*
+ * Error codes.
+ */
+#define ASN1_ERR_NOERROR 0
+#define ASN1_ERR_DEC_EMPTY 2
+#define ASN1_ERR_DEC_EOC_MISMATCH 3
+#define ASN1_ERR_DEC_LENGTH_MISMATCH 4
+#define ASN1_ERR_DEC_BADVALUE 5
+
+/*
+ * ASN.1 context.
+ */
+struct asn1_ctx
+{
+ int error; /* Error condition */
+ unsigned char *pointer; /* Octet just to be decoded */
+ unsigned char *begin; /* First octet */
+ unsigned char *end; /* Octet after last octet */
+};
+
+/*
+ * Octet string (not null terminated)
+ */
+struct asn1_octstr
+{
+ unsigned char *data;
+ unsigned int len;
+};
+
+static void asn1_open(struct asn1_ctx *ctx,
+ unsigned char *buf,
+ unsigned int len)
+{
+ ctx->begin = buf;
+ ctx->end = buf + len;
+ ctx->pointer = buf;
+ ctx->error = ASN1_ERR_NOERROR;
+}
+
+static unsigned char asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch)
+{
+ if (ctx->pointer >= ctx->end) {
+ ctx->error = ASN1_ERR_DEC_EMPTY;
+ return 0;
+ }
+ *ch = *(ctx->pointer)++;
+ return 1;
+}
+
+static unsigned char asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag)
+{
+ unsigned char ch;
+
+ *tag = 0;
+
+ do
+ {
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+ *tag <<= 7;
+ *tag |= ch & 0x7F;
+ } while ((ch & 0x80) == 0x80);
+ return 1;
+}
+
+static unsigned char asn1_id_decode(struct asn1_ctx *ctx,
+ unsigned int *cls,
+ unsigned int *con,
+ unsigned int *tag)
+{
+ unsigned char ch;
+
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+
+ *cls = (ch & 0xC0) >> 6;
+ *con = (ch & 0x20) >> 5;
+ *tag = (ch & 0x1F);
+
+ if (*tag == 0x1F) {
+ if (!asn1_tag_decode(ctx, tag))
+ return 0;
+ }
+ return 1;
+}
+
+static unsigned char asn1_length_decode(struct asn1_ctx *ctx,
+ unsigned int *def,
+ unsigned int *len)
+{
+ unsigned char ch, cnt;
+
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+
+ if (ch == 0x80)
+ *def = 0;
+ else {
+ *def = 1;
+
+ if (ch < 0x80)
+ *len = ch;
+ else {
+ cnt = (unsigned char) (ch & 0x7F);
+ *len = 0;
+
+ while (cnt > 0) {
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+ *len <<= 8;
+ *len |= ch;
+ cnt--;
+ }
+ }
+ }
+ return 1;
+}
+
+static unsigned char asn1_header_decode(struct asn1_ctx *ctx,
+ unsigned char **eoc,
+ unsigned int *cls,
+ unsigned int *con,
+ unsigned int *tag)
+{
+ unsigned int def, len;
+
+ if (!asn1_id_decode(ctx, cls, con, tag))
+ return 0;
+
+ if (!asn1_length_decode(ctx, &def, &len))
+ return 0;
+
+ if (def)
+ *eoc = ctx->pointer + len;
+ else
+ *eoc = NULL;
+ return 1;
+}
+
+static unsigned char asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc)
+{
+ unsigned char ch;
+
+ if (eoc == 0) {
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+
+ if (ch != 0x00) {
+ ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
+ return 0;
+ }
+
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+
+ if (ch != 0x00) {
+ ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
+ return 0;
+ }
+ return 1;
+ } else {
+ if (ctx->pointer != eoc) {
+ ctx->error = ASN1_ERR_DEC_LENGTH_MISMATCH;
+ return 0;
+ }
+ return 1;
+ }
+}
+
+static unsigned char asn1_null_decode(struct asn1_ctx *ctx, unsigned char *eoc)
+{
+ ctx->pointer = eoc;
+ return 1;
+}
+
+static unsigned char asn1_long_decode(struct asn1_ctx *ctx,
+ unsigned char *eoc,
+ long *integer)
+{
+ unsigned char ch;
+ unsigned int len;
+
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+
+ *integer = (signed char) ch;
+ len = 1;
+
+ while (ctx->pointer < eoc) {
+ if (++len > sizeof (long)) {
+ ctx->error = ASN1_ERR_DEC_BADVALUE;
+ return 0;
+ }
+
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+
+ *integer <<= 8;
+ *integer |= ch;
+ }
+ return 1;
+}
+
+static unsigned char asn1_uint_decode(struct asn1_ctx *ctx,
+ unsigned char *eoc,
+ unsigned int *integer)
+{
+ unsigned char ch;
+ unsigned int len;
+
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+
+ *integer = ch;
+ if (ch == 0) len = 0;
+ else len = 1;
+
+ while (ctx->pointer < eoc) {
+ if (++len > sizeof (unsigned int)) {
+ ctx->error = ASN1_ERR_DEC_BADVALUE;
+ return 0;
+ }
+
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+
+ *integer <<= 8;
+ *integer |= ch;
+ }
+ return 1;
+}
+
+static unsigned char asn1_ulong_decode(struct asn1_ctx *ctx,
+ unsigned char *eoc,
+ unsigned long *integer)
+{
+ unsigned char ch;
+ unsigned int len;
+
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+
+ *integer = ch;
+ if (ch == 0) len = 0;
+ else len = 1;
+
+ while (ctx->pointer < eoc) {
+ if (++len > sizeof (unsigned long)) {
+ ctx->error = ASN1_ERR_DEC_BADVALUE;
+ return 0;
+ }
+
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+
+ *integer <<= 8;
+ *integer |= ch;
+ }
+ return 1;
+}
+
+static unsigned char asn1_octets_decode(struct asn1_ctx *ctx,
+ unsigned char *eoc,
+ unsigned char **octets,
+ unsigned int *len)
+{
+ unsigned char *ptr;
+
+ *len = 0;
+
+ *octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC);
+ if (*octets == NULL) {
+ if (net_ratelimit())
+ printk("OOM in bsalg (%d)\n", __LINE__);
+ return 0;
+ }
+
+ ptr = *octets;
+ while (ctx->pointer < eoc) {
+ if (!asn1_octet_decode(ctx, (unsigned char *)ptr++)) {
+ kfree(*octets);
+ *octets = NULL;
+ return 0;
+ }
+ (*len)++;
+ }
+ return 1;
+}
+
+static unsigned char asn1_subid_decode(struct asn1_ctx *ctx,
+ unsigned long *subid)
+{
+ unsigned char ch;
+
+ *subid = 0;
+
+ do {
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+
+ *subid <<= 7;
+ *subid |= ch & 0x7F;
+ } while ((ch & 0x80) == 0x80);
+ return 1;
+}
+
+static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
+ unsigned char *eoc,
+ unsigned long **oid,
+ unsigned int *len)
+{
+ unsigned long subid;
+ unsigned int size;
+ unsigned long *optr;
+
+ size = eoc - ctx->pointer + 1;
+ *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
+ if (*oid == NULL) {
+ if (net_ratelimit())
+ printk("OOM in bsalg (%d)\n", __LINE__);
+ return 0;
+ }
+
+ optr = *oid;
+
+ if (!asn1_subid_decode(ctx, &subid)) {
+ kfree(*oid);
+ *oid = NULL;
+ return 0;
+ }
+
+ if (subid < 40) {
+ optr [0] = 0;
+ optr [1] = subid;
+ } else if (subid < 80) {
+ optr [0] = 1;
+ optr [1] = subid - 40;
+ } else {
+ optr [0] = 2;
+ optr [1] = subid - 80;
+ }
+
+ *len = 2;
+ optr += 2;
+
+ while (ctx->pointer < eoc) {
+ if (++(*len) > size) {
+ ctx->error = ASN1_ERR_DEC_BADVALUE;
+ kfree(*oid);
+ *oid = NULL;
+ return 0;
+ }
+
+ if (!asn1_subid_decode(ctx, optr++)) {
+ kfree(*oid);
+ *oid = NULL;
+ return 0;
+ }
+ }
+ return 1;
+}
+
+/*****************************************************************************
+ *
+ * SNMP decoding routines (gxsnmp author Dirk Wisse)
+ *
+ *****************************************************************************/
+
+/* SNMP Versions */
+#define SNMP_V1 0
+#define SNMP_V2C 1
+#define SNMP_V2 2
+#define SNMP_V3 3
+
+/* Default Sizes */
+#define SNMP_SIZE_COMM 256
+#define SNMP_SIZE_OBJECTID 128
+#define SNMP_SIZE_BUFCHR 256
+#define SNMP_SIZE_BUFINT 128
+#define SNMP_SIZE_SMALLOBJECTID 16
+
+/* Requests */
+#define SNMP_PDU_GET 0
+#define SNMP_PDU_NEXT 1
+#define SNMP_PDU_RESPONSE 2
+#define SNMP_PDU_SET 3
+#define SNMP_PDU_TRAP1 4
+#define SNMP_PDU_BULK 5
+#define SNMP_PDU_INFORM 6
+#define SNMP_PDU_TRAP2 7
+
+/* Errors */
+#define SNMP_NOERROR 0
+#define SNMP_TOOBIG 1
+#define SNMP_NOSUCHNAME 2
+#define SNMP_BADVALUE 3
+#define SNMP_READONLY 4
+#define SNMP_GENERROR 5
+#define SNMP_NOACCESS 6
+#define SNMP_WRONGTYPE 7
+#define SNMP_WRONGLENGTH 8
+#define SNMP_WRONGENCODING 9
+#define SNMP_WRONGVALUE 10
+#define SNMP_NOCREATION 11
+#define SNMP_INCONSISTENTVALUE 12
+#define SNMP_RESOURCEUNAVAILABLE 13
+#define SNMP_COMMITFAILED 14
+#define SNMP_UNDOFAILED 15
+#define SNMP_AUTHORIZATIONERROR 16
+#define SNMP_NOTWRITABLE 17
+#define SNMP_INCONSISTENTNAME 18
+
+/* General SNMP V1 Traps */
+#define SNMP_TRAP_COLDSTART 0
+#define SNMP_TRAP_WARMSTART 1
+#define SNMP_TRAP_LINKDOWN 2
+#define SNMP_TRAP_LINKUP 3
+#define SNMP_TRAP_AUTFAILURE 4
+#define SNMP_TRAP_EQPNEIGHBORLOSS 5
+#define SNMP_TRAP_ENTSPECIFIC 6
+
+/* SNMPv1 Types */
+#define SNMP_NULL 0
+#define SNMP_INTEGER 1 /* l */
+#define SNMP_OCTETSTR 2 /* c */
+#define SNMP_DISPLAYSTR 2 /* c */
+#define SNMP_OBJECTID 3 /* ul */
+#define SNMP_IPADDR 4 /* uc */
+#define SNMP_COUNTER 5 /* ul */
+#define SNMP_GAUGE 6 /* ul */
+#define SNMP_TIMETICKS 7 /* ul */
+#define SNMP_OPAQUE 8 /* c */
+
+/* Additional SNMPv2 Types */
+#define SNMP_UINTEGER 5 /* ul */
+#define SNMP_BITSTR 9 /* uc */
+#define SNMP_NSAP 10 /* uc */
+#define SNMP_COUNTER64 11 /* ul */
+#define SNMP_NOSUCHOBJECT 12
+#define SNMP_NOSUCHINSTANCE 13
+#define SNMP_ENDOFMIBVIEW 14
+
+union snmp_syntax
+{
+ unsigned char uc[0]; /* 8 bit unsigned */
+ char c[0]; /* 8 bit signed */
+ unsigned long ul[0]; /* 32 bit unsigned */
+ long l[0]; /* 32 bit signed */
+};
+
+struct snmp_object
+{
+ unsigned long *id;
+ unsigned int id_len;
+ unsigned short type;
+ unsigned int syntax_len;
+ union snmp_syntax syntax;
+};
+
+struct snmp_request
+{
+ unsigned long id;
+ unsigned int error_status;
+ unsigned int error_index;
+};
+
+struct snmp_v1_trap
+{
+ unsigned long *id;
+ unsigned int id_len;
+ unsigned long ip_address; /* pointer */
+ unsigned int general;
+ unsigned int specific;
+ unsigned long time;
+};
+
+/* SNMP types */
+#define SNMP_IPA 0
+#define SNMP_CNT 1
+#define SNMP_GGE 2
+#define SNMP_TIT 3
+#define SNMP_OPQ 4
+#define SNMP_C64 6
+
+/* SNMP errors */
+#define SERR_NSO 0
+#define SERR_NSI 1
+#define SERR_EOM 2
+
+static inline void mangle_address(unsigned char *begin,
+ unsigned char *addr,
+ const struct oct1_map *map,
+ u_int16_t *check);
+struct snmp_cnv
+{
+ unsigned int class;
+ unsigned int tag;
+ int syntax;
+};
+
+static struct snmp_cnv snmp_conv [] =
+{
+ {ASN1_UNI, ASN1_NUL, SNMP_NULL},
+ {ASN1_UNI, ASN1_INT, SNMP_INTEGER},
+ {ASN1_UNI, ASN1_OTS, SNMP_OCTETSTR},
+ {ASN1_UNI, ASN1_OTS, SNMP_DISPLAYSTR},
+ {ASN1_UNI, ASN1_OJI, SNMP_OBJECTID},
+ {ASN1_APL, SNMP_IPA, SNMP_IPADDR},
+ {ASN1_APL, SNMP_CNT, SNMP_COUNTER}, /* Counter32 */
+ {ASN1_APL, SNMP_GGE, SNMP_GAUGE}, /* Gauge32 == Unsigned32 */
+ {ASN1_APL, SNMP_TIT, SNMP_TIMETICKS},
+ {ASN1_APL, SNMP_OPQ, SNMP_OPAQUE},
+
+ /* SNMPv2 data types and errors */
+ {ASN1_UNI, ASN1_BTS, SNMP_BITSTR},
+ {ASN1_APL, SNMP_C64, SNMP_COUNTER64},
+ {ASN1_CTX, SERR_NSO, SNMP_NOSUCHOBJECT},
+ {ASN1_CTX, SERR_NSI, SNMP_NOSUCHINSTANCE},
+ {ASN1_CTX, SERR_EOM, SNMP_ENDOFMIBVIEW},
+ {0, 0, -1}
+};
+
+static unsigned char snmp_tag_cls2syntax(unsigned int tag,
+ unsigned int cls,
+ unsigned short *syntax)
+{
+ struct snmp_cnv *cnv;
+
+ cnv = snmp_conv;
+
+ while (cnv->syntax != -1) {
+ if (cnv->tag == tag && cnv->class == cls) {
+ *syntax = cnv->syntax;
+ return 1;
+ }
+ cnv++;
+ }
+ return 0;
+}
+
+static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
+ struct snmp_object **obj)
+{
+ unsigned int cls, con, tag, len, idlen;
+ unsigned short type;
+ unsigned char *eoc, *end, *p;
+ unsigned long *lp, *id;
+ unsigned long ul;
+ long l;
+
+ *obj = NULL;
+ id = NULL;
+
+ if (!asn1_header_decode(ctx, &eoc, &cls, &con, &tag))
+ return 0;
+
+ if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
+ return 0;
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ return 0;
+
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
+ return 0;
+
+ if (!asn1_oid_decode(ctx, end, &id, &idlen))
+ return 0;
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) {
+ kfree(id);
+ return 0;
+ }
+
+ if (con != ASN1_PRI) {
+ kfree(id);
+ return 0;
+ }
+
+ if (!snmp_tag_cls2syntax(tag, cls, &type)) {
+ kfree(id);
+ return 0;
+ }
+
+ switch (type) {
+ case SNMP_INTEGER:
+ len = sizeof(long);
+ if (!asn1_long_decode(ctx, end, &l)) {
+ kfree(id);
+ return 0;
+ }
+ *obj = kmalloc(sizeof(struct snmp_object) + len,
+ GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(id);
+ if (net_ratelimit())
+ printk("OOM in bsalg (%d)\n", __LINE__);
+ return 0;
+ }
+ (*obj)->syntax.l[0] = l;
+ break;
+ case SNMP_OCTETSTR:
+ case SNMP_OPAQUE:
+ if (!asn1_octets_decode(ctx, end, &p, &len)) {
+ kfree(id);
+ return 0;
+ }
+ *obj = kmalloc(sizeof(struct snmp_object) + len,
+ GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(id);
+ if (net_ratelimit())
+ printk("OOM in bsalg (%d)\n", __LINE__);
+ return 0;
+ }
+ memcpy((*obj)->syntax.c, p, len);
+ kfree(p);
+ break;
+ case SNMP_NULL:
+ case SNMP_NOSUCHOBJECT:
+ case SNMP_NOSUCHINSTANCE:
+ case SNMP_ENDOFMIBVIEW:
+ len = 0;
+ *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(id);
+ if (net_ratelimit())
+ printk("OOM in bsalg (%d)\n", __LINE__);
+ return 0;
+ }
+ if (!asn1_null_decode(ctx, end)) {
+ kfree(id);
+ kfree(*obj);
+ *obj = NULL;
+ return 0;
+ }
+ break;
+ case SNMP_OBJECTID:
+ if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) {
+ kfree(id);
+ return 0;
+ }
+ len *= sizeof(unsigned long);
+ *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(id);
+ if (net_ratelimit())
+ printk("OOM in bsalg (%d)\n", __LINE__);
+ return 0;
+ }
+ memcpy((*obj)->syntax.ul, lp, len);
+ kfree(lp);
+ break;
+ case SNMP_IPADDR:
+ if (!asn1_octets_decode(ctx, end, &p, &len)) {
+ kfree(id);
+ return 0;
+ }
+ if (len != 4) {
+ kfree(p);
+ kfree(id);
+ return 0;
+ }
+ *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(p);
+ kfree(id);
+ if (net_ratelimit())
+ printk("OOM in bsalg (%d)\n", __LINE__);
+ return 0;
+ }
+ memcpy((*obj)->syntax.uc, p, len);
+ kfree(p);
+ break;
+ case SNMP_COUNTER:
+ case SNMP_GAUGE:
+ case SNMP_TIMETICKS:
+ len = sizeof(unsigned long);
+ if (!asn1_ulong_decode(ctx, end, &ul)) {
+ kfree(id);
+ return 0;
+ }
+ *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(id);
+ if (net_ratelimit())
+ printk("OOM in bsalg (%d)\n", __LINE__);
+ return 0;
+ }
+ (*obj)->syntax.ul[0] = ul;
+ break;
+ default:
+ kfree(id);
+ return 0;
+ }
+
+ (*obj)->syntax_len = len;
+ (*obj)->type = type;
+ (*obj)->id = id;
+ (*obj)->id_len = idlen;
+
+ if (!asn1_eoc_decode(ctx, eoc)) {
+ kfree(id);
+ kfree(*obj);
+ *obj = NULL;
+ return 0;
+ }
+ return 1;
+}
+
+static unsigned char snmp_request_decode(struct asn1_ctx *ctx,
+ struct snmp_request *request)
+{
+ unsigned int cls, con, tag;
+ unsigned char *end;
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ return 0;
+
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+ return 0;
+
+ if (!asn1_ulong_decode(ctx, end, &request->id))
+ return 0;
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ return 0;
+
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+ return 0;
+
+ if (!asn1_uint_decode(ctx, end, &request->error_status))
+ return 0;
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ return 0;
+
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+ return 0;
+
+ if (!asn1_uint_decode(ctx, end, &request->error_index))
+ return 0;
+
+ return 1;
+}
+
+/*
+ * Fast checksum update for possibly oddly-aligned UDP byte, from the
+ * code example in the draft.
+ */
+static void fast_csum(unsigned char *csum,
+ const unsigned char *optr,
+ const unsigned char *nptr,
+ int odd)
+{
+ long x, old, new;
+
+ x = csum[0] * 256 + csum[1];
+
+ x =~ x & 0xFFFF;
+
+ if (odd) old = optr[0] * 256;
+ else old = optr[0];
+
+ x -= old & 0xFFFF;
+ if (x <= 0) {
+ x--;
+ x &= 0xFFFF;
+ }
+
+ if (odd) new = nptr[0] * 256;
+ else new = nptr[0];
+
+ x += new & 0xFFFF;
+ if (x & 0x10000) {
+ x++;
+ x &= 0xFFFF;
+ }
+
+ x =~ x & 0xFFFF;
+ csum[0] = x / 256;
+ csum[1] = x & 0xFF;
+}
+
+/*
+ * Mangle IP address.
+ * - begin points to the start of the snmp messgae
+ * - addr points to the start of the address
+ */
+static inline void mangle_address(unsigned char *begin,
+ unsigned char *addr,
+ const struct oct1_map *map,
+ u_int16_t *check)
+{
+ if (map->from == NOCT1(*addr)) {
+ u_int32_t old;
+
+ if (debug)
+ memcpy(&old, (unsigned char *)addr, sizeof(old));
+
+ *addr = map->to;
+
+ /* Update UDP checksum if being used */
+ if (*check) {
+ unsigned char odd = !((addr - begin) % 2);
+
+ fast_csum((unsigned char *)check,
+ &map->from, &map->to, odd);
+
+ }
+
+ if (debug)
+ printk(KERN_DEBUG "bsalg: mapped %u.%u.%u.%u to "
+ "%u.%u.%u.%u\n", NIPQUAD(old), NIPQUAD(*addr));
+ }
+}
+
+static unsigned char snmp_trap_decode(struct asn1_ctx *ctx,
+ struct snmp_v1_trap *trap,
+ const struct oct1_map *map,
+ u_int16_t *check)
+{
+ unsigned int cls, con, tag, len;
+ unsigned char *end;
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ return 0;
+
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
+ return 0;
+
+ if (!asn1_oid_decode(ctx, end, &trap->id, &trap->id_len))
+ return 0;
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ goto err_id_free;
+
+ if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_IPA) ||
+ (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_OTS)))
+ goto err_id_free;
+
+ if (!asn1_octets_decode(ctx, end, (unsigned char **)&trap->ip_address, &len))
+ goto err_id_free;
+
+ /* IPv4 only */
+ if (len != 4)
+ goto err_addr_free;
+
+ mangle_address(ctx->begin, ctx->pointer - 4, map, check);
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ goto err_addr_free;
+
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+ goto err_addr_free;
+
+ if (!asn1_uint_decode(ctx, end, &trap->general))
+ goto err_addr_free;
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ goto err_addr_free;
+
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+ goto err_addr_free;
+
+ if (!asn1_uint_decode(ctx, end, &trap->specific))
+ goto err_addr_free;
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ goto err_addr_free;
+
+ if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_TIT) ||
+ (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_INT)))
+ goto err_addr_free;
+
+ if (!asn1_ulong_decode(ctx, end, &trap->time))
+ goto err_addr_free;
+
+ return 1;
+
+err_id_free:
+ kfree(trap->id);
+
+err_addr_free:
+ kfree((unsigned long *)trap->ip_address);
+
+ return 0;
+}
+
+/*****************************************************************************
+ *
+ * Misc. routines
+ *
+ *****************************************************************************/
+
+static void hex_dump(unsigned char *buf, size_t len)
+{
+ size_t i;
+
+ for (i = 0; i < len; i++) {
+ if (i && !(i % 16))
+ printk("\n");
+ printk("%02x ", *(buf + i));
+ }
+ printk("\n");
+}
+
+/*
+ * Parse and mangle SNMP message according to mapping.
+ * (And this is the fucking 'basic' method).
+ */
+static int snmp_parse_mangle(unsigned char *msg,
+ u_int16_t len,
+ const struct oct1_map *map,
+ u_int16_t *check)
+{
+ unsigned char *eoc, *end;
+ unsigned int cls, con, tag, vers, pdutype;
+ struct asn1_ctx ctx;
+ struct asn1_octstr comm;
+ struct snmp_object **obj;
+
+ if (debug > 1)
+ hex_dump(msg, len);
+
+ asn1_open(&ctx, msg, len);
+
+ /*
+ * Start of SNMP message.
+ */
+ if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
+ return 0;
+ if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
+ return 0;
+
+ /*
+ * Version 1 or 2 handled.
+ */
+ if (!asn1_header_decode(&ctx, &end, &cls, &con, &tag))
+ return 0;
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+ return 0;
+ if (!asn1_uint_decode (&ctx, end, &vers))
+ return 0;
+ if (debug > 1)
+ printk(KERN_DEBUG "bsalg: snmp version: %u\n", vers + 1);
+ if (vers > 1)
+ return 1;
+
+ /*
+ * Community.
+ */
+ if (!asn1_header_decode (&ctx, &end, &cls, &con, &tag))
+ return 0;
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OTS)
+ return 0;
+ if (!asn1_octets_decode(&ctx, end, &comm.data, &comm.len))
+ return 0;
+ if (debug > 1) {
+ unsigned int i;
+
+ printk(KERN_DEBUG "bsalg: community: ");
+ for (i = 0; i < comm.len; i++)
+ printk("%c", comm.data[i]);
+ printk("\n");
+ }
+ kfree(comm.data);
+
+ /*
+ * PDU type
+ */
+ if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &pdutype))
+ return 0;
+ if (cls != ASN1_CTX || con != ASN1_CON)
+ return 0;
+ if (debug > 1) {
+ unsigned char *pdus[] = {
+ [SNMP_PDU_GET] = "get",
+ [SNMP_PDU_NEXT] = "get-next",
+ [SNMP_PDU_RESPONSE] = "response",
+ [SNMP_PDU_SET] = "set",
+ [SNMP_PDU_TRAP1] = "trapv1",
+ [SNMP_PDU_BULK] = "bulk",
+ [SNMP_PDU_INFORM] = "inform",
+ [SNMP_PDU_TRAP2] = "trapv2"
+ };
+
+ if (pdutype > SNMP_PDU_TRAP2)
+ printk(KERN_DEBUG "bsalg: bad pdu type %u\n", pdutype);
+ else
+ printk(KERN_DEBUG "bsalg: pdu: %s\n", pdus[pdutype]);
+ }
+ if (pdutype != SNMP_PDU_RESPONSE &&
+ pdutype != SNMP_PDU_TRAP1 && pdutype != SNMP_PDU_TRAP2)
+ return 1;
+
+ /*
+ * Request header or v1 trap
+ */
+ if (pdutype == SNMP_PDU_TRAP1) {
+ struct snmp_v1_trap trap;
+ unsigned char ret = snmp_trap_decode(&ctx, &trap, map, check);
+
+ /* Discard trap allocations regardless */
+ kfree(trap.id);
+ kfree((unsigned long *)trap.ip_address);
+
+ if (!ret)
+ return ret;
+
+ } else {
+ struct snmp_request req;
+
+ if (!snmp_request_decode(&ctx, &req))
+ return 0;
+
+ if (debug > 1)
+ printk(KERN_DEBUG "bsalg: request: id=0x%lx error_status=%u "
+ "error_index=%u\n", req.id, req.error_status,
+ req.error_index);
+ }
+
+ /*
+ * Loop through objects, look for IP addresses to mangle.
+ */
+ if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
+ return 0;
+
+ if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
+ return 0;
+
+ obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
+ if (obj == NULL) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "OOM in bsalg(%d)\n", __LINE__);
+ return 0;
+ }
+
+ while (!asn1_eoc_decode(&ctx, eoc)) {
+ unsigned int i;
+
+ if (!snmp_object_decode(&ctx, obj)) {
+ if (*obj) {
+ if ((*obj)->id)
+ kfree((*obj)->id);
+ kfree(*obj);
+ }
+ kfree(obj);
+ return 0;
+ }
+
+ if (debug > 1) {
+ printk(KERN_DEBUG "bsalg: object: ");
+ for (i = 0; i < (*obj)->id_len; i++) {
+ if (i > 0)
+ printk(".");
+ printk("%lu", (*obj)->id[i]);
+ }
+ printk(": type=%u\n", (*obj)->type);
+
+ }
+
+ if ((*obj)->type == SNMP_IPADDR)
+ mangle_address(ctx.begin, ctx.pointer - 4 , map, check);
+
+ kfree((*obj)->id);
+ kfree(*obj);
+ }
+ kfree(obj);
+
+ if (!asn1_eoc_decode(&ctx, eoc))
+ return 0;
+
+ return 1;
+}
+
+/*****************************************************************************
+ *
+ * NAT routines.
+ *
+ *****************************************************************************/
+
+/*
+ * SNMP translation routine.
+ */
+static int snmp_translate(struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo,
+ struct sk_buff **pskb)
+{
+ struct iphdr *iph = (*pskb)->nh.iph;
+ struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
+ u_int16_t udplen = ntohs(udph->len);
+ u_int16_t paylen = udplen - sizeof(struct udphdr);
+ int dir = CTINFO2DIR(ctinfo);
+ struct oct1_map map;
+
+ /*
+ * Determine mappping for application layer addresses based
+ * on NAT manipulations for the packet.
+ */
+ if (dir == IP_CT_DIR_ORIGINAL) {
+ /* SNAT traps */
+ map.from = NOCT1(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip);
+ map.to = NOCT1(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip);
+ } else {
+ /* DNAT replies */
+ map.from = NOCT1(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
+ map.to = NOCT1(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip);
+ }
+
+ if (map.from == map.to)
+ return NF_ACCEPT;
+
+ if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr),
+ paylen, &map, &udph->check)) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "bsalg: parser failed\n");
+ return NF_DROP;
+ }
+ return NF_ACCEPT;
+}
+
+/* We don't actually set up expectations, just adjust internal IP
+ * addresses if this is being NATted */
+static int help(struct sk_buff **pskb,
+ struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ int dir = CTINFO2DIR(ctinfo);
+ unsigned int ret;
+ struct iphdr *iph = (*pskb)->nh.iph;
+ struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
+
+ /* SNMP replies and originating SNMP traps get mangled */
+ if (udph->source == ntohs(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
+ return NF_ACCEPT;
+ if (udph->dest == ntohs(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL)
+ return NF_ACCEPT;
+
+ /* No NAT? */
+ if (!(ct->status & IPS_NAT_MASK))
+ return NF_ACCEPT;
+
+ /*
+ * Make sure the packet length is ok. So far, we were only guaranteed
+ * to have a valid length IP header plus 8 bytes, which means we have
+ * enough room for a UDP header. Just verify the UDP length field so we
+ * can mess around with the payload.
+ */
+ if (ntohs(udph->len) != (*pskb)->len - (iph->ihl << 2)) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "SNMP: dropping malformed packet "
+ "src=%u.%u.%u.%u dst=%u.%u.%u.%u\n",
+ NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
+ return NF_DROP;
+ }
+
+ if (!skb_ip_make_writable(pskb, (*pskb)->len))
+ return NF_DROP;
+
+ spin_lock_bh(&snmp_lock);
+ ret = snmp_translate(ct, ctinfo, pskb);
+ spin_unlock_bh(&snmp_lock);
+ return ret;
+}
+
+static struct ip_conntrack_helper snmp_helper = {
+ .max_expected = 0,
+ .timeout = 180,
+ .me = THIS_MODULE,
+ .help = help,
+ .name = "snmp",
+
+ .tuple = { .src = { .u = { __constant_htons(SNMP_PORT) } },
+ .dst = { .protonum = IPPROTO_UDP },
+ },
+ .mask = { .src = { .u = { 0xFFFF } },
+ .dst = { .protonum = 0xFF },
+ },
+};
+
+static struct ip_conntrack_helper snmp_trap_helper = {
+ .max_expected = 0,
+ .timeout = 180,
+ .me = THIS_MODULE,
+ .help = help,
+ .name = "snmp_trap",
+
+ .tuple = { .src = { .u = { __constant_htons(SNMP_TRAP_PORT) } },
+ .dst = { .protonum = IPPROTO_UDP },
+ },
+ .mask = { .src = { .u = { 0xFFFF } },
+ .dst = { .protonum = 0xFF },
+ },
+};
+
+/*****************************************************************************
+ *
+ * Module stuff.
+ *
+ *****************************************************************************/
+
+static int __init init(void)
+{
+ int ret = 0;
+
+ ret = ip_conntrack_helper_register(&snmp_helper);
+ if (ret < 0)
+ return ret;
+ ret = ip_conntrack_helper_register(&snmp_trap_helper);
+ if (ret < 0) {
+ ip_conntrack_helper_unregister(&snmp_helper);
+ return ret;
+ }
+ return ret;
+}
+
+static void __exit fini(void)
+{
+ ip_conntrack_helper_unregister(&snmp_helper);
+ ip_conntrack_helper_unregister(&snmp_trap_helper);
+}
+
+module_init(init);
+module_exit(fini);
+
+module_param(debug, bool, 0600);
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
new file mode 100644
index 00000000000..dec4a74212c
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -0,0 +1,349 @@
+/* This file contains all the functions required for the standalone
+ ip_nat module.
+
+ These are not required by the compatibility layer.
+*/
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
+ * - new API and handling of conntrack/nat helpers
+ * - now capable of multiple expectations for one master
+ * */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/icmp.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <linux/spinlock.h>
+
+#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
+#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)
+
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/ip_nat_protocol.h>
+#include <linux/netfilter_ipv4/ip_nat_core.h>
+#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/listhelp.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+#define HOOKNAME(hooknum) ((hooknum) == NF_IP_POST_ROUTING ? "POST_ROUTING" \
+ : ((hooknum) == NF_IP_PRE_ROUTING ? "PRE_ROUTING" \
+ : ((hooknum) == NF_IP_LOCAL_OUT ? "LOCAL_OUT" \
+ : ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" \
+ : "*ERROR*")))
+
+static unsigned int
+ip_nat_fn(unsigned int hooknum,
+ struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct ip_conntrack *ct;
+ enum ip_conntrack_info ctinfo;
+ struct ip_nat_info *info;
+ /* maniptype == SRC for postrouting. */
+ enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
+
+ /* We never see fragments: conntrack defrags on pre-routing
+ and local-out, and ip_nat_out protects post-routing. */
+ IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
+ & htons(IP_MF|IP_OFFSET)));
+
+ (*pskb)->nfcache |= NFC_UNKNOWN;
+
+ /* If we had a hardware checksum before, it's now invalid */
+ if ((*pskb)->ip_summed == CHECKSUM_HW)
+ if (skb_checksum_help(*pskb, (out == NULL)))
+ return NF_DROP;
+
+ ct = ip_conntrack_get(*pskb, &ctinfo);
+ /* Can't track? It's not due to stress, or conntrack would
+ have dropped it. Hence it's the user's responsibilty to
+ packet filter it out, or implement conntrack/NAT for that
+ protocol. 8) --RR */
+ if (!ct) {
+ /* Exception: ICMP redirect to new connection (not in
+ hash table yet). We must not let this through, in
+ case we're doing NAT to the same network. */
+ if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
+ struct icmphdr _hdr, *hp;
+
+ hp = skb_header_pointer(*pskb,
+ (*pskb)->nh.iph->ihl*4,
+ sizeof(_hdr), &_hdr);
+ if (hp != NULL &&
+ hp->type == ICMP_REDIRECT)
+ return NF_DROP;
+ }
+ return NF_ACCEPT;
+ }
+
+ switch (ctinfo) {
+ case IP_CT_RELATED:
+ case IP_CT_RELATED+IP_CT_IS_REPLY:
+ if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
+ if (!icmp_reply_translation(pskb, ct, maniptype,
+ CTINFO2DIR(ctinfo)))
+ return NF_DROP;
+ else
+ return NF_ACCEPT;
+ }
+ /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+ case IP_CT_NEW:
+ info = &ct->nat.info;
+
+ /* Seen it before? This can happen for loopback, retrans,
+ or local packets.. */
+ if (!ip_nat_initialized(ct, maniptype)) {
+ unsigned int ret;
+
+ /* LOCAL_IN hook doesn't have a chain! */
+ if (hooknum == NF_IP_LOCAL_IN)
+ ret = alloc_null_binding(ct, info, hooknum);
+ else
+ ret = ip_nat_rule_find(pskb, hooknum,
+ in, out, ct,
+ info);
+
+ if (ret != NF_ACCEPT) {
+ return ret;
+ }
+ } else
+ DEBUGP("Already setup manip %s for ct %p\n",
+ maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
+ ct);
+ break;
+
+ default:
+ /* ESTABLISHED */
+ IP_NF_ASSERT(ctinfo == IP_CT_ESTABLISHED
+ || ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
+ info = &ct->nat.info;
+ }
+
+ IP_NF_ASSERT(info);
+ return nat_packet(ct, ctinfo, hooknum, pskb);
+}
+
+static unsigned int
+ip_nat_in(unsigned int hooknum,
+ struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ u_int32_t saddr, daddr;
+ unsigned int ret;
+
+ saddr = (*pskb)->nh.iph->saddr;
+ daddr = (*pskb)->nh.iph->daddr;
+
+ ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN
+ && ((*pskb)->nh.iph->saddr != saddr
+ || (*pskb)->nh.iph->daddr != daddr)) {
+ dst_release((*pskb)->dst);
+ (*pskb)->dst = NULL;
+ }
+ return ret;
+}
+
+static unsigned int
+ip_nat_out(unsigned int hooknum,
+ struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ /* root is playing with raw sockets. */
+ if ((*pskb)->len < sizeof(struct iphdr)
+ || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
+ return NF_ACCEPT;
+
+ /* We can hit fragment here; forwarded packets get
+ defragmented by connection tracking coming in, then
+ fragmented (grr) by the forward code.
+
+ In future: If we have nfct != NULL, AND we have NAT
+ initialized, AND there is no helper, then we can do full
+ NAPT on the head, and IP-address-only NAT on the rest.
+
+ I'm starting to have nightmares about fragments. */
+
+ if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+ *pskb = ip_ct_gather_frags(*pskb, IP_DEFRAG_NAT_OUT);
+
+ if (!*pskb)
+ return NF_STOLEN;
+ }
+
+ return ip_nat_fn(hooknum, pskb, in, out, okfn);
+}
+
+static unsigned int
+ip_nat_local_fn(unsigned int hooknum,
+ struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ u_int32_t saddr, daddr;
+ unsigned int ret;
+
+ /* root is playing with raw sockets. */
+ if ((*pskb)->len < sizeof(struct iphdr)
+ || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
+ return NF_ACCEPT;
+
+ saddr = (*pskb)->nh.iph->saddr;
+ daddr = (*pskb)->nh.iph->daddr;
+
+ ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN
+ && ((*pskb)->nh.iph->saddr != saddr
+ || (*pskb)->nh.iph->daddr != daddr))
+ return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
+ return ret;
+}
+
+/* We must be after connection tracking and before packet filtering. */
+
+/* Before packet filtering, change destination */
+static struct nf_hook_ops ip_nat_in_ops = {
+ .hook = ip_nat_in,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_PRE_ROUTING,
+ .priority = NF_IP_PRI_NAT_DST,
+};
+
+/* After packet filtering, change source */
+static struct nf_hook_ops ip_nat_out_ops = {
+ .hook = ip_nat_out,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_POST_ROUTING,
+ .priority = NF_IP_PRI_NAT_SRC,
+};
+
+/* Before packet filtering, change destination */
+static struct nf_hook_ops ip_nat_local_out_ops = {
+ .hook = ip_nat_local_fn,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_LOCAL_OUT,
+ .priority = NF_IP_PRI_NAT_DST,
+};
+
+/* After packet filtering, change source for reply packets of LOCAL_OUT DNAT */
+static struct nf_hook_ops ip_nat_local_in_ops = {
+ .hook = ip_nat_fn,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_LOCAL_IN,
+ .priority = NF_IP_PRI_NAT_SRC,
+};
+
+static int init_or_cleanup(int init)
+{
+ int ret = 0;
+
+ need_ip_conntrack();
+
+ if (!init) goto cleanup;
+
+ ret = ip_nat_rule_init();
+ if (ret < 0) {
+ printk("ip_nat_init: can't setup rules.\n");
+ goto cleanup_nothing;
+ }
+ ret = ip_nat_init();
+ if (ret < 0) {
+ printk("ip_nat_init: can't setup rules.\n");
+ goto cleanup_rule_init;
+ }
+ ret = nf_register_hook(&ip_nat_in_ops);
+ if (ret < 0) {
+ printk("ip_nat_init: can't register in hook.\n");
+ goto cleanup_nat;
+ }
+ ret = nf_register_hook(&ip_nat_out_ops);
+ if (ret < 0) {
+ printk("ip_nat_init: can't register out hook.\n");
+ goto cleanup_inops;
+ }
+ ret = nf_register_hook(&ip_nat_local_out_ops);
+ if (ret < 0) {
+ printk("ip_nat_init: can't register local out hook.\n");
+ goto cleanup_outops;
+ }
+ ret = nf_register_hook(&ip_nat_local_in_ops);
+ if (ret < 0) {
+ printk("ip_nat_init: can't register local in hook.\n");
+ goto cleanup_localoutops;
+ }
+ return ret;
+
+ cleanup:
+ nf_unregister_hook(&ip_nat_local_in_ops);
+ cleanup_localoutops:
+ nf_unregister_hook(&ip_nat_local_out_ops);
+ cleanup_outops:
+ nf_unregister_hook(&ip_nat_out_ops);
+ cleanup_inops:
+ nf_unregister_hook(&ip_nat_in_ops);
+ cleanup_nat:
+ ip_nat_cleanup();
+ cleanup_rule_init:
+ ip_nat_rule_cleanup();
+ cleanup_nothing:
+ MUST_BE_READ_WRITE_UNLOCKED(&ip_nat_lock);
+ return ret;
+}
+
+static int __init init(void)
+{
+ return init_or_cleanup(1);
+}
+
+static void __exit fini(void)
+{
+ init_or_cleanup(0);
+}
+
+module_init(init);
+module_exit(fini);
+
+EXPORT_SYMBOL(ip_nat_setup_info);
+EXPORT_SYMBOL(ip_nat_protocol_register);
+EXPORT_SYMBOL(ip_nat_protocol_unregister);
+EXPORT_SYMBOL(ip_nat_cheat_check);
+EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
+EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
+EXPORT_SYMBOL(ip_nat_used_tuple);
+EXPORT_SYMBOL(ip_nat_follow_master);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ip_nat_tftp.c b/net/ipv4/netfilter/ip_nat_tftp.c
new file mode 100644
index 00000000000..0343e0d6467
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_tftp.c
@@ -0,0 +1,70 @@
+/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Version: 0.0.7
+ *
+ * Thu 21 Mar 2002 Harald Welte <laforge@gnumonks.org>
+ * - Port to newnat API
+ *
+ * This module currently supports DNAT:
+ * iptables -t nat -A PREROUTING -d x.x.x.x -j DNAT --to-dest x.x.x.y
+ *
+ * and SNAT:
+ * iptables -t nat -A POSTROUTING { -j MASQUERADE , -j SNAT --to-source x.x.x.x }
+ *
+ * It has not been tested with
+ * -j SNAT --to-source x.x.x.x-x.x.x.y since I only have one external ip
+ * If you do test this please let me know if it works or not.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_tftp.h>
+#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/moduleparam.h>
+
+MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
+MODULE_DESCRIPTION("tftp NAT helper");
+MODULE_LICENSE("GPL");
+
+static unsigned int help(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ struct ip_conntrack_expect *exp)
+{
+ exp->saved_proto.udp.port = exp->tuple.dst.u.tcp.port;
+ exp->dir = IP_CT_DIR_REPLY;
+ exp->expectfn = ip_nat_follow_master;
+ if (ip_conntrack_expect_related(exp) != 0) {
+ ip_conntrack_expect_free(exp);
+ return NF_DROP;
+ }
+ return NF_ACCEPT;
+}
+
+static void __exit fini(void)
+{
+ ip_nat_tftp_hook = NULL;
+ /* Make sure noone calls it, meanwhile. */
+ synchronize_net();
+}
+
+static int __init init(void)
+{
+ BUG_ON(ip_nat_tftp_hook);
+ ip_nat_tftp_hook = help;
+ return 0;
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
new file mode 100644
index 00000000000..9e40dffc204
--- /dev/null
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -0,0 +1,741 @@
+/*
+ * This is a module which is used for queueing IPv4 packets and
+ * communicating with userspace via netlink.
+ *
+ * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 2000-03-27: Simplified code (thanks to Andi Kleen for clues).
+ * 2000-05-20: Fixed notifier problems (following Miguel Freitas' report).
+ * 2000-06-19: Fixed so nfmark is copied to metadata (reported by Sebastian
+ * Zander).
+ * 2000-08-01: Added Nick Williams' MAC support.
+ * 2002-06-25: Code cleanup.
+ * 2005-01-10: Added /proc counter for dropped packets; fixed so
+ * packets aren't delivered to user space if they're going
+ * to be dropped.
+ *
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/notifier.h>
+#include <linux/netdevice.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4/ip_queue.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netlink.h>
+#include <linux/spinlock.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/security.h>
+#include <net/sock.h>
+#include <net/route.h>
+
+#define IPQ_QMAX_DEFAULT 1024
+#define IPQ_PROC_FS_NAME "ip_queue"
+#define NET_IPQ_QMAX 2088
+#define NET_IPQ_QMAX_NAME "ip_queue_maxlen"
+
+struct ipq_rt_info {
+ __u8 tos;
+ __u32 daddr;
+ __u32 saddr;
+};
+
+struct ipq_queue_entry {
+ struct list_head list;
+ struct nf_info *info;
+ struct sk_buff *skb;
+ struct ipq_rt_info rt_info;
+};
+
+typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
+
+static unsigned char copy_mode = IPQ_COPY_NONE;
+static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT;
+static DEFINE_RWLOCK(queue_lock);
+static int peer_pid;
+static unsigned int copy_range;
+static unsigned int queue_total;
+static unsigned int queue_dropped = 0;
+static unsigned int queue_user_dropped = 0;
+static struct sock *ipqnl;
+static LIST_HEAD(queue_list);
+static DECLARE_MUTEX(ipqnl_sem);
+
+static void
+ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
+{
+ nf_reinject(entry->skb, entry->info, verdict);
+ kfree(entry);
+}
+
+static inline void
+__ipq_enqueue_entry(struct ipq_queue_entry *entry)
+{
+ list_add(&entry->list, &queue_list);
+ queue_total++;
+}
+
+/*
+ * Find and return a queued entry matched by cmpfn, or return the last
+ * entry if cmpfn is NULL.
+ */
+static inline struct ipq_queue_entry *
+__ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data)
+{
+ struct list_head *p;
+
+ list_for_each_prev(p, &queue_list) {
+ struct ipq_queue_entry *entry = (struct ipq_queue_entry *)p;
+
+ if (!cmpfn || cmpfn(entry, data))
+ return entry;
+ }
+ return NULL;
+}
+
+static inline void
+__ipq_dequeue_entry(struct ipq_queue_entry *entry)
+{
+ list_del(&entry->list);
+ queue_total--;
+}
+
+static inline struct ipq_queue_entry *
+__ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
+{
+ struct ipq_queue_entry *entry;
+
+ entry = __ipq_find_entry(cmpfn, data);
+ if (entry == NULL)
+ return NULL;
+
+ __ipq_dequeue_entry(entry);
+ return entry;
+}
+
+
+static inline void
+__ipq_flush(int verdict)
+{
+ struct ipq_queue_entry *entry;
+
+ while ((entry = __ipq_find_dequeue_entry(NULL, 0)))
+ ipq_issue_verdict(entry, verdict);
+}
+
+static inline int
+__ipq_set_mode(unsigned char mode, unsigned int range)
+{
+ int status = 0;
+
+ switch(mode) {
+ case IPQ_COPY_NONE:
+ case IPQ_COPY_META:
+ copy_mode = mode;
+ copy_range = 0;
+ break;
+
+ case IPQ_COPY_PACKET:
+ copy_mode = mode;
+ copy_range = range;
+ if (copy_range > 0xFFFF)
+ copy_range = 0xFFFF;
+ break;
+
+ default:
+ status = -EINVAL;
+
+ }
+ return status;
+}
+
+static inline void
+__ipq_reset(void)
+{
+ peer_pid = 0;
+ net_disable_timestamp();
+ __ipq_set_mode(IPQ_COPY_NONE, 0);
+ __ipq_flush(NF_DROP);
+}
+
+static struct ipq_queue_entry *
+ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
+{
+ struct ipq_queue_entry *entry;
+
+ write_lock_bh(&queue_lock);
+ entry = __ipq_find_dequeue_entry(cmpfn, data);
+ write_unlock_bh(&queue_lock);
+ return entry;
+}
+
+static void
+ipq_flush(int verdict)
+{
+ write_lock_bh(&queue_lock);
+ __ipq_flush(verdict);
+ write_unlock_bh(&queue_lock);
+}
+
+static struct sk_buff *
+ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
+{
+ unsigned char *old_tail;
+ size_t size = 0;
+ size_t data_len = 0;
+ struct sk_buff *skb;
+ struct ipq_packet_msg *pmsg;
+ struct nlmsghdr *nlh;
+
+ read_lock_bh(&queue_lock);
+
+ switch (copy_mode) {
+ case IPQ_COPY_META:
+ case IPQ_COPY_NONE:
+ size = NLMSG_SPACE(sizeof(*pmsg));
+ data_len = 0;
+ break;
+
+ case IPQ_COPY_PACKET:
+ if (copy_range == 0 || copy_range > entry->skb->len)
+ data_len = entry->skb->len;
+ else
+ data_len = copy_range;
+
+ size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
+ break;
+
+ default:
+ *errp = -EINVAL;
+ read_unlock_bh(&queue_lock);
+ return NULL;
+ }
+
+ read_unlock_bh(&queue_lock);
+
+ skb = alloc_skb(size, GFP_ATOMIC);
+ if (!skb)
+ goto nlmsg_failure;
+
+ old_tail= skb->tail;
+ nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
+ pmsg = NLMSG_DATA(nlh);
+ memset(pmsg, 0, sizeof(*pmsg));
+
+ pmsg->packet_id = (unsigned long )entry;
+ pmsg->data_len = data_len;
+ pmsg->timestamp_sec = entry->skb->stamp.tv_sec;
+ pmsg->timestamp_usec = entry->skb->stamp.tv_usec;
+ pmsg->mark = entry->skb->nfmark;
+ pmsg->hook = entry->info->hook;
+ pmsg->hw_protocol = entry->skb->protocol;
+
+ if (entry->info->indev)
+ strcpy(pmsg->indev_name, entry->info->indev->name);
+ else
+ pmsg->indev_name[0] = '\0';
+
+ if (entry->info->outdev)
+ strcpy(pmsg->outdev_name, entry->info->outdev->name);
+ else
+ pmsg->outdev_name[0] = '\0';
+
+ if (entry->info->indev && entry->skb->dev) {
+ pmsg->hw_type = entry->skb->dev->type;
+ if (entry->skb->dev->hard_header_parse)
+ pmsg->hw_addrlen =
+ entry->skb->dev->hard_header_parse(entry->skb,
+ pmsg->hw_addr);
+ }
+
+ if (data_len)
+ if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
+ BUG();
+
+ nlh->nlmsg_len = skb->tail - old_tail;
+ return skb;
+
+nlmsg_failure:
+ if (skb)
+ kfree_skb(skb);
+ *errp = -EINVAL;
+ printk(KERN_ERR "ip_queue: error creating packet message\n");
+ return NULL;
+}
+
+static int
+ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data)
+{
+ int status = -EINVAL;
+ struct sk_buff *nskb;
+ struct ipq_queue_entry *entry;
+
+ if (copy_mode == IPQ_COPY_NONE)
+ return -EAGAIN;
+
+ entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
+ if (entry == NULL) {
+ printk(KERN_ERR "ip_queue: OOM in ipq_enqueue_packet()\n");
+ return -ENOMEM;
+ }
+
+ entry->info = info;
+ entry->skb = skb;
+
+ if (entry->info->hook == NF_IP_LOCAL_OUT) {
+ struct iphdr *iph = skb->nh.iph;
+
+ entry->rt_info.tos = iph->tos;
+ entry->rt_info.daddr = iph->daddr;
+ entry->rt_info.saddr = iph->saddr;
+ }
+
+ nskb = ipq_build_packet_message(entry, &status);
+ if (nskb == NULL)
+ goto err_out_free;
+
+ write_lock_bh(&queue_lock);
+
+ if (!peer_pid)
+ goto err_out_free_nskb;
+
+ if (queue_total >= queue_maxlen) {
+ queue_dropped++;
+ status = -ENOSPC;
+ if (net_ratelimit())
+ printk (KERN_WARNING "ip_queue: full at %d entries, "
+ "dropping packets(s). Dropped: %d\n", queue_total,
+ queue_dropped);
+ goto err_out_free_nskb;
+ }
+
+ /* netlink_unicast will either free the nskb or attach it to a socket */
+ status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
+ if (status < 0) {
+ queue_user_dropped++;
+ goto err_out_unlock;
+ }
+
+ __ipq_enqueue_entry(entry);
+
+ write_unlock_bh(&queue_lock);
+ return status;
+
+err_out_free_nskb:
+ kfree_skb(nskb);
+
+err_out_unlock:
+ write_unlock_bh(&queue_lock);
+
+err_out_free:
+ kfree(entry);
+ return status;
+}
+
+static int
+ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
+{
+ int diff;
+ struct iphdr *user_iph = (struct iphdr *)v->payload;
+
+ if (v->data_len < sizeof(*user_iph))
+ return 0;
+ diff = v->data_len - e->skb->len;
+ if (diff < 0)
+ skb_trim(e->skb, v->data_len);
+ else if (diff > 0) {
+ if (v->data_len > 0xFFFF)
+ return -EINVAL;
+ if (diff > skb_tailroom(e->skb)) {
+ struct sk_buff *newskb;
+
+ newskb = skb_copy_expand(e->skb,
+ skb_headroom(e->skb),
+ diff,
+ GFP_ATOMIC);
+ if (newskb == NULL) {
+ printk(KERN_WARNING "ip_queue: OOM "
+ "in mangle, dropping packet\n");
+ return -ENOMEM;
+ }
+ if (e->skb->sk)
+ skb_set_owner_w(newskb, e->skb->sk);
+ kfree_skb(e->skb);
+ e->skb = newskb;
+ }
+ skb_put(e->skb, diff);
+ }
+ if (!skb_ip_make_writable(&e->skb, v->data_len))
+ return -ENOMEM;
+ memcpy(e->skb->data, v->payload, v->data_len);
+ e->skb->nfcache |= NFC_ALTERED;
+
+ /*
+ * Extra routing may needed on local out, as the QUEUE target never
+ * returns control to the table.
+ */
+ if (e->info->hook == NF_IP_LOCAL_OUT) {
+ struct iphdr *iph = e->skb->nh.iph;
+
+ if (!(iph->tos == e->rt_info.tos
+ && iph->daddr == e->rt_info.daddr
+ && iph->saddr == e->rt_info.saddr))
+ return ip_route_me_harder(&e->skb);
+ }
+ return 0;
+}
+
+static inline int
+id_cmp(struct ipq_queue_entry *e, unsigned long id)
+{
+ return (id == (unsigned long )e);
+}
+
+static int
+ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
+{
+ struct ipq_queue_entry *entry;
+
+ if (vmsg->value > NF_MAX_VERDICT)
+ return -EINVAL;
+
+ entry = ipq_find_dequeue_entry(id_cmp, vmsg->id);
+ if (entry == NULL)
+ return -ENOENT;
+ else {
+ int verdict = vmsg->value;
+
+ if (vmsg->data_len && vmsg->data_len == len)
+ if (ipq_mangle_ipv4(vmsg, entry) < 0)
+ verdict = NF_DROP;
+
+ ipq_issue_verdict(entry, verdict);
+ return 0;
+ }
+}
+
+static int
+ipq_set_mode(unsigned char mode, unsigned int range)
+{
+ int status;
+
+ write_lock_bh(&queue_lock);
+ status = __ipq_set_mode(mode, range);
+ write_unlock_bh(&queue_lock);
+ return status;
+}
+
+static int
+ipq_receive_peer(struct ipq_peer_msg *pmsg,
+ unsigned char type, unsigned int len)
+{
+ int status = 0;
+
+ if (len < sizeof(*pmsg))
+ return -EINVAL;
+
+ switch (type) {
+ case IPQM_MODE:
+ status = ipq_set_mode(pmsg->msg.mode.value,
+ pmsg->msg.mode.range);
+ break;
+
+ case IPQM_VERDICT:
+ if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
+ status = -EINVAL;
+ else
+ status = ipq_set_verdict(&pmsg->msg.verdict,
+ len - sizeof(*pmsg));
+ break;
+ default:
+ status = -EINVAL;
+ }
+ return status;
+}
+
+static int
+dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex)
+{
+ if (entry->info->indev)
+ if (entry->info->indev->ifindex == ifindex)
+ return 1;
+
+ if (entry->info->outdev)
+ if (entry->info->outdev->ifindex == ifindex)
+ return 1;
+
+ return 0;
+}
+
+static void
+ipq_dev_drop(int ifindex)
+{
+ struct ipq_queue_entry *entry;
+
+ while ((entry = ipq_find_dequeue_entry(dev_cmp, ifindex)) != NULL)
+ ipq_issue_verdict(entry, NF_DROP);
+}
+
+#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
+
+static inline void
+ipq_rcv_skb(struct sk_buff *skb)
+{
+ int status, type, pid, flags, nlmsglen, skblen;
+ struct nlmsghdr *nlh;
+
+ skblen = skb->len;
+ if (skblen < sizeof(*nlh))
+ return;
+
+ nlh = (struct nlmsghdr *)skb->data;
+ nlmsglen = nlh->nlmsg_len;
+ if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
+ return;
+
+ pid = nlh->nlmsg_pid;
+ flags = nlh->nlmsg_flags;
+
+ if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
+ RCV_SKB_FAIL(-EINVAL);
+
+ if (flags & MSG_TRUNC)
+ RCV_SKB_FAIL(-ECOMM);
+
+ type = nlh->nlmsg_type;
+ if (type < NLMSG_NOOP || type >= IPQM_MAX)
+ RCV_SKB_FAIL(-EINVAL);
+
+ if (type <= IPQM_BASE)
+ return;
+
+ if (security_netlink_recv(skb))
+ RCV_SKB_FAIL(-EPERM);
+
+ write_lock_bh(&queue_lock);
+
+ if (peer_pid) {
+ if (peer_pid != pid) {
+ write_unlock_bh(&queue_lock);
+ RCV_SKB_FAIL(-EBUSY);
+ }
+ } else {
+ net_enable_timestamp();
+ peer_pid = pid;
+ }
+
+ write_unlock_bh(&queue_lock);
+
+ status = ipq_receive_peer(NLMSG_DATA(nlh), type,
+ skblen - NLMSG_LENGTH(0));
+ if (status < 0)
+ RCV_SKB_FAIL(status);
+
+ if (flags & NLM_F_ACK)
+ netlink_ack(skb, nlh, 0);
+ return;
+}
+
+static void
+ipq_rcv_sk(struct sock *sk, int len)
+{
+ do {
+ struct sk_buff *skb;
+
+ if (down_trylock(&ipqnl_sem))
+ return;
+
+ while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+ ipq_rcv_skb(skb);
+ kfree_skb(skb);
+ }
+
+ up(&ipqnl_sem);
+
+ } while (ipqnl && ipqnl->sk_receive_queue.qlen);
+}
+
+static int
+ipq_rcv_dev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = ptr;
+
+ /* Drop any packets associated with the downed device */
+ if (event == NETDEV_DOWN)
+ ipq_dev_drop(dev->ifindex);
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ipq_dev_notifier = {
+ .notifier_call = ipq_rcv_dev_event,
+};
+
+static int
+ipq_rcv_nl_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct netlink_notify *n = ptr;
+
+ if (event == NETLINK_URELEASE &&
+ n->protocol == NETLINK_FIREWALL && n->pid) {
+ write_lock_bh(&queue_lock);
+ if (n->pid == peer_pid)
+ __ipq_reset();
+ write_unlock_bh(&queue_lock);
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ipq_nl_notifier = {
+ .notifier_call = ipq_rcv_nl_event,
+};
+
+static struct ctl_table_header *ipq_sysctl_header;
+
+static ctl_table ipq_table[] = {
+ {
+ .ctl_name = NET_IPQ_QMAX,
+ .procname = NET_IPQ_QMAX_NAME,
+ .data = &queue_maxlen,
+ .maxlen = sizeof(queue_maxlen),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ { .ctl_name = 0 }
+};
+
+static ctl_table ipq_dir_table[] = {
+ {
+ .ctl_name = NET_IPV4,
+ .procname = "ipv4",
+ .mode = 0555,
+ .child = ipq_table
+ },
+ { .ctl_name = 0 }
+};
+
+static ctl_table ipq_root_table[] = {
+ {
+ .ctl_name = CTL_NET,
+ .procname = "net",
+ .mode = 0555,
+ .child = ipq_dir_table
+ },
+ { .ctl_name = 0 }
+};
+
+#ifdef CONFIG_PROC_FS
+static int
+ipq_get_info(char *buffer, char **start, off_t offset, int length)
+{
+ int len;
+
+ read_lock_bh(&queue_lock);
+
+ len = sprintf(buffer,
+ "Peer PID : %d\n"
+ "Copy mode : %hu\n"
+ "Copy range : %u\n"
+ "Queue length : %u\n"
+ "Queue max. length : %u\n"
+ "Queue dropped : %u\n"
+ "Netlink dropped : %u\n",
+ peer_pid,
+ copy_mode,
+ copy_range,
+ queue_total,
+ queue_maxlen,
+ queue_dropped,
+ queue_user_dropped);
+
+ read_unlock_bh(&queue_lock);
+
+ *start = buffer + offset;
+ len -= offset;
+ if (len > length)
+ len = length;
+ else if (len < 0)
+ len = 0;
+ return len;
+}
+#endif /* CONFIG_PROC_FS */
+
+static int
+init_or_cleanup(int init)
+{
+ int status = -ENOMEM;
+ struct proc_dir_entry *proc;
+
+ if (!init)
+ goto cleanup;
+
+ netlink_register_notifier(&ipq_nl_notifier);
+ ipqnl = netlink_kernel_create(NETLINK_FIREWALL, ipq_rcv_sk);
+ if (ipqnl == NULL) {
+ printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
+ goto cleanup_netlink_notifier;
+ }
+
+ proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info);
+ if (proc)
+ proc->owner = THIS_MODULE;
+ else {
+ printk(KERN_ERR "ip_queue: failed to create proc entry\n");
+ goto cleanup_ipqnl;
+ }
+
+ register_netdevice_notifier(&ipq_dev_notifier);
+ ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0);
+
+ status = nf_register_queue_handler(PF_INET, ipq_enqueue_packet, NULL);
+ if (status < 0) {
+ printk(KERN_ERR "ip_queue: failed to register queue handler\n");
+ goto cleanup_sysctl;
+ }
+ return status;
+
+cleanup:
+ nf_unregister_queue_handler(PF_INET);
+ synchronize_net();
+ ipq_flush(NF_DROP);
+
+cleanup_sysctl:
+ unregister_sysctl_table(ipq_sysctl_header);
+ unregister_netdevice_notifier(&ipq_dev_notifier);
+ proc_net_remove(IPQ_PROC_FS_NAME);
+
+cleanup_ipqnl:
+ sock_release(ipqnl->sk_socket);
+ down(&ipqnl_sem);
+ up(&ipqnl_sem);
+
+cleanup_netlink_notifier:
+ netlink_unregister_notifier(&ipq_nl_notifier);
+ return status;
+}
+
+static int __init init(void)
+{
+
+ return init_or_cleanup(1);
+}
+
+static void __exit fini(void)
+{
+ init_or_cleanup(0);
+}
+
+MODULE_DESCRIPTION("IPv4 packet queue handler");
+MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
+MODULE_LICENSE("GPL");
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
new file mode 100644
index 00000000000..8a54f92b849
--- /dev/null
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -0,0 +1,1964 @@
+/*
+ * Packet matching code.
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
+ * - increase module usage count as soon as we have rules inside
+ * a table
+ */
+#include <linux/config.h>
+#include <linux/cache.h>
+#include <linux/skbuff.h>
+#include <linux/kmod.h>
+#include <linux/vmalloc.h>
+#include <linux/netdevice.h>
+#include <linux/module.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <net/ip.h>
+#include <asm/uaccess.h>
+#include <asm/semaphore.h>
+#include <linux/proc_fs.h>
+#include <linux/err.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("IPv4 packet filter");
+
+/*#define DEBUG_IP_FIREWALL*/
+/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
+/*#define DEBUG_IP_FIREWALL_USER*/
+
+#ifdef DEBUG_IP_FIREWALL
+#define dprintf(format, args...) printk(format , ## args)
+#else
+#define dprintf(format, args...)
+#endif
+
+#ifdef DEBUG_IP_FIREWALL_USER
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+#ifdef CONFIG_NETFILTER_DEBUG
+#define IP_NF_ASSERT(x) \
+do { \
+ if (!(x)) \
+ printk("IP_NF_ASSERT: %s:%s:%u\n", \
+ __FUNCTION__, __FILE__, __LINE__); \
+} while(0)
+#else
+#define IP_NF_ASSERT(x)
+#endif
+#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
+
+static DECLARE_MUTEX(ipt_mutex);
+
+/* Must have mutex */
+#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
+#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
+#include <linux/netfilter_ipv4/lockhelp.h>
+#include <linux/netfilter_ipv4/listhelp.h>
+
+#if 0
+/* All the better to debug you with... */
+#define static
+#define inline
+#endif
+
+/*
+ We keep a set of rules for each CPU, so we can avoid write-locking
+ them in the softirq when updating the counters and therefore
+ only need to read-lock in the softirq; doing a write_lock_bh() in user
+ context stops packets coming through and allows user context to read
+ the counters or update the rules.
+
+ To be cache friendly on SMP, we arrange them like so:
+ [ n-entries ]
+ ... cache-align padding ...
+ [ n-entries ]
+
+ Hence the start of any table is given by get_table() below. */
+
+/* The table itself */
+struct ipt_table_info
+{
+ /* Size per table */
+ unsigned int size;
+ /* Number of entries: FIXME. --RR */
+ unsigned int number;
+ /* Initial number of entries. Needed for module usage count */
+ unsigned int initial_entries;
+
+ /* Entry points and underflows */
+ unsigned int hook_entry[NF_IP_NUMHOOKS];
+ unsigned int underflow[NF_IP_NUMHOOKS];
+
+ /* ipt_entry tables: one per CPU */
+ char entries[0] ____cacheline_aligned;
+};
+
+static LIST_HEAD(ipt_target);
+static LIST_HEAD(ipt_match);
+static LIST_HEAD(ipt_tables);
+#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
+
+#ifdef CONFIG_SMP
+#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
+#else
+#define TABLE_OFFSET(t,p) 0
+#endif
+
+#if 0
+#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
+#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
+#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
+#endif
+
+/* Returns whether matches rule or not. */
+static inline int
+ip_packet_match(const struct iphdr *ip,
+ const char *indev,
+ const char *outdev,
+ const struct ipt_ip *ipinfo,
+ int isfrag)
+{
+ size_t i;
+ unsigned long ret;
+
+#define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
+
+ if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
+ IPT_INV_SRCIP)
+ || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
+ IPT_INV_DSTIP)) {
+ dprintf("Source or dest mismatch.\n");
+
+ dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
+ NIPQUAD(ip->saddr),
+ NIPQUAD(ipinfo->smsk.s_addr),
+ NIPQUAD(ipinfo->src.s_addr),
+ ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
+ dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
+ NIPQUAD(ip->daddr),
+ NIPQUAD(ipinfo->dmsk.s_addr),
+ NIPQUAD(ipinfo->dst.s_addr),
+ ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
+ return 0;
+ }
+
+ /* Look for ifname matches; this should unroll nicely. */
+ for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
+ ret |= (((const unsigned long *)indev)[i]
+ ^ ((const unsigned long *)ipinfo->iniface)[i])
+ & ((const unsigned long *)ipinfo->iniface_mask)[i];
+ }
+
+ if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
+ dprintf("VIA in mismatch (%s vs %s).%s\n",
+ indev, ipinfo->iniface,
+ ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
+ return 0;
+ }
+
+ for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
+ ret |= (((const unsigned long *)outdev)[i]
+ ^ ((const unsigned long *)ipinfo->outiface)[i])
+ & ((const unsigned long *)ipinfo->outiface_mask)[i];
+ }
+
+ if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
+ dprintf("VIA out mismatch (%s vs %s).%s\n",
+ outdev, ipinfo->outiface,
+ ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
+ return 0;
+ }
+
+ /* Check specific protocol */
+ if (ipinfo->proto
+ && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
+ dprintf("Packet protocol %hi does not match %hi.%s\n",
+ ip->protocol, ipinfo->proto,
+ ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
+ return 0;
+ }
+
+ /* If we have a fragment rule but the packet is not a fragment
+ * then we return zero */
+ if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
+ dprintf("Fragment rule but not fragment.%s\n",
+ ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
+ return 0;
+ }
+
+ return 1;
+}
+
+static inline int
+ip_checkentry(const struct ipt_ip *ip)
+{
+ if (ip->flags & ~IPT_F_MASK) {
+ duprintf("Unknown flag bits set: %08X\n",
+ ip->flags & ~IPT_F_MASK);
+ return 0;
+ }
+ if (ip->invflags & ~IPT_INV_MASK) {
+ duprintf("Unknown invflag bits set: %08X\n",
+ ip->invflags & ~IPT_INV_MASK);
+ return 0;
+ }
+ return 1;
+}
+
+static unsigned int
+ipt_error(struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ unsigned int hooknum,
+ const void *targinfo,
+ void *userinfo)
+{
+ if (net_ratelimit())
+ printk("ip_tables: error: `%s'\n", (char *)targinfo);
+
+ return NF_DROP;
+}
+
+static inline
+int do_match(struct ipt_entry_match *m,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int offset,
+ int *hotdrop)
+{
+ /* Stop iteration if it doesn't match */
+ if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
+ return 1;
+ else
+ return 0;
+}
+
+static inline struct ipt_entry *
+get_entry(void *base, unsigned int offset)
+{
+ return (struct ipt_entry *)(base + offset);
+}
+
+/* Returns one of the generic firewall policies, like NF_ACCEPT. */
+unsigned int
+ipt_do_table(struct sk_buff **pskb,
+ unsigned int hook,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct ipt_table *table,
+ void *userdata)
+{
+ static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
+ u_int16_t offset;
+ struct iphdr *ip;
+ u_int16_t datalen;
+ int hotdrop = 0;
+ /* Initializing verdict to NF_DROP keeps gcc happy. */
+ unsigned int verdict = NF_DROP;
+ const char *indev, *outdev;
+ void *table_base;
+ struct ipt_entry *e, *back;
+
+ /* Initialization */
+ ip = (*pskb)->nh.iph;
+ datalen = (*pskb)->len - ip->ihl * 4;
+ indev = in ? in->name : nulldevname;
+ outdev = out ? out->name : nulldevname;
+ /* We handle fragments by dealing with the first fragment as
+ * if it was a normal packet. All other fragments are treated
+ * normally, except that they will NEVER match rules that ask
+ * things we don't know, ie. tcp syn flag or ports). If the
+ * rule is also a fragment-specific rule, non-fragments won't
+ * match it. */
+ offset = ntohs(ip->frag_off) & IP_OFFSET;
+
+ read_lock_bh(&table->lock);
+ IP_NF_ASSERT(table->valid_hooks & (1 << hook));
+ table_base = (void *)table->private->entries
+ + TABLE_OFFSET(table->private, smp_processor_id());
+ e = get_entry(table_base, table->private->hook_entry[hook]);
+
+#ifdef CONFIG_NETFILTER_DEBUG
+ /* Check noone else using our table */
+ if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
+ && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
+ printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
+ smp_processor_id(),
+ table->name,
+ &((struct ipt_entry *)table_base)->comefrom,
+ ((struct ipt_entry *)table_base)->comefrom);
+ }
+ ((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
+#endif
+
+ /* For return from builtin chain */
+ back = get_entry(table_base, table->private->underflow[hook]);
+
+ do {
+ IP_NF_ASSERT(e);
+ IP_NF_ASSERT(back);
+ (*pskb)->nfcache |= e->nfcache;
+ if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
+ struct ipt_entry_target *t;
+
+ if (IPT_MATCH_ITERATE(e, do_match,
+ *pskb, in, out,
+ offset, &hotdrop) != 0)
+ goto no_match;
+
+ ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
+
+ t = ipt_get_target(e);
+ IP_NF_ASSERT(t->u.kernel.target);
+ /* Standard target? */
+ if (!t->u.kernel.target->target) {
+ int v;
+
+ v = ((struct ipt_standard_target *)t)->verdict;
+ if (v < 0) {
+ /* Pop from stack? */
+ if (v != IPT_RETURN) {
+ verdict = (unsigned)(-v) - 1;
+ break;
+ }
+ e = back;
+ back = get_entry(table_base,
+ back->comefrom);
+ continue;
+ }
+ if (table_base + v
+ != (void *)e + e->next_offset) {
+ /* Save old back ptr in next entry */
+ struct ipt_entry *next
+ = (void *)e + e->next_offset;
+ next->comefrom
+ = (void *)back - table_base;
+ /* set back pointer to next entry */
+ back = next;
+ }
+
+ e = get_entry(table_base, v);
+ } else {
+ /* Targets which reenter must return
+ abs. verdicts */
+#ifdef CONFIG_NETFILTER_DEBUG
+ ((struct ipt_entry *)table_base)->comefrom
+ = 0xeeeeeeec;
+#endif
+ verdict = t->u.kernel.target->target(pskb,
+ in, out,
+ hook,
+ t->data,
+ userdata);
+
+#ifdef CONFIG_NETFILTER_DEBUG
+ if (((struct ipt_entry *)table_base)->comefrom
+ != 0xeeeeeeec
+ && verdict == IPT_CONTINUE) {
+ printk("Target %s reentered!\n",
+ t->u.kernel.target->name);
+ verdict = NF_DROP;
+ }
+ ((struct ipt_entry *)table_base)->comefrom
+ = 0x57acc001;
+#endif
+ /* Target might have changed stuff. */
+ ip = (*pskb)->nh.iph;
+ datalen = (*pskb)->len - ip->ihl * 4;
+
+ if (verdict == IPT_CONTINUE)
+ e = (void *)e + e->next_offset;
+ else
+ /* Verdict */
+ break;
+ }
+ } else {
+
+ no_match:
+ e = (void *)e + e->next_offset;
+ }
+ } while (!hotdrop);
+
+#ifdef CONFIG_NETFILTER_DEBUG
+ ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
+#endif
+ read_unlock_bh(&table->lock);
+
+#ifdef DEBUG_ALLOW_ALL
+ return NF_ACCEPT;
+#else
+ if (hotdrop)
+ return NF_DROP;
+ else return verdict;
+#endif
+}
+
+/*
+ * These are weird, but module loading must not be done with mutex
+ * held (since they will register), and we have to have a single
+ * function to use try_then_request_module().
+ */
+
+/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */
+static inline struct ipt_table *find_table_lock(const char *name)
+{
+ struct ipt_table *t;
+
+ if (down_interruptible(&ipt_mutex) != 0)
+ return ERR_PTR(-EINTR);
+
+ list_for_each_entry(t, &ipt_tables, list)
+ if (strcmp(t->name, name) == 0 && try_module_get(t->me))
+ return t;
+ up(&ipt_mutex);
+ return NULL;
+}
+
+/* Find match, grabs ref. Returns ERR_PTR() on error. */
+static inline struct ipt_match *find_match(const char *name, u8 revision)
+{
+ struct ipt_match *m;
+ int err = 0;
+
+ if (down_interruptible(&ipt_mutex) != 0)
+ return ERR_PTR(-EINTR);
+
+ list_for_each_entry(m, &ipt_match, list) {
+ if (strcmp(m->name, name) == 0) {
+ if (m->revision == revision) {
+ if (try_module_get(m->me)) {
+ up(&ipt_mutex);
+ return m;
+ }
+ } else
+ err = -EPROTOTYPE; /* Found something. */
+ }
+ }
+ up(&ipt_mutex);
+ return ERR_PTR(err);
+}
+
+/* Find target, grabs ref. Returns ERR_PTR() on error. */
+static inline struct ipt_target *find_target(const char *name, u8 revision)
+{
+ struct ipt_target *t;
+ int err = 0;
+
+ if (down_interruptible(&ipt_mutex) != 0)
+ return ERR_PTR(-EINTR);
+
+ list_for_each_entry(t, &ipt_target, list) {
+ if (strcmp(t->name, name) == 0) {
+ if (t->revision == revision) {
+ if (try_module_get(t->me)) {
+ up(&ipt_mutex);
+ return t;
+ }
+ } else
+ err = -EPROTOTYPE; /* Found something. */
+ }
+ }
+ up(&ipt_mutex);
+ return ERR_PTR(err);
+}
+
+struct ipt_target *ipt_find_target(const char *name, u8 revision)
+{
+ struct ipt_target *target;
+
+ target = try_then_request_module(find_target(name, revision),
+ "ipt_%s", name);
+ if (IS_ERR(target) || !target)
+ return NULL;
+ return target;
+}
+
+static int match_revfn(const char *name, u8 revision, int *bestp)
+{
+ struct ipt_match *m;
+ int have_rev = 0;
+
+ list_for_each_entry(m, &ipt_match, list) {
+ if (strcmp(m->name, name) == 0) {
+ if (m->revision > *bestp)
+ *bestp = m->revision;
+ if (m->revision == revision)
+ have_rev = 1;
+ }
+ }
+ return have_rev;
+}
+
+static int target_revfn(const char *name, u8 revision, int *bestp)
+{
+ struct ipt_target *t;
+ int have_rev = 0;
+
+ list_for_each_entry(t, &ipt_target, list) {
+ if (strcmp(t->name, name) == 0) {
+ if (t->revision > *bestp)
+ *bestp = t->revision;
+ if (t->revision == revision)
+ have_rev = 1;
+ }
+ }
+ return have_rev;
+}
+
+/* Returns true or false (if no such extension at all) */
+static inline int find_revision(const char *name, u8 revision,
+ int (*revfn)(const char *, u8, int *),
+ int *err)
+{
+ int have_rev, best = -1;
+
+ if (down_interruptible(&ipt_mutex) != 0) {
+ *err = -EINTR;
+ return 1;
+ }
+ have_rev = revfn(name, revision, &best);
+ up(&ipt_mutex);
+
+ /* Nothing at all? Return 0 to try loading module. */
+ if (best == -1) {
+ *err = -ENOENT;
+ return 0;
+ }
+
+ *err = best;
+ if (!have_rev)
+ *err = -EPROTONOSUPPORT;
+ return 1;
+}
+
+
+/* All zeroes == unconditional rule. */
+static inline int
+unconditional(const struct ipt_ip *ip)
+{
+ unsigned int i;
+
+ for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
+ if (((__u32 *)ip)[i])
+ return 0;
+
+ return 1;
+}
+
+/* Figures out from what hook each rule can be called: returns 0 if
+ there are loops. Puts hook bitmask in comefrom. */
+static int
+mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
+{
+ unsigned int hook;
+
+ /* No recursion; use packet counter to save back ptrs (reset
+ to 0 as we leave), and comefrom to save source hook bitmask */
+ for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
+ unsigned int pos = newinfo->hook_entry[hook];
+ struct ipt_entry *e
+ = (struct ipt_entry *)(newinfo->entries + pos);
+
+ if (!(valid_hooks & (1 << hook)))
+ continue;
+
+ /* Set initial back pointer. */
+ e->counters.pcnt = pos;
+
+ for (;;) {
+ struct ipt_standard_target *t
+ = (void *)ipt_get_target(e);
+
+ if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
+ printk("iptables: loop hook %u pos %u %08X.\n",
+ hook, pos, e->comefrom);
+ return 0;
+ }
+ e->comefrom
+ |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
+
+ /* Unconditional return/END. */
+ if (e->target_offset == sizeof(struct ipt_entry)
+ && (strcmp(t->target.u.user.name,
+ IPT_STANDARD_TARGET) == 0)
+ && t->verdict < 0
+ && unconditional(&e->ip)) {
+ unsigned int oldpos, size;
+
+ /* Return: backtrack through the last
+ big jump. */
+ do {
+ e->comefrom ^= (1<<NF_IP_NUMHOOKS);
+#ifdef DEBUG_IP_FIREWALL_USER
+ if (e->comefrom
+ & (1 << NF_IP_NUMHOOKS)) {
+ duprintf("Back unset "
+ "on hook %u "
+ "rule %u\n",
+ hook, pos);
+ }
+#endif
+ oldpos = pos;
+ pos = e->counters.pcnt;
+ e->counters.pcnt = 0;
+
+ /* We're at the start. */
+ if (pos == oldpos)
+ goto next;
+
+ e = (struct ipt_entry *)
+ (newinfo->entries + pos);
+ } while (oldpos == pos + e->next_offset);
+
+ /* Move along one */
+ size = e->next_offset;
+ e = (struct ipt_entry *)
+ (newinfo->entries + pos + size);
+ e->counters.pcnt = pos;
+ pos += size;
+ } else {
+ int newpos = t->verdict;
+
+ if (strcmp(t->target.u.user.name,
+ IPT_STANDARD_TARGET) == 0
+ && newpos >= 0) {
+ /* This a jump; chase it. */
+ duprintf("Jump rule %u -> %u\n",
+ pos, newpos);
+ } else {
+ /* ... this is a fallthru */
+ newpos = pos + e->next_offset;
+ }
+ e = (struct ipt_entry *)
+ (newinfo->entries + newpos);
+ e->counters.pcnt = pos;
+ pos = newpos;
+ }
+ }
+ next:
+ duprintf("Finished chain %u\n", hook);
+ }
+ return 1;
+}
+
+static inline int
+cleanup_match(struct ipt_entry_match *m, unsigned int *i)
+{
+ if (i && (*i)-- == 0)
+ return 1;
+
+ if (m->u.kernel.match->destroy)
+ m->u.kernel.match->destroy(m->data,
+ m->u.match_size - sizeof(*m));
+ module_put(m->u.kernel.match->me);
+ return 0;
+}
+
+static inline int
+standard_check(const struct ipt_entry_target *t,
+ unsigned int max_offset)
+{
+ struct ipt_standard_target *targ = (void *)t;
+
+ /* Check standard info. */
+ if (t->u.target_size
+ != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
+ duprintf("standard_check: target size %u != %u\n",
+ t->u.target_size,
+ IPT_ALIGN(sizeof(struct ipt_standard_target)));
+ return 0;
+ }
+
+ if (targ->verdict >= 0
+ && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
+ duprintf("ipt_standard_check: bad verdict (%i)\n",
+ targ->verdict);
+ return 0;
+ }
+
+ if (targ->verdict < -NF_MAX_VERDICT - 1) {
+ duprintf("ipt_standard_check: bad negative verdict (%i)\n",
+ targ->verdict);
+ return 0;
+ }
+ return 1;
+}
+
+static inline int
+check_match(struct ipt_entry_match *m,
+ const char *name,
+ const struct ipt_ip *ip,
+ unsigned int hookmask,
+ unsigned int *i)
+{
+ struct ipt_match *match;
+
+ match = try_then_request_module(find_match(m->u.user.name,
+ m->u.user.revision),
+ "ipt_%s", m->u.user.name);
+ if (IS_ERR(match) || !match) {
+ duprintf("check_match: `%s' not found\n", m->u.user.name);
+ return match ? PTR_ERR(match) : -ENOENT;
+ }
+ m->u.kernel.match = match;
+
+ if (m->u.kernel.match->checkentry
+ && !m->u.kernel.match->checkentry(name, ip, m->data,
+ m->u.match_size - sizeof(*m),
+ hookmask)) {
+ module_put(m->u.kernel.match->me);
+ duprintf("ip_tables: check failed for `%s'.\n",
+ m->u.kernel.match->name);
+ return -EINVAL;
+ }
+
+ (*i)++;
+ return 0;
+}
+
+static struct ipt_target ipt_standard_target;
+
+static inline int
+check_entry(struct ipt_entry *e, const char *name, unsigned int size,
+ unsigned int *i)
+{
+ struct ipt_entry_target *t;
+ struct ipt_target *target;
+ int ret;
+ unsigned int j;
+
+ if (!ip_checkentry(&e->ip)) {
+ duprintf("ip_tables: ip check failed %p %s.\n", e, name);
+ return -EINVAL;
+ }
+
+ j = 0;
+ ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
+ if (ret != 0)
+ goto cleanup_matches;
+
+ t = ipt_get_target(e);
+ target = try_then_request_module(find_target(t->u.user.name,
+ t->u.user.revision),
+ "ipt_%s", t->u.user.name);
+ if (IS_ERR(target) || !target) {
+ duprintf("check_entry: `%s' not found\n", t->u.user.name);
+ ret = target ? PTR_ERR(target) : -ENOENT;
+ goto cleanup_matches;
+ }
+ t->u.kernel.target = target;
+
+ if (t->u.kernel.target == &ipt_standard_target) {
+ if (!standard_check(t, size)) {
+ ret = -EINVAL;
+ goto cleanup_matches;
+ }
+ } else if (t->u.kernel.target->checkentry
+ && !t->u.kernel.target->checkentry(name, e, t->data,
+ t->u.target_size
+ - sizeof(*t),
+ e->comefrom)) {
+ module_put(t->u.kernel.target->me);
+ duprintf("ip_tables: check failed for `%s'.\n",
+ t->u.kernel.target->name);
+ ret = -EINVAL;
+ goto cleanup_matches;
+ }
+
+ (*i)++;
+ return 0;
+
+ cleanup_matches:
+ IPT_MATCH_ITERATE(e, cleanup_match, &j);
+ return ret;
+}
+
+static inline int
+check_entry_size_and_hooks(struct ipt_entry *e,
+ struct ipt_table_info *newinfo,
+ unsigned char *base,
+ unsigned char *limit,
+ const unsigned int *hook_entries,
+ const unsigned int *underflows,
+ unsigned int *i)
+{
+ unsigned int h;
+
+ if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
+ || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
+ duprintf("Bad offset %p\n", e);
+ return -EINVAL;
+ }
+
+ if (e->next_offset
+ < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
+ duprintf("checking: element %p size %u\n",
+ e, e->next_offset);
+ return -EINVAL;
+ }
+
+ /* Check hooks & underflows */
+ for (h = 0; h < NF_IP_NUMHOOKS; h++) {
+ if ((unsigned char *)e - base == hook_entries[h])
+ newinfo->hook_entry[h] = hook_entries[h];
+ if ((unsigned char *)e - base == underflows[h])
+ newinfo->underflow[h] = underflows[h];
+ }
+
+ /* FIXME: underflows must be unconditional, standard verdicts
+ < 0 (not IPT_RETURN). --RR */
+
+ /* Clear counters and comefrom */
+ e->counters = ((struct ipt_counters) { 0, 0 });
+ e->comefrom = 0;
+
+ (*i)++;
+ return 0;
+}
+
+static inline int
+cleanup_entry(struct ipt_entry *e, unsigned int *i)
+{
+ struct ipt_entry_target *t;
+
+ if (i && (*i)-- == 0)
+ return 1;
+
+ /* Cleanup all matches */
+ IPT_MATCH_ITERATE(e, cleanup_match, NULL);
+ t = ipt_get_target(e);
+ if (t->u.kernel.target->destroy)
+ t->u.kernel.target->destroy(t->data,
+ t->u.target_size - sizeof(*t));
+ module_put(t->u.kernel.target->me);
+ return 0;
+}
+
+/* Checks and translates the user-supplied table segment (held in
+ newinfo) */
+static int
+translate_table(const char *name,
+ unsigned int valid_hooks,
+ struct ipt_table_info *newinfo,
+ unsigned int size,
+ unsigned int number,
+ const unsigned int *hook_entries,
+ const unsigned int *underflows)
+{
+ unsigned int i;
+ int ret;
+
+ newinfo->size = size;
+ newinfo->number = number;
+
+ /* Init all hooks to impossible value. */
+ for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+ newinfo->hook_entry[i] = 0xFFFFFFFF;
+ newinfo->underflow[i] = 0xFFFFFFFF;
+ }
+
+ duprintf("translate_table: size %u\n", newinfo->size);
+ i = 0;
+ /* Walk through entries, checking offsets. */
+ ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+ check_entry_size_and_hooks,
+ newinfo,
+ newinfo->entries,
+ newinfo->entries + size,
+ hook_entries, underflows, &i);
+ if (ret != 0)
+ return ret;
+
+ if (i != number) {
+ duprintf("translate_table: %u not %u entries\n",
+ i, number);
+ return -EINVAL;
+ }
+
+ /* Check hooks all assigned */
+ for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+ /* Only hooks which are valid */
+ if (!(valid_hooks & (1 << i)))
+ continue;
+ if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
+ duprintf("Invalid hook entry %u %u\n",
+ i, hook_entries[i]);
+ return -EINVAL;
+ }
+ if (newinfo->underflow[i] == 0xFFFFFFFF) {
+ duprintf("Invalid underflow %u %u\n",
+ i, underflows[i]);
+ return -EINVAL;
+ }
+ }
+
+ if (!mark_source_chains(newinfo, valid_hooks))
+ return -ELOOP;
+
+ /* Finally, each sanity check must pass */
+ i = 0;
+ ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+ check_entry, name, size, &i);
+
+ if (ret != 0) {
+ IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+ cleanup_entry, &i);
+ return ret;
+ }
+
+ /* And one copy for every other CPU */
+ for (i = 1; i < num_possible_cpus(); i++) {
+ memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
+ newinfo->entries,
+ SMP_ALIGN(newinfo->size));
+ }
+
+ return ret;
+}
+
+static struct ipt_table_info *
+replace_table(struct ipt_table *table,
+ unsigned int num_counters,
+ struct ipt_table_info *newinfo,
+ int *error)
+{
+ struct ipt_table_info *oldinfo;
+
+#ifdef CONFIG_NETFILTER_DEBUG
+ {
+ struct ipt_entry *table_base;
+ unsigned int i;
+
+ for (i = 0; i < num_possible_cpus(); i++) {
+ table_base =
+ (void *)newinfo->entries
+ + TABLE_OFFSET(newinfo, i);
+
+ table_base->comefrom = 0xdead57ac;
+ }
+ }
+#endif
+
+ /* Do the substitution. */
+ write_lock_bh(&table->lock);
+ /* Check inside lock: is the old number correct? */
+ if (num_counters != table->private->number) {
+ duprintf("num_counters != table->private->number (%u/%u)\n",
+ num_counters, table->private->number);
+ write_unlock_bh(&table->lock);
+ *error = -EAGAIN;
+ return NULL;
+ }
+ oldinfo = table->private;
+ table->private = newinfo;
+ newinfo->initial_entries = oldinfo->initial_entries;
+ write_unlock_bh(&table->lock);
+
+ return oldinfo;
+}
+
+/* Gets counters. */
+static inline int
+add_entry_to_counter(const struct ipt_entry *e,
+ struct ipt_counters total[],
+ unsigned int *i)
+{
+ ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+
+ (*i)++;
+ return 0;
+}
+
+static void
+get_counters(const struct ipt_table_info *t,
+ struct ipt_counters counters[])
+{
+ unsigned int cpu;
+ unsigned int i;
+
+ for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
+ i = 0;
+ IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
+ t->size,
+ add_entry_to_counter,
+ counters,
+ &i);
+ }
+}
+
+static int
+copy_entries_to_user(unsigned int total_size,
+ struct ipt_table *table,
+ void __user *userptr)
+{
+ unsigned int off, num, countersize;
+ struct ipt_entry *e;
+ struct ipt_counters *counters;
+ int ret = 0;
+
+ /* We need atomic snapshot of counters: rest doesn't change
+ (other than comefrom, which userspace doesn't care
+ about). */
+ countersize = sizeof(struct ipt_counters) * table->private->number;
+ counters = vmalloc(countersize);
+
+ if (counters == NULL)
+ return -ENOMEM;
+
+ /* First, sum counters... */
+ memset(counters, 0, countersize);
+ write_lock_bh(&table->lock);
+ get_counters(table->private, counters);
+ write_unlock_bh(&table->lock);
+
+ /* ... then copy entire thing from CPU 0... */
+ if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+
+ /* FIXME: use iterator macros --RR */
+ /* ... then go back and fix counters and names */
+ for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
+ unsigned int i;
+ struct ipt_entry_match *m;
+ struct ipt_entry_target *t;
+
+ e = (struct ipt_entry *)(table->private->entries + off);
+ if (copy_to_user(userptr + off
+ + offsetof(struct ipt_entry, counters),
+ &counters[num],
+ sizeof(counters[num])) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+
+ for (i = sizeof(struct ipt_entry);
+ i < e->target_offset;
+ i += m->u.match_size) {
+ m = (void *)e + i;
+
+ if (copy_to_user(userptr + off + i
+ + offsetof(struct ipt_entry_match,
+ u.user.name),
+ m->u.kernel.match->name,
+ strlen(m->u.kernel.match->name)+1)
+ != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+ }
+
+ t = ipt_get_target(e);
+ if (copy_to_user(userptr + off + e->target_offset
+ + offsetof(struct ipt_entry_target,
+ u.user.name),
+ t->u.kernel.target->name,
+ strlen(t->u.kernel.target->name)+1) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+ }
+
+ free_counters:
+ vfree(counters);
+ return ret;
+}
+
+static int
+get_entries(const struct ipt_get_entries *entries,
+ struct ipt_get_entries __user *uptr)
+{
+ int ret;
+ struct ipt_table *t;
+
+ t = find_table_lock(entries->name);
+ if (t && !IS_ERR(t)) {
+ duprintf("t->private->number = %u\n",
+ t->private->number);
+ if (entries->size == t->private->size)
+ ret = copy_entries_to_user(t->private->size,
+ t, uptr->entrytable);
+ else {
+ duprintf("get_entries: I've got %u not %u!\n",
+ t->private->size,
+ entries->size);
+ ret = -EINVAL;
+ }
+ module_put(t->me);
+ up(&ipt_mutex);
+ } else
+ ret = t ? PTR_ERR(t) : -ENOENT;
+
+ return ret;
+}
+
+static int
+do_replace(void __user *user, unsigned int len)
+{
+ int ret;
+ struct ipt_replace tmp;
+ struct ipt_table *t;
+ struct ipt_table_info *newinfo, *oldinfo;
+ struct ipt_counters *counters;
+
+ if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ return -EFAULT;
+
+ /* Hack: Causes ipchains to give correct error msg --RR */
+ if (len != sizeof(tmp) + tmp.size)
+ return -ENOPROTOOPT;
+
+ /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
+ if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
+ return -ENOMEM;
+
+ newinfo = vmalloc(sizeof(struct ipt_table_info)
+ + SMP_ALIGN(tmp.size) * num_possible_cpus());
+ if (!newinfo)
+ return -ENOMEM;
+
+ if (copy_from_user(newinfo->entries, user + sizeof(tmp),
+ tmp.size) != 0) {
+ ret = -EFAULT;
+ goto free_newinfo;
+ }
+
+ counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
+ if (!counters) {
+ ret = -ENOMEM;
+ goto free_newinfo;
+ }
+ memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
+
+ ret = translate_table(tmp.name, tmp.valid_hooks,
+ newinfo, tmp.size, tmp.num_entries,
+ tmp.hook_entry, tmp.underflow);
+ if (ret != 0)
+ goto free_newinfo_counters;
+
+ duprintf("ip_tables: Translated table\n");
+
+ t = try_then_request_module(find_table_lock(tmp.name),
+ "iptable_%s", tmp.name);
+ if (!t || IS_ERR(t)) {
+ ret = t ? PTR_ERR(t) : -ENOENT;
+ goto free_newinfo_counters_untrans;
+ }
+
+ /* You lied! */
+ if (tmp.valid_hooks != t->valid_hooks) {
+ duprintf("Valid hook crap: %08X vs %08X\n",
+ tmp.valid_hooks, t->valid_hooks);
+ ret = -EINVAL;
+ goto put_module;
+ }
+
+ oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
+ if (!oldinfo)
+ goto put_module;
+
+ /* Update module usage count based on number of rules */
+ duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
+ oldinfo->number, oldinfo->initial_entries, newinfo->number);
+ if ((oldinfo->number > oldinfo->initial_entries) ||
+ (newinfo->number <= oldinfo->initial_entries))
+ module_put(t->me);
+ if ((oldinfo->number > oldinfo->initial_entries) &&
+ (newinfo->number <= oldinfo->initial_entries))
+ module_put(t->me);
+
+ /* Get the old counters. */
+ get_counters(oldinfo, counters);
+ /* Decrease module usage counts and free resource */
+ IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
+ vfree(oldinfo);
+ if (copy_to_user(tmp.counters, counters,
+ sizeof(struct ipt_counters) * tmp.num_counters) != 0)
+ ret = -EFAULT;
+ vfree(counters);
+ up(&ipt_mutex);
+ return ret;
+
+ put_module:
+ module_put(t->me);
+ up(&ipt_mutex);
+ free_newinfo_counters_untrans:
+ IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
+ free_newinfo_counters:
+ vfree(counters);
+ free_newinfo:
+ vfree(newinfo);
+ return ret;
+}
+
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static inline int
+add_counter_to_entry(struct ipt_entry *e,
+ const struct ipt_counters addme[],
+ unsigned int *i)
+{
+#if 0
+ duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
+ *i,
+ (long unsigned int)e->counters.pcnt,
+ (long unsigned int)e->counters.bcnt,
+ (long unsigned int)addme[*i].pcnt,
+ (long unsigned int)addme[*i].bcnt);
+#endif
+
+ ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+ (*i)++;
+ return 0;
+}
+
+static int
+do_add_counters(void __user *user, unsigned int len)
+{
+ unsigned int i;
+ struct ipt_counters_info tmp, *paddc;
+ struct ipt_table *t;
+ int ret = 0;
+
+ if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ return -EFAULT;
+
+ if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
+ return -EINVAL;
+
+ paddc = vmalloc(len);
+ if (!paddc)
+ return -ENOMEM;
+
+ if (copy_from_user(paddc, user, len) != 0) {
+ ret = -EFAULT;
+ goto free;
+ }
+
+ t = find_table_lock(tmp.name);
+ if (!t || IS_ERR(t)) {
+ ret = t ? PTR_ERR(t) : -ENOENT;
+ goto free;
+ }
+
+ write_lock_bh(&t->lock);
+ if (t->private->number != paddc->num_counters) {
+ ret = -EINVAL;
+ goto unlock_up_free;
+ }
+
+ i = 0;
+ IPT_ENTRY_ITERATE(t->private->entries,
+ t->private->size,
+ add_counter_to_entry,
+ paddc->counters,
+ &i);
+ unlock_up_free:
+ write_unlock_bh(&t->lock);
+ up(&ipt_mutex);
+ module_put(t->me);
+ free:
+ vfree(paddc);
+
+ return ret;
+}
+
+static int
+do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+{
+ int ret;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case IPT_SO_SET_REPLACE:
+ ret = do_replace(user, len);
+ break;
+
+ case IPT_SO_SET_ADD_COUNTERS:
+ ret = do_add_counters(user, len);
+ break;
+
+ default:
+ duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static int
+do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+ int ret;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case IPT_SO_GET_INFO: {
+ char name[IPT_TABLE_MAXNAMELEN];
+ struct ipt_table *t;
+
+ if (*len != sizeof(struct ipt_getinfo)) {
+ duprintf("length %u != %u\n", *len,
+ sizeof(struct ipt_getinfo));
+ ret = -EINVAL;
+ break;
+ }
+
+ if (copy_from_user(name, user, sizeof(name)) != 0) {
+ ret = -EFAULT;
+ break;
+ }
+ name[IPT_TABLE_MAXNAMELEN-1] = '\0';
+
+ t = try_then_request_module(find_table_lock(name),
+ "iptable_%s", name);
+ if (t && !IS_ERR(t)) {
+ struct ipt_getinfo info;
+
+ info.valid_hooks = t->valid_hooks;
+ memcpy(info.hook_entry, t->private->hook_entry,
+ sizeof(info.hook_entry));
+ memcpy(info.underflow, t->private->underflow,
+ sizeof(info.underflow));
+ info.num_entries = t->private->number;
+ info.size = t->private->size;
+ memcpy(info.name, name, sizeof(info.name));
+
+ if (copy_to_user(user, &info, *len) != 0)
+ ret = -EFAULT;
+ else
+ ret = 0;
+ up(&ipt_mutex);
+ module_put(t->me);
+ } else
+ ret = t ? PTR_ERR(t) : -ENOENT;
+ }
+ break;
+
+ case IPT_SO_GET_ENTRIES: {
+ struct ipt_get_entries get;
+
+ if (*len < sizeof(get)) {
+ duprintf("get_entries: %u < %u\n", *len, sizeof(get));
+ ret = -EINVAL;
+ } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
+ ret = -EFAULT;
+ } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
+ duprintf("get_entries: %u != %u\n", *len,
+ sizeof(struct ipt_get_entries) + get.size);
+ ret = -EINVAL;
+ } else
+ ret = get_entries(&get, user);
+ break;
+ }
+
+ case IPT_SO_GET_REVISION_MATCH:
+ case IPT_SO_GET_REVISION_TARGET: {
+ struct ipt_get_revision rev;
+ int (*revfn)(const char *, u8, int *);
+
+ if (*len != sizeof(rev)) {
+ ret = -EINVAL;
+ break;
+ }
+ if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
+ ret = -EFAULT;
+ break;
+ }
+
+ if (cmd == IPT_SO_GET_REVISION_TARGET)
+ revfn = target_revfn;
+ else
+ revfn = match_revfn;
+
+ try_then_request_module(find_revision(rev.name, rev.revision,
+ revfn, &ret),
+ "ipt_%s", rev.name);
+ break;
+ }
+
+ default:
+ duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+/* Registration hooks for targets. */
+int
+ipt_register_target(struct ipt_target *target)
+{
+ int ret;
+
+ ret = down_interruptible(&ipt_mutex);
+ if (ret != 0)
+ return ret;
+ list_add(&target->list, &ipt_target);
+ up(&ipt_mutex);
+ return ret;
+}
+
+void
+ipt_unregister_target(struct ipt_target *target)
+{
+ down(&ipt_mutex);
+ LIST_DELETE(&ipt_target, target);
+ up(&ipt_mutex);
+}
+
+int
+ipt_register_match(struct ipt_match *match)
+{
+ int ret;
+
+ ret = down_interruptible(&ipt_mutex);
+ if (ret != 0)
+ return ret;
+
+ list_add(&match->list, &ipt_match);
+ up(&ipt_mutex);
+
+ return ret;
+}
+
+void
+ipt_unregister_match(struct ipt_match *match)
+{
+ down(&ipt_mutex);
+ LIST_DELETE(&ipt_match, match);
+ up(&ipt_mutex);
+}
+
+int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
+{
+ int ret;
+ struct ipt_table_info *newinfo;
+ static struct ipt_table_info bootstrap
+ = { 0, 0, 0, { 0 }, { 0 }, { } };
+
+ newinfo = vmalloc(sizeof(struct ipt_table_info)
+ + SMP_ALIGN(repl->size) * num_possible_cpus());
+ if (!newinfo)
+ return -ENOMEM;
+
+ memcpy(newinfo->entries, repl->entries, repl->size);
+
+ ret = translate_table(table->name, table->valid_hooks,
+ newinfo, repl->size,
+ repl->num_entries,
+ repl->hook_entry,
+ repl->underflow);
+ if (ret != 0) {
+ vfree(newinfo);
+ return ret;
+ }
+
+ ret = down_interruptible(&ipt_mutex);
+ if (ret != 0) {
+ vfree(newinfo);
+ return ret;
+ }
+
+ /* Don't autoload: we'd eat our tail... */
+ if (list_named_find(&ipt_tables, table->name)) {
+ ret = -EEXIST;
+ goto free_unlock;
+ }
+
+ /* Simplifies replace_table code. */
+ table->private = &bootstrap;
+ if (!replace_table(table, 0, newinfo, &ret))
+ goto free_unlock;
+
+ duprintf("table->private->number = %u\n",
+ table->private->number);
+
+ /* save number of initial entries */
+ table->private->initial_entries = table->private->number;
+
+ rwlock_init(&table->lock);
+ list_prepend(&ipt_tables, table);
+
+ unlock:
+ up(&ipt_mutex);
+ return ret;
+
+ free_unlock:
+ vfree(newinfo);
+ goto unlock;
+}
+
+void ipt_unregister_table(struct ipt_table *table)
+{
+ down(&ipt_mutex);
+ LIST_DELETE(&ipt_tables, table);
+ up(&ipt_mutex);
+
+ /* Decrease module usage counts and free resources */
+ IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
+ cleanup_entry, NULL);
+ vfree(table->private);
+}
+
+/* Returns 1 if the port is matched by the range, 0 otherwise */
+static inline int
+port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
+{
+ int ret;
+
+ ret = (port >= min && port <= max) ^ invert;
+ return ret;
+}
+
+static int
+tcp_find_option(u_int8_t option,
+ const struct sk_buff *skb,
+ unsigned int optlen,
+ int invert,
+ int *hotdrop)
+{
+ /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
+ u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
+ unsigned int i;
+
+ duprintf("tcp_match: finding option\n");
+
+ if (!optlen)
+ return invert;
+
+ /* If we don't have the whole header, drop packet. */
+ op = skb_header_pointer(skb,
+ skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
+ optlen, _opt);
+ if (op == NULL) {
+ *hotdrop = 1;
+ return 0;
+ }
+
+ for (i = 0; i < optlen; ) {
+ if (op[i] == option) return !invert;
+ if (op[i] < 2) i++;
+ else i += op[i+1]?:1;
+ }
+
+ return invert;
+}
+
+static int
+tcp_match(const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const void *matchinfo,
+ int offset,
+ int *hotdrop)
+{
+ struct tcphdr _tcph, *th;
+ const struct ipt_tcp *tcpinfo = matchinfo;
+
+ if (offset) {
+ /* To quote Alan:
+
+ Don't allow a fragment of TCP 8 bytes in. Nobody normal
+ causes this. Its a cracker trying to break in by doing a
+ flag overwrite to pass the direction checks.
+ */
+ if (offset == 1) {
+ duprintf("Dropping evil TCP offset=1 frag.\n");
+ *hotdrop = 1;
+ }
+ /* Must not be a fragment. */
+ return 0;
+ }
+
+#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
+
+ th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+ sizeof(_tcph), &_tcph);
+ if (th == NULL) {
+ /* We've been asked to examine this packet, and we
+ can't. Hence, no choice but to drop. */
+ duprintf("Dropping evil TCP offset=0 tinygram.\n");
+ *hotdrop = 1;
+ return 0;
+ }
+
+ if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
+ ntohs(th->source),
+ !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
+ return 0;
+ if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
+ ntohs(th->dest),
+ !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
+ return 0;
+ if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
+ == tcpinfo->flg_cmp,
+ IPT_TCP_INV_FLAGS))
+ return 0;
+ if (tcpinfo->option) {
+ if (th->doff * 4 < sizeof(_tcph)) {
+ *hotdrop = 1;
+ return 0;
+ }
+ if (!tcp_find_option(tcpinfo->option, skb,
+ th->doff*4 - sizeof(_tcph),
+ tcpinfo->invflags & IPT_TCP_INV_OPTION,
+ hotdrop))
+ return 0;
+ }
+ return 1;
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+tcp_checkentry(const char *tablename,
+ const struct ipt_ip *ip,
+ void *matchinfo,
+ unsigned int matchsize,
+ unsigned int hook_mask)
+{
+ const struct ipt_tcp *tcpinfo = matchinfo;
+
+ /* Must specify proto == TCP, and no unknown invflags */
+ return ip->proto == IPPROTO_TCP
+ && !(ip->invflags & IPT_INV_PROTO)
+ && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
+ && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
+}
+
+static int
+udp_match(const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const void *matchinfo,
+ int offset,
+ int *hotdrop)
+{
+ struct udphdr _udph, *uh;
+ const struct ipt_udp *udpinfo = matchinfo;
+
+ /* Must not be a fragment. */
+ if (offset)
+ return 0;
+
+ uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+ sizeof(_udph), &_udph);
+ if (uh == NULL) {
+ /* We've been asked to examine this packet, and we
+ can't. Hence, no choice but to drop. */
+ duprintf("Dropping evil UDP tinygram.\n");
+ *hotdrop = 1;
+ return 0;
+ }
+
+ return port_match(udpinfo->spts[0], udpinfo->spts[1],
+ ntohs(uh->source),
+ !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
+ && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
+ ntohs(uh->dest),
+ !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+udp_checkentry(const char *tablename,
+ const struct ipt_ip *ip,
+ void *matchinfo,
+ unsigned int matchinfosize,
+ unsigned int hook_mask)
+{
+ const struct ipt_udp *udpinfo = matchinfo;
+
+ /* Must specify proto == UDP, and no unknown invflags */
+ if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
+ duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
+ IPPROTO_UDP);
+ return 0;
+ }
+ if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
+ duprintf("ipt_udp: matchsize %u != %u\n",
+ matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
+ return 0;
+ }
+ if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
+ duprintf("ipt_udp: unknown flags %X\n",
+ udpinfo->invflags);
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Returns 1 if the type and code is matched by the range, 0 otherwise */
+static inline int
+icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
+ u_int8_t type, u_int8_t code,
+ int invert)
+{
+ return ((test_type == 0xFF) || (type == test_type && code >= min_code && code <= max_code))
+ ^ invert;
+}
+
+static int
+icmp_match(const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const void *matchinfo,
+ int offset,
+ int *hotdrop)
+{
+ struct icmphdr _icmph, *ic;
+ const struct ipt_icmp *icmpinfo = matchinfo;
+
+ /* Must not be a fragment. */
+ if (offset)
+ return 0;
+
+ ic = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+ sizeof(_icmph), &_icmph);
+ if (ic == NULL) {
+ /* We've been asked to examine this packet, and we
+ * can't. Hence, no choice but to drop.
+ */
+ duprintf("Dropping evil ICMP tinygram.\n");
+ *hotdrop = 1;
+ return 0;
+ }
+
+ return icmp_type_code_match(icmpinfo->type,
+ icmpinfo->code[0],
+ icmpinfo->code[1],
+ ic->type, ic->code,
+ !!(icmpinfo->invflags&IPT_ICMP_INV));
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+icmp_checkentry(const char *tablename,
+ const struct ipt_ip *ip,
+ void *matchinfo,
+ unsigned int matchsize,
+ unsigned int hook_mask)
+{
+ const struct ipt_icmp *icmpinfo = matchinfo;
+
+ /* Must specify proto == ICMP, and no unknown invflags */
+ return ip->proto == IPPROTO_ICMP
+ && !(ip->invflags & IPT_INV_PROTO)
+ && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
+ && !(icmpinfo->invflags & ~IPT_ICMP_INV);
+}
+
+/* The built-in targets: standard (NULL) and error. */
+static struct ipt_target ipt_standard_target = {
+ .name = IPT_STANDARD_TARGET,
+};
+
+static struct ipt_target ipt_error_target = {
+ .name = IPT_ERROR_TARGET,
+ .target = ipt_error,
+};
+
+static struct nf_sockopt_ops ipt_sockopts = {
+ .pf = PF_INET,
+ .set_optmin = IPT_BASE_CTL,
+ .set_optmax = IPT_SO_SET_MAX+1,
+ .set = do_ipt_set_ctl,
+ .get_optmin = IPT_BASE_CTL,
+ .get_optmax = IPT_SO_GET_MAX+1,
+ .get = do_ipt_get_ctl,
+};
+
+static struct ipt_match tcp_matchstruct = {
+ .name = "tcp",
+ .match = &tcp_match,
+ .checkentry = &tcp_checkentry,
+};
+
+static struct ipt_match udp_matchstruct = {
+ .name = "udp",
+ .match = &udp_match,
+ .checkentry = &udp_checkentry,
+};
+
+static struct ipt_match icmp_matchstruct = {
+ .name = "icmp",
+ .match = &icmp_match,
+ .checkentry = &icmp_checkentry,
+};
+
+#ifdef CONFIG_PROC_FS
+static inline int print_name(const char *i,
+ off_t start_offset, char *buffer, int length,
+ off_t *pos, unsigned int *count)
+{
+ if ((*count)++ >= start_offset) {
+ unsigned int namelen;
+
+ namelen = sprintf(buffer + *pos, "%s\n",
+ i + sizeof(struct list_head));
+ if (*pos + namelen > length) {
+ /* Stop iterating */
+ return 1;
+ }
+ *pos += namelen;
+ }
+ return 0;
+}
+
+static inline int print_target(const struct ipt_target *t,
+ off_t start_offset, char *buffer, int length,
+ off_t *pos, unsigned int *count)
+{
+ if (t == &ipt_standard_target || t == &ipt_error_target)
+ return 0;
+ return print_name((char *)t, start_offset, buffer, length, pos, count);
+}
+
+static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
+{
+ off_t pos = 0;
+ unsigned int count = 0;
+
+ if (down_interruptible(&ipt_mutex) != 0)
+ return 0;
+
+ LIST_FIND(&ipt_tables, print_name, void *,
+ offset, buffer, length, &pos, &count);
+
+ up(&ipt_mutex);
+
+ /* `start' hack - see fs/proc/generic.c line ~105 */
+ *start=(char *)((unsigned long)count-offset);
+ return pos;
+}
+
+static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
+{
+ off_t pos = 0;
+ unsigned int count = 0;
+
+ if (down_interruptible(&ipt_mutex) != 0)
+ return 0;
+
+ LIST_FIND(&ipt_target, print_target, struct ipt_target *,
+ offset, buffer, length, &pos, &count);
+
+ up(&ipt_mutex);
+
+ *start = (char *)((unsigned long)count - offset);
+ return pos;
+}
+
+static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
+{
+ off_t pos = 0;
+ unsigned int count = 0;
+
+ if (down_interruptible(&ipt_mutex) != 0)
+ return 0;
+
+ LIST_FIND(&ipt_match, print_name, void *,
+ offset, buffer, length, &pos, &count);
+
+ up(&ipt_mutex);
+
+ *start = (char *)((unsigned long)count - offset);
+ return pos;
+}
+
+static struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
+{ { "ip_tables_names", ipt_get_tables },
+ { "ip_tables_targets", ipt_get_targets },
+ { "ip_tables_matches", ipt_get_matches },
+ { NULL, NULL} };
+#endif /*CONFIG_PROC_FS*/
+
+static int __init init(void)
+{
+ int ret;
+
+ /* Noone else will be downing sem now, so we won't sleep */
+ down(&ipt_mutex);
+ list_append(&ipt_target, &ipt_standard_target);
+ list_append(&ipt_target, &ipt_error_target);
+ list_append(&ipt_match, &tcp_matchstruct);
+ list_append(&ipt_match, &udp_matchstruct);
+ list_append(&ipt_match, &icmp_matchstruct);
+ up(&ipt_mutex);
+
+ /* Register setsockopt */
+ ret = nf_register_sockopt(&ipt_sockopts);
+ if (ret < 0) {
+ duprintf("Unable to register sockopts.\n");
+ return ret;
+ }
+
+#ifdef CONFIG_PROC_FS
+ {
+ struct proc_dir_entry *proc;
+ int i;
+
+ for (i = 0; ipt_proc_entry[i].name; i++) {
+ proc = proc_net_create(ipt_proc_entry[i].name, 0,
+ ipt_proc_entry[i].get_info);
+ if (!proc) {
+ while (--i >= 0)
+ proc_net_remove(ipt_proc_entry[i].name);
+ nf_unregister_sockopt(&ipt_sockopts);
+ return -ENOMEM;
+ }
+ proc->owner = THIS_MODULE;
+ }
+ }
+#endif
+
+ printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
+ return 0;
+}
+
+static void __exit fini(void)
+{
+ nf_unregister_sockopt(&ipt_sockopts);
+#ifdef CONFIG_PROC_FS
+ {
+ int i;
+ for (i = 0; ipt_proc_entry[i].name; i++)
+ proc_net_remove(ipt_proc_entry[i].name);
+ }
+#endif
+}
+
+EXPORT_SYMBOL(ipt_register_table);
+EXPORT_SYMBOL(ipt_unregister_table);
+EXPORT_SYMBOL(ipt_register_match);
+EXPORT_SYMBOL(ipt_unregister_match);
+EXPORT_SYMBOL(ipt_do_table);
+EXPORT_SYMBOL(ipt_register_target);
+EXPORT_SYMBOL(ipt_unregister_target);
+EXPORT_SYMBOL(ipt_find_target);
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_CLASSIFY.c b/net/ipv4/netfilter/ipt_CLASSIFY.c
new file mode 100644
index 00000000000..9842e6e2318
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_CLASSIFY.c
@@ -0,0 +1,92 @@
+/*
+ * This is a module which is used for setting the skb->priority field
+ * of an skb for qdisc classification.
+ */
+
+/* (C) 2001-2002 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_CLASSIFY.h>
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("iptables qdisc classification target module");
+
+static unsigned int
+target(struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ unsigned int hooknum,
+ const void *targinfo,
+ void *userinfo)
+{
+ const struct ipt_classify_target_info *clinfo = targinfo;
+
+ if((*pskb)->priority != clinfo->priority) {
+ (*pskb)->priority = clinfo->priority;
+ (*pskb)->nfcache |= NFC_ALTERED;
+ }
+
+ return IPT_CONTINUE;
+}
+
+static int
+checkentry(const char *tablename,
+ const struct ipt_entry *e,
+ void *targinfo,
+ unsigned int targinfosize,
+ unsigned int hook_mask)
+{
+ if (targinfosize != IPT_ALIGN(sizeof(struct ipt_classify_target_info))){
+ printk(KERN_ERR "CLASSIFY: invalid size (%u != %Zu).\n",
+ targinfosize,
+ IPT_ALIGN(sizeof(struct ipt_classify_target_info)));
+ return 0;
+ }
+
+ if (hook_mask & ~((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) |
+ (1 << NF_IP_POST_ROUTING))) {
+ printk(KERN_ERR "CLASSIFY: only valid in LOCAL_OUT, FORWARD "
+ "and POST_ROUTING.\n");
+ return 0;
+ }
+
+ if (strcmp(tablename, "mangle") != 0) {
+ printk(KERN_ERR "CLASSIFY: can only be called from "
+ "\"mangle\" table, not \"%s\".\n",
+ tablename);
+ return 0;
+ }
+
+ return 1;
+}
+
+static struct ipt_target ipt_classify_reg = {
+ .name = "CLASSIFY",
+ .target = target,
+ .checkentry = checkentry,
+ .me = THIS_MODULE,
+};
+
+static int __init init(void)
+{
+ return ipt_register_target(&ipt_classify_reg);
+}
+
+static void __exit fini(void)
+{
+ ipt_unregister_target(&ipt_classify_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
new file mode 100644
index 00000000000..0f12e3a3dc7
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -0,0 +1,761 @@
+/* Cluster IP hashmark target
+ * (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
+ * based on ideas of Fabio Olive Leite <olive@unixforge.org>
+ *
+ * Development of this code funded by SuSE Linux AG, http://www.suse.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/proc_fs.h>
+#include <linux/jhash.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <linux/if_arp.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <net/checksum.h>
+
+#include <linux/netfilter_arp.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/lockhelp.h>
+
+#define CLUSTERIP_VERSION "0.6"
+
+#define DEBUG_CLUSTERIP
+
+#ifdef DEBUG_CLUSTERIP
+#define DEBUGP printk
+#else
+#define DEBUGP
+#endif
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("iptables target for CLUSTERIP");
+
+struct clusterip_config {
+ struct list_head list; /* list of all configs */
+ atomic_t refcount; /* reference count */
+
+ u_int32_t clusterip; /* the IP address */
+ u_int8_t clustermac[ETH_ALEN]; /* the MAC address */
+ struct net_device *dev; /* device */
+ u_int16_t num_total_nodes; /* total number of nodes */
+ u_int16_t num_local_nodes; /* number of local nodes */
+ u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; /* node number array */
+
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *pde; /* proc dir entry */
+#endif
+ enum clusterip_hashmode hash_mode; /* which hashing mode */
+ u_int32_t hash_initval; /* hash initialization */
+};
+
+static LIST_HEAD(clusterip_configs);
+
+/* clusterip_lock protects the clusterip_configs list _AND_ the configurable
+ * data within all structurses (num_local_nodes, local_nodes[]) */
+static DECLARE_RWLOCK(clusterip_lock);
+
+#ifdef CONFIG_PROC_FS
+static struct file_operations clusterip_proc_fops;
+static struct proc_dir_entry *clusterip_procdir;
+#endif
+
+static inline void
+clusterip_config_get(struct clusterip_config *c) {
+ atomic_inc(&c->refcount);
+}
+
+static inline void
+clusterip_config_put(struct clusterip_config *c) {
+ if (atomic_dec_and_test(&c->refcount)) {
+ WRITE_LOCK(&clusterip_lock);
+ list_del(&c->list);
+ WRITE_UNLOCK(&clusterip_lock);
+ dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0);
+ dev_put(c->dev);
+ kfree(c);
+ }
+}
+
+
+static struct clusterip_config *
+__clusterip_config_find(u_int32_t clusterip)
+{
+ struct list_head *pos;
+
+ MUST_BE_READ_LOCKED(&clusterip_lock);
+ list_for_each(pos, &clusterip_configs) {
+ struct clusterip_config *c = list_entry(pos,
+ struct clusterip_config, list);
+ if (c->clusterip == clusterip) {
+ return c;
+ }
+ }
+
+ return NULL;
+}
+
+static inline struct clusterip_config *
+clusterip_config_find_get(u_int32_t clusterip)
+{
+ struct clusterip_config *c;
+
+ READ_LOCK(&clusterip_lock);
+ c = __clusterip_config_find(clusterip);
+ if (!c) {
+ READ_UNLOCK(&clusterip_lock);
+ return NULL;
+ }
+ atomic_inc(&c->refcount);
+ READ_UNLOCK(&clusterip_lock);
+
+ return c;
+}
+
+static struct clusterip_config *
+clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
+ struct net_device *dev)
+{
+ struct clusterip_config *c;
+ char buffer[16];
+
+ c = kmalloc(sizeof(*c), GFP_ATOMIC);
+ if (!c)
+ return NULL;
+
+ memset(c, 0, sizeof(*c));
+ c->dev = dev;
+ c->clusterip = ip;
+ memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
+ c->num_total_nodes = i->num_total_nodes;
+ c->num_local_nodes = i->num_local_nodes;
+ memcpy(&c->local_nodes, &i->local_nodes, sizeof(&c->local_nodes));
+ c->hash_mode = i->hash_mode;
+ c->hash_initval = i->hash_initval;
+ atomic_set(&c->refcount, 1);
+
+#ifdef CONFIG_PROC_FS
+ /* create proc dir entry */
+ sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip));
+ c->pde = create_proc_entry(buffer, S_IWUSR|S_IRUSR, clusterip_procdir);
+ if (!c->pde) {
+ kfree(c);
+ return NULL;
+ }
+ c->pde->proc_fops = &clusterip_proc_fops;
+ c->pde->data = c;
+#endif
+
+ WRITE_LOCK(&clusterip_lock);
+ list_add(&c->list, &clusterip_configs);
+ WRITE_UNLOCK(&clusterip_lock);
+
+ return c;
+}
+
+static int
+clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
+{
+ int i;
+
+ WRITE_LOCK(&clusterip_lock);
+
+ if (c->num_local_nodes >= CLUSTERIP_MAX_NODES
+ || nodenum > CLUSTERIP_MAX_NODES) {
+ WRITE_UNLOCK(&clusterip_lock);
+ return 1;
+ }
+
+ /* check if we alrady have this number in our array */
+ for (i = 0; i < c->num_local_nodes; i++) {
+ if (c->local_nodes[i] == nodenum) {
+ WRITE_UNLOCK(&clusterip_lock);
+ return 1;
+ }
+ }
+
+ c->local_nodes[c->num_local_nodes++] = nodenum;
+
+ WRITE_UNLOCK(&clusterip_lock);
+ return 0;
+}
+
+static int
+clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
+{
+ int i;
+
+ WRITE_LOCK(&clusterip_lock);
+
+ if (c->num_local_nodes <= 1 || nodenum > CLUSTERIP_MAX_NODES) {
+ WRITE_UNLOCK(&clusterip_lock);
+ return 1;
+ }
+
+ for (i = 0; i < c->num_local_nodes; i++) {
+ if (c->local_nodes[i] == nodenum) {
+ int size = sizeof(u_int16_t)*(c->num_local_nodes-(i+1));
+ memmove(&c->local_nodes[i], &c->local_nodes[i+1], size);
+ c->num_local_nodes--;
+ WRITE_UNLOCK(&clusterip_lock);
+ return 0;
+ }
+ }
+
+ WRITE_UNLOCK(&clusterip_lock);
+ return 1;
+}
+
+static inline u_int32_t
+clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config)
+{
+ struct iphdr *iph = skb->nh.iph;
+ unsigned long hashval;
+ u_int16_t sport, dport;
+ struct tcphdr *th;
+ struct udphdr *uh;
+ struct icmphdr *ih;
+
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ th = (void *)iph+iph->ihl*4;
+ sport = ntohs(th->source);
+ dport = ntohs(th->dest);
+ break;
+ case IPPROTO_UDP:
+ uh = (void *)iph+iph->ihl*4;
+ sport = ntohs(uh->source);
+ dport = ntohs(uh->dest);
+ break;
+ case IPPROTO_ICMP:
+ ih = (void *)iph+iph->ihl*4;
+ sport = ntohs(ih->un.echo.id);
+ dport = (ih->type<<8)|ih->code;
+ break;
+ default:
+ if (net_ratelimit()) {
+ printk(KERN_NOTICE "CLUSTERIP: unknown protocol `%u'\n",
+ iph->protocol);
+ }
+ sport = dport = 0;
+ }
+
+ switch (config->hash_mode) {
+ case CLUSTERIP_HASHMODE_SIP:
+ hashval = jhash_1word(ntohl(iph->saddr),
+ config->hash_initval);
+ break;
+ case CLUSTERIP_HASHMODE_SIP_SPT:
+ hashval = jhash_2words(ntohl(iph->saddr), sport,
+ config->hash_initval);
+ break;
+ case CLUSTERIP_HASHMODE_SIP_SPT_DPT:
+ hashval = jhash_3words(ntohl(iph->saddr), sport, dport,
+ config->hash_initval);
+ break;
+ default:
+ /* to make gcc happy */
+ hashval = 0;
+ /* This cannot happen, unless the check function wasn't called
+ * at rule load time */
+ printk("CLUSTERIP: unknown mode `%u'\n", config->hash_mode);
+ BUG();
+ break;
+ }
+
+ /* node numbers are 1..n, not 0..n */
+ return ((hashval % config->num_total_nodes)+1);
+}
+
+static inline int
+clusterip_responsible(struct clusterip_config *config, u_int32_t hash)
+{
+ int i;
+
+ READ_LOCK(&clusterip_lock);
+
+ if (config->num_local_nodes == 0) {
+ READ_UNLOCK(&clusterip_lock);
+ return 0;
+ }
+
+ for (i = 0; i < config->num_local_nodes; i++) {
+ if (config->local_nodes[i] == hash) {
+ READ_UNLOCK(&clusterip_lock);
+ return 1;
+ }
+ }
+
+ READ_UNLOCK(&clusterip_lock);
+
+ return 0;
+}
+
+/***********************************************************************
+ * IPTABLES TARGET
+ ***********************************************************************/
+
+static unsigned int
+target(struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ unsigned int hooknum,
+ const void *targinfo,
+ void *userinfo)
+{
+ const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
+ enum ip_conntrack_info ctinfo;
+ struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo);
+ u_int32_t hash;
+
+ /* don't need to clusterip_config_get() here, since refcount
+ * is only decremented by destroy() - and ip_tables guarantees
+ * that the ->target() function isn't called after ->destroy() */
+
+ if (!ct) {
+ printk(KERN_ERR "CLUSTERIP: no conntrack!\n");
+ /* FIXME: need to drop invalid ones, since replies
+ * to outgoing connections of other nodes will be
+ * marked as INVALID */
+ return NF_DROP;
+ }
+
+ /* special case: ICMP error handling. conntrack distinguishes between
+ * error messages (RELATED) and information requests (see below) */
+ if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP
+ && (ctinfo == IP_CT_RELATED
+ || ctinfo == IP_CT_IS_REPLY+IP_CT_IS_REPLY))
+ return IPT_CONTINUE;
+
+ /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO,
+ * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here
+ * on, which all have an ID field [relevant for hashing]. */
+
+ hash = clusterip_hashfn(*pskb, cipinfo->config);
+
+ switch (ctinfo) {
+ case IP_CT_NEW:
+ ct->mark = hash;
+ break;
+ case IP_CT_RELATED:
+ case IP_CT_RELATED+IP_CT_IS_REPLY:
+ /* FIXME: we don't handle expectations at the
+ * moment. they can arrive on a different node than
+ * the master connection (e.g. FTP passive mode) */
+ case IP_CT_ESTABLISHED:
+ case IP_CT_ESTABLISHED+IP_CT_IS_REPLY:
+ break;
+ default:
+ break;
+ }
+
+#ifdef DEBUG_CLUSTERP
+ DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+#endif
+ DEBUGP("hash=%u ct_hash=%lu ", hash, ct->mark);
+ if (!clusterip_responsible(cipinfo->config, hash)) {
+ DEBUGP("not responsible\n");
+ return NF_DROP;
+ }
+ DEBUGP("responsible\n");
+
+ /* despite being received via linklayer multicast, this is
+ * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */
+ (*pskb)->pkt_type = PACKET_HOST;
+
+ return IPT_CONTINUE;
+}
+
+static int
+checkentry(const char *tablename,
+ const struct ipt_entry *e,
+ void *targinfo,
+ unsigned int targinfosize,
+ unsigned int hook_mask)
+{
+ struct ipt_clusterip_tgt_info *cipinfo = targinfo;
+
+ struct clusterip_config *config;
+
+ if (targinfosize != IPT_ALIGN(sizeof(struct ipt_clusterip_tgt_info))) {
+ printk(KERN_WARNING "CLUSTERIP: targinfosize %u != %Zu\n",
+ targinfosize,
+ IPT_ALIGN(sizeof(struct ipt_clusterip_tgt_info)));
+ return 0;
+ }
+
+ if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP &&
+ cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT &&
+ cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) {
+ printk(KERN_WARNING "CLUSTERIP: unknown mode `%u'\n",
+ cipinfo->hash_mode);
+ return 0;
+
+ }
+ if (e->ip.dmsk.s_addr != 0xffffffff
+ || e->ip.dst.s_addr == 0) {
+ printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n");
+ return 0;
+ }
+
+ /* FIXME: further sanity checks */
+
+ config = clusterip_config_find_get(e->ip.dst.s_addr);
+ if (!config) {
+ if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
+ printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr));
+ return 0;
+ } else {
+ struct net_device *dev;
+
+ if (e->ip.iniface[0] == '\0') {
+ printk(KERN_WARNING "CLUSTERIP: Please specify an interface name\n");
+ return 0;
+ }
+
+ dev = dev_get_by_name(e->ip.iniface);
+ if (!dev) {
+ printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", e->ip.iniface);
+ return 0;
+ }
+
+ config = clusterip_config_init(cipinfo,
+ e->ip.dst.s_addr, dev);
+ if (!config) {
+ printk(KERN_WARNING "CLUSTERIP: cannot allocate config\n");
+ dev_put(dev);
+ return 0;
+ }
+ dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0);
+ }
+ }
+
+ cipinfo->config = config;
+
+ return 1;
+}
+
+/* drop reference count of cluster config when rule is deleted */
+static void destroy(void *matchinfo, unsigned int matchinfosize)
+{
+ struct ipt_clusterip_tgt_info *cipinfo = matchinfo;
+
+ /* we first remove the proc entry and then drop the reference
+ * count. In case anyone still accesses the file, the open/close
+ * functions are also incrementing the refcount on their own */
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry(cipinfo->config->pde->name,
+ cipinfo->config->pde->parent);
+#endif
+ clusterip_config_put(cipinfo->config);
+}
+
+static struct ipt_target clusterip_tgt = {
+ .name = "CLUSTERIP",
+ .target = &target,
+ .checkentry = &checkentry,
+ .destroy = &destroy,
+ .me = THIS_MODULE
+};
+
+
+/***********************************************************************
+ * ARP MANGLING CODE
+ ***********************************************************************/
+
+/* hardcoded for 48bit ethernet and 32bit ipv4 addresses */
+struct arp_payload {
+ u_int8_t src_hw[ETH_ALEN];
+ u_int32_t src_ip;
+ u_int8_t dst_hw[ETH_ALEN];
+ u_int32_t dst_ip;
+} __attribute__ ((packed));
+
+#ifdef CLUSTERIP_DEBUG
+static void arp_print(struct arp_payload *payload)
+{
+#define HBUFFERLEN 30
+ char hbuffer[HBUFFERLEN];
+ int j,k;
+ const char hexbuf[]= "0123456789abcdef";
+
+ for (k=0, j=0; k < HBUFFERLEN-3 && j < ETH_ALEN; j++) {
+ hbuffer[k++]=hexbuf[(payload->src_hw[j]>>4)&15];
+ hbuffer[k++]=hexbuf[payload->src_hw[j]&15];
+ hbuffer[k++]=':';
+ }
+ hbuffer[--k]='\0';
+
+ printk("src %u.%u.%u.%u@%s, dst %u.%u.%u.%u\n",
+ NIPQUAD(payload->src_ip), hbuffer,
+ NIPQUAD(payload->dst_ip));
+}
+#endif
+
+static unsigned int
+arp_mangle(unsigned int hook,
+ struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct arphdr *arp = (*pskb)->nh.arph;
+ struct arp_payload *payload;
+ struct clusterip_config *c;
+
+ /* we don't care about non-ethernet and non-ipv4 ARP */
+ if (arp->ar_hrd != htons(ARPHRD_ETHER)
+ || arp->ar_pro != htons(ETH_P_IP)
+ || arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN)
+ return NF_ACCEPT;
+
+ /* we only want to mangle arp replies */
+ if (arp->ar_op != htons(ARPOP_REPLY))
+ return NF_ACCEPT;
+
+ payload = (void *)(arp+1);
+
+ /* if there is no clusterip configuration for the arp reply's
+ * source ip, we don't want to mangle it */
+ c = clusterip_config_find_get(payload->src_ip);
+ if (!c)
+ return NF_ACCEPT;
+
+ /* normally the linux kernel always replies to arp queries of
+ * addresses on different interfacs. However, in the CLUSTERIP case
+ * this wouldn't work, since we didn't subscribe the mcast group on
+ * other interfaces */
+ if (c->dev != out) {
+ DEBUGP("CLUSTERIP: not mangling arp reply on different "
+ "interface: cip'%s'-skb'%s'\n", c->dev->name, out->name);
+ clusterip_config_put(c);
+ return NF_ACCEPT;
+ }
+
+ /* mangle reply hardware address */
+ memcpy(payload->src_hw, c->clustermac, arp->ar_hln);
+
+#ifdef CLUSTERIP_DEBUG
+ DEBUGP(KERN_DEBUG "CLUSTERIP mangled arp reply: ");
+ arp_print(payload);
+#endif
+
+ clusterip_config_put(c);
+
+ return NF_ACCEPT;
+}
+
+static struct nf_hook_ops cip_arp_ops = {
+ .hook = arp_mangle,
+ .pf = NF_ARP,
+ .hooknum = NF_ARP_OUT,
+ .priority = -1
+};
+
+/***********************************************************************
+ * PROC DIR HANDLING
+ ***********************************************************************/
+
+#ifdef CONFIG_PROC_FS
+
+static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct proc_dir_entry *pde = s->private;
+ struct clusterip_config *c = pde->data;
+ unsigned int *nodeidx;
+
+ READ_LOCK(&clusterip_lock);
+ if (*pos >= c->num_local_nodes)
+ return NULL;
+
+ nodeidx = kmalloc(sizeof(unsigned int), GFP_KERNEL);
+ if (!nodeidx)
+ return ERR_PTR(-ENOMEM);
+
+ *nodeidx = *pos;
+ return nodeidx;
+}
+
+static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct proc_dir_entry *pde = s->private;
+ struct clusterip_config *c = pde->data;
+ unsigned int *nodeidx = (unsigned int *)v;
+
+ *pos = ++(*nodeidx);
+ if (*pos >= c->num_local_nodes) {
+ kfree(v);
+ return NULL;
+ }
+ return nodeidx;
+}
+
+static void clusterip_seq_stop(struct seq_file *s, void *v)
+{
+ kfree(v);
+
+ READ_UNLOCK(&clusterip_lock);
+}
+
+static int clusterip_seq_show(struct seq_file *s, void *v)
+{
+ struct proc_dir_entry *pde = s->private;
+ struct clusterip_config *c = pde->data;
+ unsigned int *nodeidx = (unsigned int *)v;
+
+ if (*nodeidx != 0)
+ seq_putc(s, ',');
+ seq_printf(s, "%u", c->local_nodes[*nodeidx]);
+
+ if (*nodeidx == c->num_local_nodes-1)
+ seq_putc(s, '\n');
+
+ return 0;
+}
+
+static struct seq_operations clusterip_seq_ops = {
+ .start = clusterip_seq_start,
+ .next = clusterip_seq_next,
+ .stop = clusterip_seq_stop,
+ .show = clusterip_seq_show,
+};
+
+static int clusterip_proc_open(struct inode *inode, struct file *file)
+{
+ int ret = seq_open(file, &clusterip_seq_ops);
+
+ if (!ret) {
+ struct seq_file *sf = file->private_data;
+ struct proc_dir_entry *pde = PDE(inode);
+ struct clusterip_config *c = pde->data;
+
+ sf->private = pde;
+
+ clusterip_config_get(c);
+ }
+
+ return ret;
+}
+
+static int clusterip_proc_release(struct inode *inode, struct file *file)
+{
+ struct proc_dir_entry *pde = PDE(inode);
+ struct clusterip_config *c = pde->data;
+ int ret;
+
+ ret = seq_release(inode, file);
+
+ if (!ret)
+ clusterip_config_put(c);
+
+ return ret;
+}
+
+static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
+ size_t size, loff_t *ofs)
+{
+#define PROC_WRITELEN 10
+ char buffer[PROC_WRITELEN+1];
+ struct proc_dir_entry *pde = PDE(file->f_dentry->d_inode);
+ struct clusterip_config *c = pde->data;
+ unsigned long nodenum;
+
+ if (copy_from_user(buffer, input, PROC_WRITELEN))
+ return -EFAULT;
+
+ if (*buffer == '+') {
+ nodenum = simple_strtoul(buffer+1, NULL, 10);
+ if (clusterip_add_node(c, nodenum))
+ return -ENOMEM;
+ } else if (*buffer == '-') {
+ nodenum = simple_strtoul(buffer+1, NULL,10);
+ if (clusterip_del_node(c, nodenum))
+ return -ENOENT;
+ } else
+ return -EIO;
+
+ return size;
+}
+
+static struct file_operations clusterip_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = clusterip_proc_open,
+ .read = seq_read,
+ .write = clusterip_proc_write,
+ .llseek = seq_lseek,
+ .release = clusterip_proc_release,
+};
+
+#endif /* CONFIG_PROC_FS */
+
+static int init_or_cleanup(int fini)
+{
+ int ret;
+
+ if (fini)
+ goto cleanup;
+
+ if (ipt_register_target(&clusterip_tgt)) {
+ ret = -EINVAL;
+ goto cleanup_none;
+ }
+
+ if (nf_register_hook(&cip_arp_ops) < 0) {
+ ret = -EINVAL;
+ goto cleanup_target;
+ }
+
+#ifdef CONFIG_PROC_FS
+ clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", proc_net);
+ if (!clusterip_procdir) {
+ printk(KERN_ERR "CLUSTERIP: Unable to proc dir entry\n");
+ ret = -ENOMEM;
+ goto cleanup_hook;
+ }
+#endif /* CONFIG_PROC_FS */
+
+ printk(KERN_NOTICE "ClusterIP Version %s loaded successfully\n",
+ CLUSTERIP_VERSION);
+
+ return 0;
+
+cleanup:
+ printk(KERN_NOTICE "ClusterIP Version %s unloading\n",
+ CLUSTERIP_VERSION);
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent);
+#endif
+cleanup_hook:
+ nf_unregister_hook(&cip_arp_ops);
+cleanup_target:
+ ipt_unregister_target(&clusterip_tgt);
+cleanup_none:
+ return -EINVAL;
+}
+
+static int __init init(void)
+{
+ return init_or_cleanup(0);
+}
+
+static void __exit fini(void)
+{
+ init_or_cleanup(1);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c
new file mode 100644
index 00000000000..30ddd3e18eb
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_CONNMARK.c
@@ -0,0 +1,118 @@
+/* This kernel module is used to modify the connection mark values, or
+ * to optionally restore the skb nfmark from the connection mark
+ *
+ * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
+ * by Henrik Nordstrom <hno@marasystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+
+MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>");
+MODULE_DESCRIPTION("IP tables CONNMARK matching module");
+MODULE_LICENSE("GPL");
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_CONNMARK.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+
+static unsigned int
+target(struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ unsigned int hooknum,
+ const void *targinfo,
+ void *userinfo)
+{
+ const struct ipt_connmark_target_info *markinfo = targinfo;
+ unsigned long diff;
+ unsigned long nfmark;
+ unsigned long newmark;
+
+ enum ip_conntrack_info ctinfo;
+ struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo);
+ if (ct) {
+ switch(markinfo->mode) {
+ case IPT_CONNMARK_SET:
+ newmark = (ct->mark & ~markinfo->mask) | markinfo->mark;
+ if (newmark != ct->mark)
+ ct->mark = newmark;
+ break;
+ case IPT_CONNMARK_SAVE:
+ newmark = (ct->mark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask);
+ if (ct->mark != newmark)
+ ct->mark = newmark;
+ break;
+ case IPT_CONNMARK_RESTORE:
+ nfmark = (*pskb)->nfmark;
+ diff = (ct->mark ^ nfmark) & markinfo->mask;
+ if (diff != 0) {
+ (*pskb)->nfmark = nfmark ^ diff;
+ (*pskb)->nfcache |= NFC_ALTERED;
+ }
+ break;
+ }
+ }
+
+ return IPT_CONTINUE;
+}
+
+static int
+checkentry(const char *tablename,
+ const struct ipt_entry *e,
+ void *targinfo,
+ unsigned int targinfosize,
+ unsigned int hook_mask)
+{
+ struct ipt_connmark_target_info *matchinfo = targinfo;
+ if (targinfosize != IPT_ALIGN(sizeof(struct ipt_connmark_target_info))) {
+ printk(KERN_WARNING "CONNMARK: targinfosize %u != %Zu\n",
+ targinfosize,
+ IPT_ALIGN(sizeof(struct ipt_connmark_target_info)));
+ return 0;
+ }
+
+ if (matchinfo->mode == IPT_CONNMARK_RESTORE) {
+ if (strcmp(tablename, "mangle") != 0) {
+ printk(KERN_WARNING "CONNMARK: restore can only be called from \"mangle\" table, not \"%s\"\n", tablename);
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+static struct ipt_target ipt_connmark_reg = {
+ .name = "CONNMARK",
+ .target = &target,
+ .checkentry = &checkentry,
+ .me = THIS_MODULE
+};
+
+static int __init init(void)
+{
+ return ipt_register_target(&ipt_connmark_reg);
+}
+
+static void __exit fini(void)
+{
+ ipt_unregister_target(&ipt_connmark_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c
new file mode 100644
index 00000000000..3ea4509099f
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_DSCP.c
@@ -0,0 +1,106 @@
+/* iptables module for setting the IPv4 DSCP field, Version 1.8
+ *
+ * (C) 2002 by Harald Welte <laforge@netfilter.org>
+ * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh <mgm@paktronix.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * See RFC2474 for a description of the DSCP field within the IP Header.
+ *
+ * ipt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp
+*/
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_DSCP.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("iptables DSCP modification module");
+MODULE_LICENSE("GPL");
+
+static unsigned int
+target(struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ unsigned int hooknum,
+ const void *targinfo,
+ void *userinfo)
+{
+ const struct ipt_DSCP_info *dinfo = targinfo;
+ u_int8_t sh_dscp = ((dinfo->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK);
+
+
+ if (((*pskb)->nh.iph->tos & IPT_DSCP_MASK) != sh_dscp) {
+ u_int16_t diffs[2];
+
+ if (!skb_ip_make_writable(pskb, sizeof(struct iphdr)))
+ return NF_DROP;
+
+ diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
+ (*pskb)->nh.iph->tos = ((*pskb)->nh.iph->tos & ~IPT_DSCP_MASK)
+ | sh_dscp;
+ diffs[1] = htons((*pskb)->nh.iph->tos);
+ (*pskb)->nh.iph->check
+ = csum_fold(csum_partial((char *)diffs,
+ sizeof(diffs),
+ (*pskb)->nh.iph->check
+ ^ 0xFFFF));
+ (*pskb)->nfcache |= NFC_ALTERED;
+ }
+ return IPT_CONTINUE;
+}
+
+static int
+checkentry(const char *tablename,
+ const struct ipt_entry *e,
+ void *targinfo,
+ unsigned int targinfosize,
+ unsigned int hook_mask)
+{
+ const u_int8_t dscp = ((struct ipt_DSCP_info *)targinfo)->dscp;
+
+ if (targinfosize != IPT_ALIGN(sizeof(struct ipt_DSCP_info))) {
+ printk(KERN_WARNING "DSCP: targinfosize %u != %Zu\n",
+ targinfosize,
+ IPT_ALIGN(sizeof(struct ipt_DSCP_info)));
+ return 0;
+ }
+
+ if (strcmp(tablename, "mangle") != 0) {
+ printk(KERN_WARNING "DSCP: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
+ return 0;
+ }
+
+ if ((dscp > IPT_DSCP_MAX)) {
+ printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp);
+ return 0;
+ }
+
+ return 1;
+}
+
+static struct ipt_target ipt_dscp_reg = {
+ .name = "DSCP",
+ .target = target,
+ .checkentry = checkentry,
+ .me = THIS_MODULE,
+};
+
+static int __init init(void)
+{
+ return ipt_register_target(&ipt_dscp_reg);
+}
+
+static void __exit fini(void)
+{
+ ipt_unregister_target(&ipt_dscp_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
new file mode 100644
index 00000000000..ada9911118e
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -0,0 +1,175 @@
+/* iptables module for the IPv4 and TCP ECN bits, Version 1.5
+ *
+ * (C) 2002 by Harald Welte <laforge@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * ipt_ECN.c,v 1.5 2002/08/18 19:36:51 laforge Exp
+*/
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_ECN.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("iptables ECN modification module");
+
+/* set ECT codepoint from IP header.
+ * return 0 if there was an error. */
+static inline int
+set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
+{
+ if (((*pskb)->nh.iph->tos & IPT_ECN_IP_MASK)
+ != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
+ u_int16_t diffs[2];
+
+ if (!skb_ip_make_writable(pskb, sizeof(struct iphdr)))
+ return 0;
+
+ diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
+ (*pskb)->nh.iph->tos &= ~IPT_ECN_IP_MASK;
+ (*pskb)->nh.iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
+ diffs[1] = htons((*pskb)->nh.iph->tos);
+ (*pskb)->nh.iph->check
+ = csum_fold(csum_partial((char *)diffs,
+ sizeof(diffs),
+ (*pskb)->nh.iph->check
+ ^0xFFFF));
+ (*pskb)->nfcache |= NFC_ALTERED;
+ }
+ return 1;
+}
+
+/* Return 0 if there was an error. */
+static inline int
+set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward)
+{
+ struct tcphdr _tcph, *tcph;
+ u_int16_t diffs[2];
+
+ /* Not enought header? */
+ tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
+ sizeof(_tcph), &_tcph);
+ if (!tcph)
+ return 0;
+
+ if (!(einfo->operation & IPT_ECN_OP_SET_ECE
+ || tcph->ece == einfo->proto.tcp.ece)
+ && (!(einfo->operation & IPT_ECN_OP_SET_CWR
+ || tcph->cwr == einfo->proto.tcp.cwr)))
+ return 1;
+
+ if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
+ return 0;
+ tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4;
+
+ diffs[0] = ((u_int16_t *)tcph)[6];
+ if (einfo->operation & IPT_ECN_OP_SET_ECE)
+ tcph->ece = einfo->proto.tcp.ece;
+ if (einfo->operation & IPT_ECN_OP_SET_CWR)
+ tcph->cwr = einfo->proto.tcp.cwr;
+ diffs[1] = ((u_int16_t *)tcph)[6];
+ diffs[0] = diffs[0] ^ 0xFFFF;
+
+ if ((*pskb)->ip_summed != CHECKSUM_HW)
+ tcph->check = csum_fold(csum_partial((char *)diffs,
+ sizeof(diffs),
+ tcph->check^0xFFFF));
+ else
+ if (skb_checksum_help(*pskb, inward))
+ return 0;
+ (*pskb)->nfcache |= NFC_ALTERED;
+ return 1;
+}
+
+static unsigned int
+target(struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ unsigned int hooknum,
+ const void *targinfo,
+ void *userinfo)
+{
+ const struct ipt_ECN_info *einfo = targinfo;
+
+ if (einfo->operation & IPT_ECN_OP_SET_IP)
+ if (!set_ect_ip(pskb, einfo))
+ return NF_DROP;
+
+ if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR)
+ && (*pskb)->nh.iph->protocol == IPPROTO_TCP)
+ if (!set_ect_tcp(pskb, einfo, (out == NULL)))
+ return NF_DROP;
+
+ return IPT_CONTINUE;
+}
+
+static int
+checkentry(const char *tablename,
+ const struct ipt_entry *e,
+ void *targinfo,
+ unsigned int targinfosize,
+ unsigned int hook_mask)
+{
+ const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo;
+
+ if (targinfosize != IPT_ALIGN(sizeof(struct ipt_ECN_info))) {
+ printk(KERN_WARNING "ECN: targinfosize %u != %Zu\n",
+ targinfosize,
+ IPT_ALIGN(sizeof(struct ipt_ECN_info)));
+ return 0;
+ }
+
+ if (strcmp(tablename, "mangle") != 0) {
+ printk(KERN_WARNING "ECN: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
+ return 0;
+ }
+
+ if (einfo->operation & IPT_ECN_OP_MASK) {
+ printk(KERN_WARNING "ECN: unsupported ECN operation %x\n",
+ einfo->operation);
+ return 0;
+ }
+ if (einfo->ip_ect & ~IPT_ECN_IP_MASK) {
+ printk(KERN_WARNING "ECN: new ECT codepoint %x out of mask\n",
+ einfo->ip_ect);
+ return 0;
+ }
+
+ if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR))
+ && (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & IPT_INV_PROTO))) {
+ printk(KERN_WARNING "ECN: cannot use TCP operations on a "
+ "non-tcp rule\n");
+ return 0;
+ }
+
+ return 1;
+}
+
+static struct ipt_target ipt_ecn_reg = {
+ .name = "ECN",
+ .target = target,
+ .checkentry = checkentry,
+ .me = THIS_MODULE,
+};
+
+static int __init init(void)
+{
+ return ipt_register_target(&ipt_ecn_reg);
+}
+
+static void __exit fini(void)
+{
+ ipt_unregister_target(&ipt_ecn_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
new file mode 100644
index 00000000000..ef08733d26d
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -0,0 +1,485 @@
+/*
+ * This is a module which is used for logging packets.
+ */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/tcp.h>
+#include <net/route.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_LOG.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("iptables syslog logging module");
+
+static unsigned int nflog = 1;
+module_param(nflog, int, 0400);
+MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* Use lock to serialize, so printks don't overlap */
+static DEFINE_SPINLOCK(log_lock);
+
+/* One level of recursion won't kill us */
+static void dump_packet(const struct ipt_log_info *info,
+ const struct sk_buff *skb,
+ unsigned int iphoff)
+{
+ struct iphdr _iph, *ih;
+
+ ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
+ if (ih == NULL) {
+ printk("TRUNCATED");
+ return;
+ }
+
+ /* Important fields:
+ * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */
+ /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */
+ printk("SRC=%u.%u.%u.%u DST=%u.%u.%u.%u ",
+ NIPQUAD(ih->saddr), NIPQUAD(ih->daddr));
+
+ /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
+ printk("LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
+ ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
+ ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
+
+ /* Max length: 6 "CE DF MF " */
+ if (ntohs(ih->frag_off) & IP_CE)
+ printk("CE ");
+ if (ntohs(ih->frag_off) & IP_DF)
+ printk("DF ");
+ if (ntohs(ih->frag_off) & IP_MF)
+ printk("MF ");
+
+ /* Max length: 11 "FRAG:65535 " */
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
+
+ if ((info->logflags & IPT_LOG_IPOPT)
+ && ih->ihl * 4 > sizeof(struct iphdr)) {
+ unsigned char _opt[4 * 15 - sizeof(struct iphdr)], *op;
+ unsigned int i, optsize;
+
+ optsize = ih->ihl * 4 - sizeof(struct iphdr);
+ op = skb_header_pointer(skb, iphoff+sizeof(_iph),
+ optsize, _opt);
+ if (op == NULL) {
+ printk("TRUNCATED");
+ return;
+ }
+
+ /* Max length: 127 "OPT (" 15*4*2chars ") " */
+ printk("OPT (");
+ for (i = 0; i < optsize; i++)
+ printk("%02X", op[i]);
+ printk(") ");
+ }
+
+ switch (ih->protocol) {
+ case IPPROTO_TCP: {
+ struct tcphdr _tcph, *th;
+
+ /* Max length: 10 "PROTO=TCP " */
+ printk("PROTO=TCP ");
+
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ break;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ th = skb_header_pointer(skb, iphoff + ih->ihl * 4,
+ sizeof(_tcph), &_tcph);
+ if (th == NULL) {
+ printk("INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+
+ /* Max length: 20 "SPT=65535 DPT=65535 " */
+ printk("SPT=%u DPT=%u ",
+ ntohs(th->source), ntohs(th->dest));
+ /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
+ if (info->logflags & IPT_LOG_TCPSEQ)
+ printk("SEQ=%u ACK=%u ",
+ ntohl(th->seq), ntohl(th->ack_seq));
+ /* Max length: 13 "WINDOW=65535 " */
+ printk("WINDOW=%u ", ntohs(th->window));
+ /* Max length: 9 "RES=0x3F " */
+ printk("RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
+ /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
+ if (th->cwr)
+ printk("CWR ");
+ if (th->ece)
+ printk("ECE ");
+ if (th->urg)
+ printk("URG ");
+ if (th->ack)
+ printk("ACK ");
+ if (th->psh)
+ printk("PSH ");
+ if (th->rst)
+ printk("RST ");
+ if (th->syn)
+ printk("SYN ");
+ if (th->fin)
+ printk("FIN ");
+ /* Max length: 11 "URGP=65535 " */
+ printk("URGP=%u ", ntohs(th->urg_ptr));
+
+ if ((info->logflags & IPT_LOG_TCPOPT)
+ && th->doff * 4 > sizeof(struct tcphdr)) {
+ unsigned char _opt[4 * 15 - sizeof(struct tcphdr)];
+ unsigned char *op;
+ unsigned int i, optsize;
+
+ optsize = th->doff * 4 - sizeof(struct tcphdr);
+ op = skb_header_pointer(skb,
+ iphoff+ih->ihl*4+sizeof(_tcph),
+ optsize, _opt);
+ if (op == NULL) {
+ printk("TRUNCATED");
+ return;
+ }
+
+ /* Max length: 127 "OPT (" 15*4*2chars ") " */
+ printk("OPT (");
+ for (i = 0; i < optsize; i++)
+ printk("%02X", op[i]);
+ printk(") ");
+ }
+ break;
+ }
+ case IPPROTO_UDP: {
+ struct udphdr _udph, *uh;
+
+ /* Max length: 10 "PROTO=UDP " */
+ printk("PROTO=UDP ");
+
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ break;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ uh = skb_header_pointer(skb, iphoff+ih->ihl*4,
+ sizeof(_udph), &_udph);
+ if (uh == NULL) {
+ printk("INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+
+ /* Max length: 20 "SPT=65535 DPT=65535 " */
+ printk("SPT=%u DPT=%u LEN=%u ",
+ ntohs(uh->source), ntohs(uh->dest),
+ ntohs(uh->len));
+ break;
+ }
+ case IPPROTO_ICMP: {
+ struct icmphdr _icmph, *ich;
+ static size_t required_len[NR_ICMP_TYPES+1]
+ = { [ICMP_ECHOREPLY] = 4,
+ [ICMP_DEST_UNREACH]
+ = 8 + sizeof(struct iphdr),
+ [ICMP_SOURCE_QUENCH]
+ = 8 + sizeof(struct iphdr),
+ [ICMP_REDIRECT]
+ = 8 + sizeof(struct iphdr),
+ [ICMP_ECHO] = 4,
+ [ICMP_TIME_EXCEEDED]
+ = 8 + sizeof(struct iphdr),
+ [ICMP_PARAMETERPROB]
+ = 8 + sizeof(struct iphdr),
+ [ICMP_TIMESTAMP] = 20,
+ [ICMP_TIMESTAMPREPLY] = 20,
+ [ICMP_ADDRESS] = 12,
+ [ICMP_ADDRESSREPLY] = 12 };
+
+ /* Max length: 11 "PROTO=ICMP " */
+ printk("PROTO=ICMP ");
+
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ break;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ ich = skb_header_pointer(skb, iphoff + ih->ihl * 4,
+ sizeof(_icmph), &_icmph);
+ if (ich == NULL) {
+ printk("INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+
+ /* Max length: 18 "TYPE=255 CODE=255 " */
+ printk("TYPE=%u CODE=%u ", ich->type, ich->code);
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ if (ich->type <= NR_ICMP_TYPES
+ && required_len[ich->type]
+ && skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) {
+ printk("INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+
+ switch (ich->type) {
+ case ICMP_ECHOREPLY:
+ case ICMP_ECHO:
+ /* Max length: 19 "ID=65535 SEQ=65535 " */
+ printk("ID=%u SEQ=%u ",
+ ntohs(ich->un.echo.id),
+ ntohs(ich->un.echo.sequence));
+ break;
+
+ case ICMP_PARAMETERPROB:
+ /* Max length: 14 "PARAMETER=255 " */
+ printk("PARAMETER=%u ",
+ ntohl(ich->un.gateway) >> 24);
+ break;
+ case ICMP_REDIRECT:
+ /* Max length: 24 "GATEWAY=255.255.255.255 " */
+ printk("GATEWAY=%u.%u.%u.%u ",
+ NIPQUAD(ich->un.gateway));
+ /* Fall through */
+ case ICMP_DEST_UNREACH:
+ case ICMP_SOURCE_QUENCH:
+ case ICMP_TIME_EXCEEDED:
+ /* Max length: 3+maxlen */
+ if (!iphoff) { /* Only recurse once. */
+ printk("[");
+ dump_packet(info, skb,
+ iphoff + ih->ihl*4+sizeof(_icmph));
+ printk("] ");
+ }
+
+ /* Max length: 10 "MTU=65535 " */
+ if (ich->type == ICMP_DEST_UNREACH
+ && ich->code == ICMP_FRAG_NEEDED)
+ printk("MTU=%u ", ntohs(ich->un.frag.mtu));
+ }
+ break;
+ }
+ /* Max Length */
+ case IPPROTO_AH: {
+ struct ip_auth_hdr _ahdr, *ah;
+
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ break;
+
+ /* Max length: 9 "PROTO=AH " */
+ printk("PROTO=AH ");
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ ah = skb_header_pointer(skb, iphoff+ih->ihl*4,
+ sizeof(_ahdr), &_ahdr);
+ if (ah == NULL) {
+ printk("INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+
+ /* Length: 15 "SPI=0xF1234567 " */
+ printk("SPI=0x%x ", ntohl(ah->spi));
+ break;
+ }
+ case IPPROTO_ESP: {
+ struct ip_esp_hdr _esph, *eh;
+
+ /* Max length: 10 "PROTO=ESP " */
+ printk("PROTO=ESP ");
+
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ break;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ eh = skb_header_pointer(skb, iphoff+ih->ihl*4,
+ sizeof(_esph), &_esph);
+ if (eh == NULL) {
+ printk("INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+
+ /* Length: 15 "SPI=0xF1234567 " */
+ printk("SPI=0x%x ", ntohl(eh->spi));
+ break;
+ }
+ /* Max length: 10 "PROTO 255 " */
+ default:
+ printk("PROTO=%u ", ih->protocol);
+ }
+
+ /* Max length: 15 "UID=4294967295 " */
+ if ((info->logflags & IPT_LOG_UID) && !iphoff && skb->sk) {
+ read_lock_bh(&skb->sk->sk_callback_lock);
+ if (skb->sk->sk_socket && skb->sk->sk_socket->file)
+ printk("UID=%u ", skb->sk->sk_socket->file->f_uid);
+ read_unlock_bh(&skb->sk->sk_callback_lock);
+ }
+
+ /* Proto Max log string length */
+ /* IP: 40+46+6+11+127 = 230 */
+ /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */
+ /* UDP: 10+max(25,20) = 35 */
+ /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */
+ /* ESP: 10+max(25)+15 = 50 */
+ /* AH: 9+max(25)+15 = 49 */
+ /* unknown: 10 */
+
+ /* (ICMP allows recursion one level deep) */
+ /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */
+ /* maxlen = 230+ 91 + 230 + 252 = 803 */
+}
+
+static void
+ipt_log_packet(unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct ipt_log_info *loginfo,
+ const char *level_string,
+ const char *prefix)
+{
+ spin_lock_bh(&log_lock);
+ printk(level_string);
+ printk("%sIN=%s OUT=%s ",
+ prefix == NULL ? loginfo->prefix : prefix,
+ in ? in->name : "",
+ out ? out->name : "");
+#ifdef CONFIG_BRIDGE_NETFILTER
+ if (skb->nf_bridge) {
+ struct net_device *physindev = skb->nf_bridge->physindev;
+ struct net_device *physoutdev = skb->nf_bridge->physoutdev;
+
+ if (physindev && in != physindev)
+ printk("PHYSIN=%s ", physindev->name);
+ if (physoutdev && out != physoutdev)
+ printk("PHYSOUT=%s ", physoutdev->name);
+ }
+#endif
+
+ if (in && !out) {
+ /* MAC logging for input chain only. */
+ printk("MAC=");
+ if (skb->dev && skb->dev->hard_header_len
+ && skb->mac.raw != (void*)skb->nh.iph) {
+ int i;
+ unsigned char *p = skb->mac.raw;
+ for (i = 0; i < skb->dev->hard_header_len; i++,p++)
+ printk("%02x%c", *p,
+ i==skb->dev->hard_header_len - 1
+ ? ' ':':');
+ } else
+ printk(" ");
+ }
+
+ dump_packet(loginfo, skb, 0);
+ printk("\n");
+ spin_unlock_bh(&log_lock);
+}
+
+static unsigned int
+ipt_log_target(struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ unsigned int hooknum,
+ const void *targinfo,
+ void *userinfo)
+{
+ const struct ipt_log_info *loginfo = targinfo;
+ char level_string[4] = "< >";
+
+ level_string[1] = '0' + (loginfo->level % 8);
+ ipt_log_packet(hooknum, *pskb, in, out, loginfo, level_string, NULL);
+
+ return IPT_CONTINUE;
+}
+
+static void
+ipt_logfn(unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const char *prefix)
+{
+ struct ipt_log_info loginfo = {
+ .level = 0,
+ .logflags = IPT_LOG_MASK,
+ .prefix = ""
+ };
+
+ ipt_log_packet(hooknum, skb, in, out, &loginfo, KERN_WARNING, prefix);
+}
+
+static int ipt_log_checkentry(const char *tablename,
+ const struct ipt_entry *e,
+ void *targinfo,
+ unsigned int targinfosize,
+ unsigned int hook_mask)
+{
+ const struct ipt_log_info *loginfo = targinfo;
+
+ if (targinfosize != IPT_ALIGN(sizeof(struct ipt_log_info))) {
+ DEBUGP("LOG: targinfosize %u != %u\n",
+ targinfosize, IPT_ALIGN(sizeof(struct ipt_log_info)));
+ return 0;
+ }
+
+ if (loginfo->level >= 8) {
+ DEBUGP("LOG: level %u >= 8\n", loginfo->level);
+ return 0;
+ }
+
+ if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') {
+ DEBUGP("LOG: prefix term %i\n",
+ loginfo->prefix[sizeof(loginfo->prefix)-1]);
+ return 0;
+ }
+
+ return 1;
+}
+
+static struct ipt_target ipt_log_reg = {
+ .name = "LOG",
+ .target = ipt_log_target,
+ .checkentry = ipt_log_checkentry,
+ .me = THIS_MODULE,
+};
+
+static int __init init(void)
+{
+ if (ipt_register_target(&ipt_log_reg))
+ return -EINVAL;
+ if (nflog)
+ nf_log_register(PF_INET, &ipt_logfn);
+
+ return 0;
+}
+
+static void __exit fini(void)
+{
+ if (nflog)
+ nf_log_unregister(PF_INET, &ipt_logfn);
+ ipt_unregister_target(&ipt_log_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_MARK.c b/net/ipv4/netfilter/ipt_MARK.c
new file mode 100644
index 00000000000..33c6f9b63b8
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_MARK.c
@@ -0,0 +1,162 @@
+/* This is a module which is used for setting the NFMARK field of an skb. */
+
+/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_MARK.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
+MODULE_DESCRIPTION("iptables MARK modification module");
+
+static unsigned int
+target_v0(struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ unsigned int hooknum,
+ const void *targinfo,
+ void *userinfo)
+{
+ const struct ipt_mark_target_info *markinfo = targinfo;
+
+ if((*pskb)->nfmark != markinfo->mark) {
+ (*pskb)->nfmark = markinfo->mark;
+ (*pskb)->nfcache |= NFC_ALTERED;
+ }
+ return IPT_CONTINUE;
+}
+
+static unsigned int
+target_v1(struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ unsigned int hooknum,
+ const void *targinfo,
+ void *userinfo)
+{
+ const struct ipt_mark_target_info_v1 *markinfo = targinfo;
+ int mark = 0;
+
+ switch (markinfo->mode) {
+ case IPT_MARK_SET:
+ mark = markinfo->mark;
+ break;
+
+ case IPT_MARK_AND:
+ mark = (*pskb)->nfmark & markinfo->mark;
+ break;
+
+ case IPT_MARK_OR:
+ mark = (*pskb)->nfmark | markinfo->mark;
+ break;
+ }
+
+ if((*pskb)->nfmark != mark) {
+ (*pskb)->nfmark = mark;
+ (*pskb)->nfcache |= NFC_ALTERED;
+ }
+ return IPT_CONTINUE;
+}
+
+
+static int
+checkentry_v0(const char *tablename,
+ const struct ipt_entry *e,
+ void *targinfo,
+ unsigned int targinfosize,
+ unsigned int hook_mask)
+{
+ if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info))) {
+ printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n",
+ targinfosize,
+ IPT_ALIGN(sizeof(struct ipt_mark_target_info)));
+ return 0;
+ }
+
+ if (strcmp(tablename, "mangle") != 0) {
+ printk(KERN_WARNING "MARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
+ return 0;
+ }
+
+ return 1;
+}
+
+static int
+checkentry_v1(const char *tablename,
+ const struct ipt_entry *e,
+ void *targinfo,
+ unsigned int targinfosize,
+ unsigned int hook_mask)
+{
+ struct ipt_mark_target_info_v1 *markinfo = targinfo;
+
+ if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info_v1))){
+ printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n",
+ targinfosize,
+ IPT_ALIGN(sizeof(struct ipt_mark_target_info_v1)));
+ return 0;
+ }
+
+ if (strcmp(tablename, "mangle") != 0) {
+ printk(KERN_WARNING "MARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
+ return 0;
+ }
+
+ if (markinfo->mode != IPT_MARK_SET
+ && markinfo->mode != IPT_MARK_AND
+ && markinfo->mode != IPT_MARK_OR) {
+ printk(KERN_WARNING "MARK: unknown mode %u\n",
+ markinfo->mode);
+ return 0;
+ }
+
+ return 1;
+}
+
+static struct ipt_target ipt_mark_reg_v0 = {
+ .name = "MARK",
+ .target = target_v0,
+ .checkentry = checkentry_v0,
+ .me = THIS_MODULE,
+ .revision = 0,
+};
+
+static struct ipt_target ipt_mark_reg_v1 = {
+ .name = "MARK",
+ .target = target_v1,
+ .checkentry = checkentry_v1,
+ .me = THIS_MODULE,
+ .revision = 1,
+};
+
+static int __init init(void)
+{
+ int err;
+
+ err = ipt_register_target(&ipt_mark_reg_v0);
+ if (!err) {
+ err = ipt_register_target(&ipt_mark_reg_v1);
+ if (err)
+ ipt_unregister_target(&ipt_mark_reg_v0);
+ }
+ return err;
+}
+
+static void __exit fini(void)
+{
+ ipt_unregister_target(&ipt_mark_reg_v0);
+ ipt_unregister_target(&ipt_mark_reg_v1);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
new file mode 100644
index 00000000000..57e9f6cf1c3
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -0,0 +1,207 @@
+/* Masquerade. Simple mapping which alters range to a local IP address
+ (depending on route). */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <net/protocol.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("iptables MASQUERADE target module");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* Lock protects masq region inside conntrack */
+static DECLARE_RWLOCK(masq_lock);
+
+/* FIXME: Multiple targets. --RR */
+static int
+masquerade_check(const char *tablename,
+ const struct ipt_entry *e,
+ void *targinfo,
+ unsigned int targinfosize,
+ unsigned int hook_mask)
+{
+ const struct ip_nat_multi_range_compat *mr = targinfo;
+
+ if (strcmp(tablename, "nat") != 0) {
+ DEBUGP("masquerade_check: bad table `%s'.\n", tablename);
+ return 0;
+ }
+ if (targinfosize != IPT_ALIGN(sizeof(*mr))) {
+ DEBUGP("masquerade_check: size %u != %u.\n",
+ targinfosize, sizeof(*mr));
+ return 0;
+ }
+ if (hook_mask & ~(1 << NF_IP_POST_ROUTING)) {
+ DEBUGP("masquerade_check: bad hooks %x.\n", hook_mask);
+ return 0;
+ }
+ if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
+ DEBUGP("masquerade_check: bad MAP_IPS.\n");
+ return 0;
+ }
+ if (mr->rangesize != 1) {
+ DEBUGP("masquerade_check: bad rangesize %u.\n", mr->rangesize);
+ return 0;
+ }
+ return 1;
+}
+
+static unsigned int
+masquerade_target(struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ unsigned int hooknum,
+ const void *targinfo,
+ void *userinfo)
+{
+ struct ip_conntrack *ct;
+ enum ip_conntrack_info ctinfo;
+ const struct ip_nat_multi_range_compat *mr;
+ struct ip_nat_range newrange;
+ struct rtable *rt;
+ u_int32_t newsrc;
+
+ IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
+
+ /* FIXME: For the moment, don't do local packets, breaks
+ testsuite for 2.3.49 --RR */
+ if ((*pskb)->sk)
+ return NF_ACCEPT;
+
+ ct = ip_conntrack_get(*pskb, &ctinfo);
+ IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
+ || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
+
+ mr = targinfo;
+ rt = (struct rtable *)(*pskb)->dst;
+ newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
+ if (!newsrc) {
+ printk("MASQUERADE: %s ate my IP address\n", out->name);
+ return NF_DROP;
+ }
+
+ WRITE_LOCK(&masq_lock);
+ ct->nat.masq_index = out->ifindex;
+ WRITE_UNLOCK(&masq_lock);
+
+ /* Transfer from original range. */
+ newrange = ((struct ip_nat_range)
+ { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
+ newsrc, newsrc,
+ mr->range[0].min, mr->range[0].max });
+
+ /* Hand modified range to generic setup. */
+ return ip_nat_setup_info(ct, &newrange, hooknum);
+}
+
+static inline int
+device_cmp(struct ip_conntrack *i, void *ifindex)
+{
+ int ret;
+
+ READ_LOCK(&masq_lock);
+ ret = (i->nat.masq_index == (int)(long)ifindex);
+ READ_UNLOCK(&masq_lock);
+
+ return ret;
+}
+
+static int masq_device_event(struct notifier_block *this,
+ unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = ptr;
+
+ if (event == NETDEV_DOWN) {
+ /* Device was downed. Search entire table for
+ conntracks which were associated with that device,
+ and forget them. */
+ IP_NF_ASSERT(dev->ifindex != 0);
+
+ ip_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
+ }
+
+ return NOTIFY_DONE;
+}
+
+static int masq_inet_event(struct notifier_block *this,
+ unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
+
+ if (event == NETDEV_DOWN) {
+ /* IP address was deleted. Search entire table for
+ conntracks which were associated with that device,
+ and forget them. */
+ IP_NF_ASSERT(dev->ifindex != 0);
+
+ ip_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
+ }
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block masq_dev_notifier = {
+ .notifier_call = masq_device_event,
+};
+
+static struct notifier_block masq_inet_notifier = {
+ .notifier_call = masq_inet_event,
+};
+
+static struct ipt_target masquerade = {
+ .name = "MASQUERADE",
+ .target = masquerade_target,
+ .checkentry = masquerade_check,
+ .me = THIS_MODULE,
+};
+
+static int __init init(void)
+{
+ int ret;
+
+ ret = ipt_register_target(&masquerade);
+
+ if (ret == 0) {
+ /* Register for device down reports */
+ register_netdevice_notifier(&masq_dev_notifier);
+ /* Register IP address change reports */
+ register_inetaddr_notifier(&masq_inet_notifier);
+ }
+
+ return ret;
+}
+
+static void __exit fini(void)
+{
+ ipt_unregister_target(&masquerade);
+ unregister_netdevice_notifier(&masq_dev_notifier);
+ unregister_inetaddr_notifier(&masq_inet_notifier);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
new file mode 100644
index 00000000000..06254b29d03
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -0,0 +1,117 @@
+/* NETMAP - static NAT mapping of IP network addresses (1:1).
+ * The mapping can be applied to source (POSTROUTING),
+ * destination (PREROUTING), or both (with separate rules).
+ */
+
+/* (C) 2000-2001 Svenning Soerensen <svenning@post5.tele.dk>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/config.h>
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+
+#define MODULENAME "NETMAP"
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>");
+MODULE_DESCRIPTION("iptables 1:1 NAT mapping of IP networks target");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+static int
+check(const char *tablename,
+ const struct ipt_entry *e,
+ void *targinfo,
+ unsigned int targinfosize,
+ unsigned int hook_mask)
+{
+ const struct ip_nat_multi_range_compat *mr = targinfo;
+
+ if (strcmp(tablename, "nat") != 0) {
+ DEBUGP(MODULENAME":check: bad table `%s'.\n", tablename);
+ return 0;
+ }
+ if (targinfosize != IPT_ALIGN(sizeof(*mr))) {
+ DEBUGP(MODULENAME":check: size %u.\n", targinfosize);
+ return 0;
+ }
+ if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_POST_ROUTING))) {
+ DEBUGP(MODULENAME":check: bad hooks %x.\n", hook_mask);
+ return 0;
+ }
+ if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) {
+ DEBUGP(MODULENAME":check: bad MAP_IPS.\n");
+ return 0;
+ }
+ if (mr->rangesize != 1) {
+ DEBUGP(MODULENAME":check: bad rangesize %u.\n", mr->rangesize);
+ return 0;
+ }
+ return 1;
+}
+
+static unsigned int
+target(struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ unsigned int hooknum,
+ const void *targinfo,
+ void *userinfo)
+{
+ struct ip_conntrack *ct;
+ enum ip_conntrack_info ctinfo;
+ u_int32_t new_ip, netmask;
+ const struct ip_nat_multi_range_compat *mr = targinfo;
+ struct ip_nat_range newrange;
+
+ IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
+ || hooknum == NF_IP_POST_ROUTING);
+ ct = ip_conntrack_get(*pskb, &ctinfo);
+
+ netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
+
+ if (hooknum == NF_IP_PRE_ROUTING)
+ new_ip = (*pskb)->nh.iph->daddr & ~netmask;
+ else
+ new_ip = (*pskb)->nh.iph->saddr & ~netmask;
+ new_ip |= mr->range[0].min_ip & netmask;
+
+ newrange = ((struct ip_nat_range)
+ { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
+ new_ip, new_ip,
+ mr->range[0].min, mr->range[0].max });
+
+ /* Hand modified range to generic setup. */
+ return ip_nat_setup_info(ct, &newrange, hooknum);
+}
+
+static struct ipt_target target_module = {
+ .name = MODULENAME,
+ .target = target,
+ .checkentry = check,
+ .me = THIS_MODULE
+};
+
+static int __init init(void)
+{
+ return ipt_register_target(&target_module);
+}
+
+static void __exit fini(void)
+{
+ ipt_unregister_target(&target_module);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_NOTRACK.c b/net/ipv4/netfilter/ipt_NOTRACK.c
new file mode 100644
index 00000000000..a4bb9b3bc29
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_NOTRACK.c
@@ -0,0 +1,76 @@
+/* This is a module which is used for setting up fake conntracks
+ * on packets so that they are not seen by the conntrack/NAT code.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+
+static unsigned int
+target(struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ unsigned int hooknum,
+ const void *targinfo,
+ void *userinfo)
+{
+ /* Previously seen (loopback)? Ignore. */
+ if ((*pskb)->nfct != NULL)
+ return IPT_CONTINUE;
+
+ /* Attach fake conntrack entry.
+ If there is a real ct entry correspondig to this packet,
+ it'll hang aroun till timing out. We don't deal with it
+ for performance reasons. JK */
+ (*pskb)->nfct = &ip_conntrack_untracked.ct_general;
+ (*pskb)->nfctinfo = IP_CT_NEW;
+ nf_conntrack_get((*pskb)->nfct);
+
+ return IPT_CONTINUE;
+}
+
+static int
+checkentry(const char *tablename,
+ const struct ipt_entry *e,
+ void *targinfo,
+ unsigned int targinfosize,
+ unsigned int hook_mask)
+{
+ if (targinfosize != 0) {
+ printk(KERN_WARNING "NOTRACK: targinfosize %u != 0\n",
+ targinfosize);
+ return 0;
+ }
+
+ if (strcmp(tablename, "raw") != 0) {
+ printk(KERN_WARNING "NOTRACK: can only be called from \"raw\" table, not \"%s\"\n", tablename);
+ return 0;
+ }
+
+ return 1;
+}
+
+static struct ipt_target ipt_notrack_reg = {
+ .name = "NOTRACK",
+ .target = target,
+ .checkentry = checkentry,
+ .me = THIS_MODULE
+};
+
+static int __init init(void)
+{
+ if (ipt_register_target(&ipt_notrack_reg))
+ return -EINVAL;
+
+ return 0;
+}
+
+static void __exit fini(void)
+{
+ ipt_unregister_target(&ipt_notrack_reg);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
new file mode 100644
index 00000000000..d2e13447678
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -0,0 +1,129 @@
+/* Redirect. Simple mapping which alters dst to a local IP address. */
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/netdevice.h>
+#include <linux/if.h>
+#include <linux/inetdevice.h>
+#include <net/protocol.h>
+#include <net/checksum.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("iptables REDIRECT target module");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* FIXME: Take multiple ranges --RR */
+static int
+redirect_check(const char *tablename,
+ const struct ipt_entry *e,
+ void *targinfo,
+ unsigned int targinfosize,
+ unsigned int hook_mask)
+{
+ const struct ip_nat_multi_range_compat *mr = targinfo;
+
+ if (strcmp(tablename, "nat") != 0) {
+ DEBUGP("redirect_check: bad table `%s'.\n", table);
+ return 0;
+ }
+ if (targinfosize != IPT_ALIGN(sizeof(*mr))) {
+ DEBUGP("redirect_check: size %u.\n", targinfosize);
+ return 0;
+ }
+ if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT))) {
+ DEBUGP("redirect_check: bad hooks %x.\n", hook_mask);
+ return 0;
+ }
+ if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
+ DEBUGP("redirect_check: bad MAP_IPS.\n");
+ return 0;
+ }
+ if (mr->rangesize != 1) {
+ DEBUGP("redirect_check: bad rangesize %u.\n", mr->rangesize);
+ return 0;
+ }
+ return 1;
+}
+
+static unsigned int
+redirect_target(struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ unsigned int hooknum,
+ const void *targinfo,
+ void *userinfo)
+{
+ struct ip_conntrack *ct;
+ enum ip_conntrack_info ctinfo;
+ u_int32_t newdst;
+ const struct ip_nat_multi_range_compat *mr = targinfo;
+ struct ip_nat_range newrange;
+
+ IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
+ || hooknum == NF_IP_LOCAL_OUT);
+
+ ct = ip_conntrack_get(*pskb, &ctinfo);
+ IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+
+ /* Local packets: make them go to loopback */
+ if (hooknum == NF_IP_LOCAL_OUT)
+ newdst = htonl(0x7F000001);
+ else {
+ struct in_device *indev;
+
+ /* Device might not have an associated in_device. */
+ indev = (struct in_device *)(*pskb)->dev->ip_ptr;
+ if (indev == NULL || indev->ifa_list == NULL)
+ return NF_DROP;
+
+ /* Grab first address on interface. */
+ newdst = indev->ifa_list->ifa_local;
+ }
+
+ /* Transfer from original range. */
+ newrange = ((struct ip_nat_range)
+ { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
+ newdst, newdst,
+ mr->range[0].min, mr->range[0].max });
+
+ /* Hand modified range to generic setup. */
+ return ip_nat_setup_info(ct, &newrange, hooknum);
+}
+
+static struct ipt_target redirect_reg = {
+ .name = "REDIRECT",
+ .target = redirect_target,
+ .checkentry = redirect_check,
+ .me = THIS_MODULE,
+};
+
+static int __init init(void)
+{
+ return ipt_register_target(&redirect_reg);
+}
+
+static void __exit fini(void)
+{
+ ipt_unregister_target(&redirect_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
new file mode 100644
index 00000000000..266d6497928
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -0,0 +1,335 @@
+/*
+ * This is a module which is used for rejecting packets.
+ * Added support for customized reject packets (Jozsef Kadlecsik).
+ * Added support for ICMP type-3-code-13 (Maciej Soltysiak). [RFC 1812]
+ */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/route.h>
+#include <net/dst.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_REJECT.h>
+#ifdef CONFIG_BRIDGE_NETFILTER
+#include <linux/netfilter_bridge.h>
+#endif
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("iptables REJECT target module");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+static inline struct rtable *route_reverse(struct sk_buff *skb,
+ struct tcphdr *tcph, int hook)
+{
+ struct iphdr *iph = skb->nh.iph;
+ struct dst_entry *odst;
+ struct flowi fl = {};
+ struct rtable *rt;
+
+ /* We don't require ip forwarding to be enabled to be able to
+ * send a RST reply for bridged traffic. */
+ if (hook != NF_IP_FORWARD
+#ifdef CONFIG_BRIDGE_NETFILTER
+ || (skb->nf_bridge && skb->nf_bridge->mask & BRNF_BRIDGED)
+#endif
+ ) {
+ fl.nl_u.ip4_u.daddr = iph->saddr;
+ if (hook == NF_IP_LOCAL_IN)
+ fl.nl_u.ip4_u.saddr = iph->daddr;
+ fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
+
+ if (ip_route_output_key(&rt, &fl) != 0)
+ return NULL;
+ } else {
+ /* non-local src, find valid iif to satisfy
+ * rp-filter when calling ip_route_input. */
+ fl.nl_u.ip4_u.daddr = iph->daddr;
+ if (ip_route_output_key(&rt, &fl) != 0)
+ return NULL;
+
+ odst = skb->dst;
+ if (ip_route_input(skb, iph->saddr, iph->daddr,
+ RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
+ dst_release(&rt->u.dst);
+ return NULL;
+ }
+ dst_release(&rt->u.dst);
+ rt = (struct rtable *)skb->dst;
+ skb->dst = odst;
+
+ fl.nl_u.ip4_u.daddr = iph->saddr;
+ fl.nl_u.ip4_u.saddr = iph->daddr;
+ fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
+ }
+
+ if (rt->u.dst.error) {
+ dst_release(&rt->u.dst);
+ return NULL;
+ }
+
+ fl.proto = IPPROTO_TCP;
+ fl.fl_ip_sport = tcph->dest;
+ fl.fl_ip_dport = tcph->source;
+
+ if (xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0)) {
+ dst_release(&rt->u.dst);
+ rt = NULL;
+ }
+
+ return rt;
+}
+
+/* Send RST reply */
+static void send_reset(struct sk_buff *oldskb, int hook)
+{
+ struct sk_buff *nskb;
+ struct tcphdr _otcph, *oth, *tcph;
+ struct rtable *rt;
+ u_int16_t tmp_port;
+ u_int32_t tmp_addr;
+ int needs_ack;
+ int hh_len;
+
+ /* IP header checks: fragment. */
+ if (oldskb->nh.iph->frag_off & htons(IP_OFFSET))
+ return;
+
+ oth = skb_header_pointer(oldskb, oldskb->nh.iph->ihl * 4,
+ sizeof(_otcph), &_otcph);
+ if (oth == NULL)
+ return;
+
+ /* No RST for RST. */
+ if (oth->rst)
+ return;
+
+ /* FIXME: Check checksum --RR */
+ if ((rt = route_reverse(oldskb, oth, hook)) == NULL)
+ return;
+
+ hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
+
+ /* We need a linear, writeable skb. We also need to expand
+ headroom in case hh_len of incoming interface < hh_len of
+ outgoing interface */
+ nskb = skb_copy_expand(oldskb, hh_len, skb_tailroom(oldskb),
+ GFP_ATOMIC);
+ if (!nskb) {
+ dst_release(&rt->u.dst);
+ return;
+ }
+
+ dst_release(nskb->dst);
+ nskb->dst = &rt->u.dst;
+
+ /* This packet will not be the same as the other: clear nf fields */
+ nf_reset(nskb);
+ nskb->nfcache = 0;
+ nskb->nfmark = 0;
+#ifdef CONFIG_BRIDGE_NETFILTER
+ nf_bridge_put(nskb->nf_bridge);
+ nskb->nf_bridge = NULL;
+#endif
+
+ tcph = (struct tcphdr *)((u_int32_t*)nskb->nh.iph + nskb->nh.iph->ihl);
+
+ /* Swap source and dest */
+ tmp_addr = nskb->nh.iph->saddr;
+ nskb->nh.iph->saddr = nskb->nh.iph->daddr;
+ nskb->nh.iph->daddr = tmp_addr;
+ tmp_port = tcph->source;
+ tcph->source = tcph->dest;
+ tcph->dest = tmp_port;
+
+ /* Truncate to length (no data) */
+ tcph->doff = sizeof(struct tcphdr)/4;
+ skb_trim(nskb, nskb->nh.iph->ihl*4 + sizeof(struct tcphdr));
+ nskb->nh.iph->tot_len = htons(nskb->len);
+
+ if (tcph->ack) {
+ needs_ack = 0;
+ tcph->seq = oth->ack_seq;
+ tcph->ack_seq = 0;
+ } else {
+ needs_ack = 1;
+ tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin
+ + oldskb->len - oldskb->nh.iph->ihl*4
+ - (oth->doff<<2));
+ tcph->seq = 0;
+ }
+
+ /* Reset flags */
+ ((u_int8_t *)tcph)[13] = 0;
+ tcph->rst = 1;
+ tcph->ack = needs_ack;
+
+ tcph->window = 0;
+ tcph->urg_ptr = 0;
+
+ /* Adjust TCP checksum */
+ tcph->check = 0;
+ tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr),
+ nskb->nh.iph->saddr,
+ nskb->nh.iph->daddr,
+ csum_partial((char *)tcph,
+ sizeof(struct tcphdr), 0));
+
+ /* Adjust IP TTL, DF */
+ nskb->nh.iph->ttl = MAXTTL;
+ /* Set DF, id = 0 */
+ nskb->nh.iph->frag_off = htons(IP_DF);
+ nskb->nh.iph->id = 0;
+
+ /* Adjust IP checksum */
+ nskb->nh.iph->check = 0;
+ nskb->nh.iph->check = ip_fast_csum((unsigned char *)nskb->nh.iph,
+ nskb->nh.iph->ihl);
+
+ /* "Never happens" */
+ if (nskb->len > dst_mtu(nskb->dst))
+ goto free_nskb;
+
+ nf_ct_attach(nskb, oldskb);
+
+ NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, nskb, NULL, nskb->dst->dev,
+ dst_output);
+ return;
+
+ free_nskb:
+ kfree_skb(nskb);
+}
+
+static inline void send_unreach(struct sk_buff *skb_in, int code)
+{
+ icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
+}
+
+static unsigned int reject(struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ unsigned int hooknum,
+ const void *targinfo,
+ void *userinfo)
+{
+ const struct ipt_reject_info *reject = targinfo;
+
+ /* Our naive response construction doesn't deal with IP
+ options, and probably shouldn't try. */
+ if ((*pskb)->nh.iph->ihl<<2 != sizeof(struct iphdr))
+ return NF_DROP;
+
+ /* WARNING: This code causes reentry within iptables.
+ This means that the iptables jump stack is now crap. We
+ must return an absolute verdict. --RR */
+ switch (reject->with) {
+ case IPT_ICMP_NET_UNREACHABLE:
+ send_unreach(*pskb, ICMP_NET_UNREACH);
+ break;
+ case IPT_ICMP_HOST_UNREACHABLE:
+ send_unreach(*pskb, ICMP_HOST_UNREACH);
+ break;
+ case IPT_ICMP_PROT_UNREACHABLE:
+ send_unreach(*pskb, ICMP_PROT_UNREACH);
+ break;
+ case IPT_ICMP_PORT_UNREACHABLE:
+ send_unreach(*pskb, ICMP_PORT_UNREACH);
+ break;
+ case IPT_ICMP_NET_PROHIBITED:
+ send_unreach(*pskb, ICMP_NET_ANO);
+ break;
+ case IPT_ICMP_HOST_PROHIBITED:
+ send_unreach(*pskb, ICMP_HOST_ANO);
+ break;
+ case IPT_ICMP_ADMIN_PROHIBITED:
+ send_unreach(*pskb, ICMP_PKT_FILTERED);
+ break;
+ case IPT_TCP_RESET:
+ send_reset(*pskb, hooknum);
+ case IPT_ICMP_ECHOREPLY:
+ /* Doesn't happen. */
+ break;
+ }
+
+ return NF_DROP;
+}
+
+static int check(const char *tablename,
+ const struct ipt_entry *e,
+ void *targinfo,
+ unsigned int targinfosize,
+ unsigned int hook_mask)
+{
+ const struct ipt_reject_info *rejinfo = targinfo;
+
+ if (targinfosize != IPT_ALIGN(sizeof(struct ipt_reject_info))) {
+ DEBUGP("REJECT: targinfosize %u != 0\n", targinfosize);
+ return 0;
+ }
+
+ /* Only allow these for packet filtering. */
+ if (strcmp(tablename, "filter") != 0) {
+ DEBUGP("REJECT: bad table `%s'.\n", tablename);
+ return 0;
+ }
+ if ((hook_mask & ~((1 << NF_IP_LOCAL_IN)
+ | (1 << NF_IP_FORWARD)
+ | (1 << NF_IP_LOCAL_OUT))) != 0) {
+ DEBUGP("REJECT: bad hook mask %X\n", hook_mask);
+ return 0;
+ }
+
+ if (rejinfo->with == IPT_ICMP_ECHOREPLY) {
+ printk("REJECT: ECHOREPLY no longer supported.\n");
+ return 0;
+ } else if (rejinfo->with == IPT_TCP_RESET) {
+ /* Must specify that it's a TCP packet */
+ if (e->ip.proto != IPPROTO_TCP
+ || (e->ip.invflags & IPT_INV_PROTO)) {
+ DEBUGP("REJECT: TCP_RESET invalid for non-tcp\n");
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+static struct ipt_target ipt_reject_reg = {
+ .name = "REJECT",
+ .target = reject,
+ .checkentry = check,
+ .me = THIS_MODULE,
+};
+
+static int __init init(void)
+{
+ return ipt_register_target(&ipt_reject_reg);
+}
+
+static void __exit fini(void)
+{
+ ipt_unregister_target(&ipt_reject_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
new file mode 100644
index 00000000000..7a0536d864a
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -0,0 +1,211 @@
+/* Same. Just like SNAT, only try to make the connections
+ * between client A and server B always have the same source ip.
+ *
+ * (C) 2000 Paul `Rusty' Russell
+ * (C) 2001 Martin Josefsson
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 010320 Martin Josefsson <gandalf@wlug.westbo.se>
+ * * copied ipt_BALANCE.c to ipt_SAME.c and changed a few things.
+ * 010728 Martin Josefsson <gandalf@wlug.westbo.se>
+ * * added --nodst to not include destination-ip in new source
+ * calculations.
+ * * added some more sanity-checks.
+ * 010729 Martin Josefsson <gandalf@wlug.westbo.se>
+ * * fixed a buggy if-statement in same_check(), should have
+ * used ntohl() but didn't.
+ * * added support for multiple ranges. IPT_SAME_MAX_RANGE is
+ * defined in linux/include/linux/netfilter_ipv4/ipt_SAME.h
+ * and is currently set to 10.
+ * * added support for 1-address range, nice to have now that
+ * we have multiple ranges.
+ */
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/netdevice.h>
+#include <linux/if.h>
+#include <linux/inetdevice.h>
+#include <net/protocol.h>
+#include <net/checksum.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/ipt_SAME.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Martin Josefsson <gandalf@wlug.westbo.se>");
+MODULE_DESCRIPTION("iptables special SNAT module for consistent sourceip");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+static int
+same_check(const char *tablename,
+ const struct ipt_entry *e,
+ void *targinfo,
+ unsigned int targinfosize,
+ unsigned int hook_mask)
+{
+ unsigned int count, countess, rangeip, index = 0;
+ struct ipt_same_info *mr = targinfo;
+
+ mr->ipnum = 0;
+
+ if (strcmp(tablename, "nat") != 0) {
+ DEBUGP("same_check: bad table `%s'.\n", tablename);
+ return 0;
+ }
+ if (targinfosize != IPT_ALIGN(sizeof(*mr))) {
+ DEBUGP("same_check: size %u.\n", targinfosize);
+ return 0;
+ }
+ if (hook_mask & ~(1 << NF_IP_PRE_ROUTING | 1 << NF_IP_POST_ROUTING)) {
+ DEBUGP("same_check: bad hooks %x.\n", hook_mask);
+ return 0;
+ }
+ if (mr->rangesize < 1) {
+ DEBUGP("same_check: need at least one dest range.\n");
+ return 0;
+ }
+ if (mr->rangesize > IPT_SAME_MAX_RANGE) {
+ DEBUGP("same_check: too many ranges specified, maximum "
+ "is %u ranges\n",
+ IPT_SAME_MAX_RANGE);
+ return 0;
+ }
+ for (count = 0; count < mr->rangesize; count++) {
+ if (ntohl(mr->range[count].min_ip) >
+ ntohl(mr->range[count].max_ip)) {
+ DEBUGP("same_check: min_ip is larger than max_ip in "
+ "range `%u.%u.%u.%u-%u.%u.%u.%u'.\n",
+ NIPQUAD(mr->range[count].min_ip),
+ NIPQUAD(mr->range[count].max_ip));
+ return 0;
+ }
+ if (!(mr->range[count].flags & IP_NAT_RANGE_MAP_IPS)) {
+ DEBUGP("same_check: bad MAP_IPS.\n");
+ return 0;
+ }
+ rangeip = (ntohl(mr->range[count].max_ip) -
+ ntohl(mr->range[count].min_ip) + 1);
+ mr->ipnum += rangeip;
+
+ DEBUGP("same_check: range %u, ipnum = %u\n", count, rangeip);
+ }
+ DEBUGP("same_check: total ipaddresses = %u\n", mr->ipnum);
+
+ mr->iparray = kmalloc((sizeof(u_int32_t) * mr->ipnum), GFP_KERNEL);
+ if (!mr->iparray) {
+ DEBUGP("same_check: Couldn't allocate %u bytes "
+ "for %u ipaddresses!\n",
+ (sizeof(u_int32_t) * mr->ipnum), mr->ipnum);
+ return 0;
+ }
+ DEBUGP("same_check: Allocated %u bytes for %u ipaddresses.\n",
+ (sizeof(u_int32_t) * mr->ipnum), mr->ipnum);
+
+ for (count = 0; count < mr->rangesize; count++) {
+ for (countess = ntohl(mr->range[count].min_ip);
+ countess <= ntohl(mr->range[count].max_ip);
+ countess++) {
+ mr->iparray[index] = countess;
+ DEBUGP("same_check: Added ipaddress `%u.%u.%u.%u' "
+ "in index %u.\n",
+ HIPQUAD(countess), index);
+ index++;
+ }
+ }
+ return 1;
+}
+
+static void
+same_destroy(void *targinfo,
+ unsigned int targinfosize)
+{
+ struct ipt_same_info *mr = targinfo;
+
+ kfree(mr->iparray);
+
+ DEBUGP("same_destroy: Deallocated %u bytes for %u ipaddresses.\n",
+ (sizeof(u_int32_t) * mr->ipnum), mr->ipnum);
+}
+
+static unsigned int
+same_target(struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ unsigned int hooknum,
+ const void *targinfo,
+ void *userinfo)
+{
+ struct ip_conntrack *ct;
+ enum ip_conntrack_info ctinfo;
+ u_int32_t tmpip, aindex, new_ip;
+ const struct ipt_same_info *same = targinfo;
+ struct ip_nat_range newrange;
+ const struct ip_conntrack_tuple *t;
+
+ IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
+ hooknum == NF_IP_POST_ROUTING);
+ ct = ip_conntrack_get(*pskb, &ctinfo);
+
+ t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+
+ /* Base new source on real src ip and optionally dst ip,
+ giving some hope for consistency across reboots.
+ Here we calculate the index in same->iparray which
+ holds the ipaddress we should use */
+
+ tmpip = ntohl(t->src.ip);
+
+ if (!(same->info & IPT_SAME_NODST))
+ tmpip += ntohl(t->dst.ip);
+
+ aindex = tmpip % same->ipnum;
+
+ new_ip = htonl(same->iparray[aindex]);
+
+ DEBUGP("ipt_SAME: src=%u.%u.%u.%u dst=%u.%u.%u.%u, "
+ "new src=%u.%u.%u.%u\n",
+ NIPQUAD(t->src.ip), NIPQUAD(t->dst.ip),
+ NIPQUAD(new_ip));
+
+ /* Transfer from original range. */
+ newrange = ((struct ip_nat_range)
+ { same->range[0].flags, new_ip, new_ip,
+ /* FIXME: Use ports from correct range! */
+ same->range[0].min, same->range[0].max });
+
+ /* Hand modified range to generic setup. */
+ return ip_nat_setup_info(ct, &newrange, hooknum);
+}
+
+static struct ipt_target same_reg = {
+ .name = "SAME",
+ .target = same_target,
+ .checkentry = same_check,
+ .destroy = same_destroy,
+ .me = THIS_MODULE,
+};
+
+static int __init init(void)
+{
+ return ipt_register_target(&same_reg);
+}
+
+static void __exit fini(void)
+{
+ ipt_unregister_target(&same_reg);
+}
+
+module_init(init);
+module_exit(fini);
+
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
new file mode 100644
index 00000000000..1049050b2bf
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -0,0 +1,262 @@
+/*
+ * This is a module which is used for setting the MSS option in TCP packets.
+ *
+ * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/ip.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_TCPMSS.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
+MODULE_DESCRIPTION("iptables TCP MSS modification module");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+static u_int16_t
+cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
+{
+ u_int32_t diffs[] = { oldvalinv, newval };
+ return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
+ oldcheck^0xFFFF));
+}
+
+static inline unsigned int
+optlen(const u_int8_t *opt, unsigned int offset)
+{
+ /* Beware zero-length options: make finite progress */
+ if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) return 1;
+ else return opt[offset+1];
+}
+
+static unsigned int
+ipt_tcpmss_target(struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ unsigned int hooknum,
+ const void *targinfo,
+ void *userinfo)
+{
+ const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
+ struct tcphdr *tcph;
+ struct iphdr *iph;
+ u_int16_t tcplen, newtotlen, oldval, newmss;
+ unsigned int i;
+ u_int8_t *opt;
+
+ if (!skb_ip_make_writable(pskb, (*pskb)->len))
+ return NF_DROP;
+
+ iph = (*pskb)->nh.iph;
+ tcplen = (*pskb)->len - iph->ihl*4;
+
+ tcph = (void *)iph + iph->ihl*4;
+
+ /* Since it passed flags test in tcp match, we know it is is
+ not a fragment, and has data >= tcp header length. SYN
+ packets should not contain data: if they did, then we risk
+ running over MTU, sending Frag Needed and breaking things
+ badly. --RR */
+ if (tcplen != tcph->doff*4) {
+ if (net_ratelimit())
+ printk(KERN_ERR
+ "ipt_tcpmss_target: bad length (%d bytes)\n",
+ (*pskb)->len);
+ return NF_DROP;
+ }
+
+ if(tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) {
+ if(!(*pskb)->dst) {
+ if (net_ratelimit())
+ printk(KERN_ERR
+ "ipt_tcpmss_target: no dst?! can't determine path-MTU\n");
+ return NF_DROP; /* or IPT_CONTINUE ?? */
+ }
+
+ if(dst_mtu((*pskb)->dst) <= (sizeof(struct iphdr) + sizeof(struct tcphdr))) {
+ if (net_ratelimit())
+ printk(KERN_ERR
+ "ipt_tcpmss_target: unknown or invalid path-MTU (%d)\n", dst_mtu((*pskb)->dst));
+ return NF_DROP; /* or IPT_CONTINUE ?? */
+ }
+
+ newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) - sizeof(struct tcphdr);
+ } else
+ newmss = tcpmssinfo->mss;
+
+ opt = (u_int8_t *)tcph;
+ for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)){
+ if ((opt[i] == TCPOPT_MSS) &&
+ ((tcph->doff*4 - i) >= TCPOLEN_MSS) &&
+ (opt[i+1] == TCPOLEN_MSS)) {
+ u_int16_t oldmss;
+
+ oldmss = (opt[i+2] << 8) | opt[i+3];
+
+ if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) &&
+ (oldmss <= newmss))
+ return IPT_CONTINUE;
+
+ opt[i+2] = (newmss & 0xff00) >> 8;
+ opt[i+3] = (newmss & 0x00ff);
+
+ tcph->check = cheat_check(htons(oldmss)^0xFFFF,
+ htons(newmss),
+ tcph->check);
+
+ DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu"
+ "->%u.%u.%u.%u:%hu changed TCP MSS option"
+ " (from %u to %u)\n",
+ NIPQUAD((*pskb)->nh.iph->saddr),
+ ntohs(tcph->source),
+ NIPQUAD((*pskb)->nh.iph->daddr),
+ ntohs(tcph->dest),
+ oldmss, newmss);
+ goto retmodified;
+ }
+ }
+
+ /*
+ * MSS Option not found ?! add it..
+ */
+ if (skb_tailroom((*pskb)) < TCPOLEN_MSS) {
+ struct sk_buff *newskb;
+
+ newskb = skb_copy_expand(*pskb, skb_headroom(*pskb),
+ TCPOLEN_MSS, GFP_ATOMIC);
+ if (!newskb) {
+ if (net_ratelimit())
+ printk(KERN_ERR "ipt_tcpmss_target:"
+ " unable to allocate larger skb\n");
+ return NF_DROP;
+ }
+
+ kfree_skb(*pskb);
+ *pskb = newskb;
+ iph = (*pskb)->nh.iph;
+ tcph = (void *)iph + iph->ihl*4;
+ }
+
+ skb_put((*pskb), TCPOLEN_MSS);
+
+ opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
+ memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
+
+ tcph->check = cheat_check(htons(tcplen) ^ 0xFFFF,
+ htons(tcplen + TCPOLEN_MSS), tcph->check);
+ tcplen += TCPOLEN_MSS;
+
+ opt[0] = TCPOPT_MSS;
+ opt[1] = TCPOLEN_MSS;
+ opt[2] = (newmss & 0xff00) >> 8;
+ opt[3] = (newmss & 0x00ff);
+
+ tcph->check = cheat_check(~0, *((u_int32_t *)opt), tcph->check);
+
+ oldval = ((u_int16_t *)tcph)[6];
+ tcph->doff += TCPOLEN_MSS/4;
+ tcph->check = cheat_check(oldval ^ 0xFFFF,
+ ((u_int16_t *)tcph)[6], tcph->check);
+
+ newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS);
+ iph->check = cheat_check(iph->tot_len ^ 0xFFFF,
+ newtotlen, iph->check);
+ iph->tot_len = newtotlen;
+
+ DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu"
+ "->%u.%u.%u.%u:%hu added TCP MSS option (%u)\n",
+ NIPQUAD((*pskb)->nh.iph->saddr),
+ ntohs(tcph->source),
+ NIPQUAD((*pskb)->nh.iph->daddr),
+ ntohs(tcph->dest),
+ newmss);
+
+ retmodified:
+ /* We never hw checksum SYN packets. */
+ BUG_ON((*pskb)->ip_summed == CHECKSUM_HW);
+
+ (*pskb)->nfcache |= NFC_UNKNOWN | NFC_ALTERED;
+ return IPT_CONTINUE;
+}
+
+#define TH_SYN 0x02
+
+static inline int find_syn_match(const struct ipt_entry_match *m)
+{
+ const struct ipt_tcp *tcpinfo = (const struct ipt_tcp *)m->data;
+
+ if (strcmp(m->u.kernel.match->name, "tcp") == 0
+ && (tcpinfo->flg_cmp & TH_SYN)
+ && !(tcpinfo->invflags & IPT_TCP_INV_FLAGS))
+ return 1;
+
+ return 0;
+}
+
+/* Must specify -p tcp --syn/--tcp-flags SYN */
+static int
+ipt_tcpmss_checkentry(const char *tablename,
+ const struct ipt_entry *e,
+ void *targinfo,
+ unsigned int targinfosize,
+ unsigned int hook_mask)
+{
+ const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
+
+ if (targinfosize != IPT_ALIGN(sizeof(struct ipt_tcpmss_info))) {
+ DEBUGP("ipt_tcpmss_checkentry: targinfosize %u != %u\n",
+ targinfosize, IPT_ALIGN(sizeof(struct ipt_tcpmss_info)));
+ return 0;
+ }
+
+
+ if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) &&
+ ((hook_mask & ~((1 << NF_IP_FORWARD)
+ | (1 << NF_IP_LOCAL_OUT)
+ | (1 << NF_IP_POST_ROUTING))) != 0)) {
+ printk("TCPMSS: path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n");
+ return 0;
+ }
+
+ if (e->ip.proto == IPPROTO_TCP
+ && !(e->ip.invflags & IPT_INV_PROTO)
+ && IPT_MATCH_ITERATE(e, find_syn_match))
+ return 1;
+
+ printk("TCPMSS: Only works on TCP SYN packets\n");
+ return 0;
+}
+
+static struct ipt_target ipt_tcpmss_reg = {
+ .name = "TCPMSS",
+ .target = ipt_tcpmss_target,
+ .checkentry = ipt_tcpmss_checkentry,
+ .me = THIS_MODULE,
+};
+
+static int __init init(void)
+{
+ return ipt_register_target(&ipt_tcpmss_reg);
+}
+
+static void __exit fini(void)
+{
+ ipt_unregister_target(&ipt_tcpmss_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
new file mode 100644
index 00000000000..85c70d240f8
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -0,0 +1,105 @@
+/* This is a module which is used for setting the TOS field of a packet. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_TOS.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("iptables TOS mangling module");
+
+static unsigned int
+target(struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ unsigned int hooknum,
+ const void *targinfo,
+ void *userinfo)
+{
+ const struct ipt_tos_target_info *tosinfo = targinfo;
+
+ if (((*pskb)->nh.iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
+ u_int16_t diffs[2];
+
+ if (!skb_ip_make_writable(pskb, sizeof(struct iphdr)))
+ return NF_DROP;
+
+ diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
+ (*pskb)->nh.iph->tos
+ = ((*pskb)->nh.iph->tos & IPTOS_PREC_MASK)
+ | tosinfo->tos;
+ diffs[1] = htons((*pskb)->nh.iph->tos);
+ (*pskb)->nh.iph->check
+ = csum_fold(csum_partial((char *)diffs,
+ sizeof(diffs),
+ (*pskb)->nh.iph->check
+ ^0xFFFF));
+ (*pskb)->nfcache |= NFC_ALTERED;
+ }
+ return IPT_CONTINUE;
+}
+
+static int
+checkentry(const char *tablename,
+ const struct ipt_entry *e,
+ void *targinfo,
+ unsigned int targinfosize,
+ unsigned int hook_mask)
+{
+ const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos;
+
+ if (targinfosize != IPT_ALIGN(sizeof(struct ipt_tos_target_info))) {
+ printk(KERN_WARNING "TOS: targinfosize %u != %Zu\n",
+ targinfosize,
+ IPT_ALIGN(sizeof(struct ipt_tos_target_info)));
+ return 0;
+ }
+
+ if (strcmp(tablename, "mangle") != 0) {
+ printk(KERN_WARNING "TOS: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
+ return 0;
+ }
+
+ if (tos != IPTOS_LOWDELAY
+ && tos != IPTOS_THROUGHPUT
+ && tos != IPTOS_RELIABILITY
+ && tos != IPTOS_MINCOST
+ && tos != IPTOS_NORMALSVC) {
+ printk(KERN_WARNING "TOS: bad tos value %#x\n", tos);
+ return 0;
+ }
+
+ return 1;
+}
+
+static struct ipt_target ipt_tos_reg = {
+ .name = "TOS",
+ .target = target,
+ .checkentry = checkentry,
+ .me = THIS_MODULE,
+};
+
+static int __init init(void)
+{
+ return ipt_register_target(&ipt_tos_reg);
+}
+
+static void __exit fini(void)
+{
+ ipt_unregister_target(&ipt_tos_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
new file mode 100644
index 00000000000..6f2cefbe16c
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -0,0 +1,419 @@
+/*
+ * netfilter module for userspace packet logging daemons
+ *
+ * (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
+ *
+ * 2000/09/22 ulog-cprange feature added
+ * 2001/01/04 in-kernel queue as proposed by Sebastian Zander
+ * <zander@fokus.gmd.de>
+ * 2001/01/30 per-rule nlgroup conflicts with global queue.
+ * nlgroup now global (sysctl)
+ * 2001/04/19 ulog-queue reworked, now fixed buffer size specified at
+ * module loadtime -HW
+ * 2002/07/07 remove broken nflog_rcv() function -HW
+ * 2002/08/29 fix shifted/unshifted nlgroup bug -HW
+ * 2002/10/30 fix uninitialized mac_len field - <Anders K. Pedersen>
+ * 2004/10/25 fix erroneous calculation of 'len' parameter to NLMSG_PUT
+ * resulting in bogus 'error during NLMSG_PUT' messages.
+ *
+ * (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This module accepts two parameters:
+ *
+ * nlbufsiz:
+ * The parameter specifies how big the buffer for each netlink multicast
+ * group is. e.g. If you say nlbufsiz=8192, up to eight kb of packets will
+ * get accumulated in the kernel until they are sent to userspace. It is
+ * NOT possible to allocate more than 128kB, and it is strongly discouraged,
+ * because atomically allocating 128kB inside the network rx softirq is not
+ * reliable. Please also keep in mind that this buffer size is allocated for
+ * each nlgroup you are using, so the total kernel memory usage increases
+ * by that factor.
+ *
+ * flushtimeout:
+ * Specify, after how many hundredths of a second the queue should be
+ * flushed even if it is not full yet.
+ *
+ * ipt_ULOG.c,v 1.22 2002/10/30 09:07:31 laforge Exp
+ */
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/spinlock.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/netlink.h>
+#include <linux/netdevice.h>
+#include <linux/mm.h>
+#include <linux/moduleparam.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_ULOG.h>
+#include <linux/netfilter_ipv4/lockhelp.h>
+#include <net/sock.h>
+#include <linux/bitops.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("iptables userspace logging module");
+
+#define ULOG_NL_EVENT 111 /* Harald's favorite number */
+#define ULOG_MAXNLGROUPS 32 /* numer of nlgroups */
+
+#if 0
+#define DEBUGP(format, args...) printk("%s:%s:" format, \
+ __FILE__, __FUNCTION__ , ## args)
+#else
+#define DEBUGP(format, args...)
+#endif
+
+#define PRINTR(format, args...) do { if (net_ratelimit()) printk(format , ## args); } while (0)
+
+static unsigned int nlbufsiz = 4096;
+module_param(nlbufsiz, uint, 0600); /* FIXME: Check size < 128k --RR */
+MODULE_PARM_DESC(nlbufsiz, "netlink buffer size");
+
+static unsigned int flushtimeout = 10;
+module_param(flushtimeout, int, 0600);
+MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)");
+
+static unsigned int nflog = 1;
+module_param(nflog, int, 0400);
+MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
+
+/* global data structures */
+
+typedef struct {
+ unsigned int qlen; /* number of nlmsgs' in the skb */
+ struct nlmsghdr *lastnlh; /* netlink header of last msg in skb */
+ struct sk_buff *skb; /* the pre-allocated skb */
+ struct timer_list timer; /* the timer function */
+} ulog_buff_t;
+
+static ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; /* array of buffers */
+
+static struct sock *nflognl; /* our socket */
+static DECLARE_LOCK(ulog_lock); /* spinlock */
+
+/* send one ulog_buff_t to userspace */
+static void ulog_send(unsigned int nlgroupnum)
+{
+ ulog_buff_t *ub = &ulog_buffers[nlgroupnum];
+
+ if (timer_pending(&ub->timer)) {
+ DEBUGP("ipt_ULOG: ulog_send: timer was pending, deleting\n");
+ del_timer(&ub->timer);
+ }
+
+ /* last nlmsg needs NLMSG_DONE */
+ if (ub->qlen > 1)
+ ub->lastnlh->nlmsg_type = NLMSG_DONE;
+
+ NETLINK_CB(ub->skb).dst_groups = (1 << nlgroupnum);
+ DEBUGP("ipt_ULOG: throwing %d packets to netlink mask %u\n",
+ ub->qlen, nlgroupnum);
+ netlink_broadcast(nflognl, ub->skb, 0, (1 << nlgroupnum), GFP_ATOMIC);
+
+ ub->qlen = 0;
+ ub->skb = NULL;
+ ub->lastnlh = NULL;
+
+}
+
+
+/* timer function to flush queue in flushtimeout time */
+static void ulog_timer(unsigned long data)
+{
+ DEBUGP("ipt_ULOG: timer function called, calling ulog_send\n");
+
+ /* lock to protect against somebody modifying our structure
+ * from ipt_ulog_target at the same time */
+ LOCK_BH(&ulog_lock);
+ ulog_send(data);
+ UNLOCK_BH(&ulog_lock);
+}
+
+static struct sk_buff *ulog_alloc_skb(unsigned int size)
+{
+ struct sk_buff *skb;
+
+ /* alloc skb which should be big enough for a whole
+ * multipart message. WARNING: has to be <= 131000
+ * due to slab allocator restrictions */
+
+ skb = alloc_skb(nlbufsiz, GFP_ATOMIC);
+ if (!skb) {
+ PRINTR("ipt_ULOG: can't alloc whole buffer %ub!\n",
+ nlbufsiz);
+
+ /* try to allocate only as much as we need for
+ * current packet */
+
+ skb = alloc_skb(size, GFP_ATOMIC);
+ if (!skb)
+ PRINTR("ipt_ULOG: can't even allocate %ub\n", size);
+ }
+
+ return skb;
+}
+
+static void ipt_ulog_packet(unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct ipt_ulog_info *loginfo,
+ const char *prefix)
+{
+ ulog_buff_t *ub;
+ ulog_packet_msg_t *pm;
+ size_t size, copy_len;
+ struct nlmsghdr *nlh;
+
+ /* ffs == find first bit set, necessary because userspace
+ * is already shifting groupnumber, but we need unshifted.
+ * ffs() returns [1..32], we need [0..31] */
+ unsigned int groupnum = ffs(loginfo->nl_group) - 1;
+
+ /* calculate the size of the skb needed */
+ if ((loginfo->copy_range == 0) ||
+ (loginfo->copy_range > skb->len)) {
+ copy_len = skb->len;
+ } else {
+ copy_len = loginfo->copy_range;
+ }
+
+ size = NLMSG_SPACE(sizeof(*pm) + copy_len);
+
+ ub = &ulog_buffers[groupnum];
+
+ LOCK_BH(&ulog_lock);
+
+ if (!ub->skb) {
+ if (!(ub->skb = ulog_alloc_skb(size)))
+ goto alloc_failure;
+ } else if (ub->qlen >= loginfo->qthreshold ||
+ size > skb_tailroom(ub->skb)) {
+ /* either the queue len is too high or we don't have
+ * enough room in nlskb left. send it to userspace. */
+
+ ulog_send(groupnum);
+
+ if (!(ub->skb = ulog_alloc_skb(size)))
+ goto alloc_failure;
+ }
+
+ DEBUGP("ipt_ULOG: qlen %d, qthreshold %d\n", ub->qlen,
+ loginfo->qthreshold);
+
+ /* NLMSG_PUT contains a hidden goto nlmsg_failure !!! */
+ nlh = NLMSG_PUT(ub->skb, 0, ub->qlen, ULOG_NL_EVENT,
+ sizeof(*pm)+copy_len);
+ ub->qlen++;
+
+ pm = NLMSG_DATA(nlh);
+
+ /* We might not have a timestamp, get one */
+ if (skb->stamp.tv_sec == 0)
+ do_gettimeofday((struct timeval *)&skb->stamp);
+
+ /* copy hook, prefix, timestamp, payload, etc. */
+ pm->data_len = copy_len;
+ pm->timestamp_sec = skb->stamp.tv_sec;
+ pm->timestamp_usec = skb->stamp.tv_usec;
+ pm->mark = skb->nfmark;
+ pm->hook = hooknum;
+ if (prefix != NULL)
+ strncpy(pm->prefix, prefix, sizeof(pm->prefix));
+ else if (loginfo->prefix[0] != '\0')
+ strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix));
+ else
+ *(pm->prefix) = '\0';
+
+ if (in && in->hard_header_len > 0
+ && skb->mac.raw != (void *) skb->nh.iph
+ && in->hard_header_len <= ULOG_MAC_LEN) {
+ memcpy(pm->mac, skb->mac.raw, in->hard_header_len);
+ pm->mac_len = in->hard_header_len;
+ } else
+ pm->mac_len = 0;
+
+ if (in)
+ strncpy(pm->indev_name, in->name, sizeof(pm->indev_name));
+ else
+ pm->indev_name[0] = '\0';
+
+ if (out)
+ strncpy(pm->outdev_name, out->name, sizeof(pm->outdev_name));
+ else
+ pm->outdev_name[0] = '\0';
+
+ /* copy_len <= skb->len, so can't fail. */
+ if (skb_copy_bits(skb, 0, pm->payload, copy_len) < 0)
+ BUG();
+
+ /* check if we are building multi-part messages */
+ if (ub->qlen > 1) {
+ ub->lastnlh->nlmsg_flags |= NLM_F_MULTI;
+ }
+
+ ub->lastnlh = nlh;
+
+ /* if timer isn't already running, start it */
+ if (!timer_pending(&ub->timer)) {
+ ub->timer.expires = jiffies + flushtimeout * HZ / 100;
+ add_timer(&ub->timer);
+ }
+
+ /* if threshold is reached, send message to userspace */
+ if (ub->qlen >= loginfo->qthreshold) {
+ if (loginfo->qthreshold > 1)
+ nlh->nlmsg_type = NLMSG_DONE;
+ ulog_send(groupnum);
+ }
+
+ UNLOCK_BH(&ulog_lock);
+
+ return;
+
+nlmsg_failure:
+ PRINTR("ipt_ULOG: error during NLMSG_PUT\n");
+
+alloc_failure:
+ PRINTR("ipt_ULOG: Error building netlink message\n");
+
+ UNLOCK_BH(&ulog_lock);
+}
+
+static unsigned int ipt_ulog_target(struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ unsigned int hooknum,
+ const void *targinfo, void *userinfo)
+{
+ struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
+
+ ipt_ulog_packet(hooknum, *pskb, in, out, loginfo, NULL);
+
+ return IPT_CONTINUE;
+}
+
+static void ipt_logfn(unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const char *prefix)
+{
+ struct ipt_ulog_info loginfo = {
+ .nl_group = ULOG_DEFAULT_NLGROUP,
+ .copy_range = 0,
+ .qthreshold = ULOG_DEFAULT_QTHRESHOLD,
+ .prefix = ""
+ };
+
+ ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
+}
+
+static int ipt_ulog_checkentry(const char *tablename,
+ const struct ipt_entry *e,
+ void *targinfo,
+ unsigned int targinfosize,
+ unsigned int hookmask)
+{
+ struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
+
+ if (targinfosize != IPT_ALIGN(sizeof(struct ipt_ulog_info))) {
+ DEBUGP("ipt_ULOG: targinfosize %u != 0\n", targinfosize);
+ return 0;
+ }
+
+ if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') {
+ DEBUGP("ipt_ULOG: prefix term %i\n",
+ loginfo->prefix[sizeof(loginfo->prefix) - 1]);
+ return 0;
+ }
+
+ if (loginfo->qthreshold > ULOG_MAX_QLEN) {
+ DEBUGP("ipt_ULOG: queue threshold %i > MAX_QLEN\n",
+ loginfo->qthreshold);
+ return 0;
+ }
+
+ return 1;
+}
+
+static struct ipt_target ipt_ulog_reg = {
+ .name = "ULOG",
+ .target = ipt_ulog_target,
+ .checkentry = ipt_ulog_checkentry,
+ .me = THIS_MODULE,
+};
+
+static int __init init(void)
+{
+ int i;
+
+ DEBUGP("ipt_ULOG: init module\n");
+
+ if (nlbufsiz >= 128*1024) {
+ printk("Netlink buffer has to be <= 128kB\n");
+ return -EINVAL;
+ }
+
+ /* initialize ulog_buffers */
+ for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
+ init_timer(&ulog_buffers[i].timer);
+ ulog_buffers[i].timer.function = ulog_timer;
+ ulog_buffers[i].timer.data = i;
+ }
+
+ nflognl = netlink_kernel_create(NETLINK_NFLOG, NULL);
+ if (!nflognl)
+ return -ENOMEM;
+
+ if (ipt_register_target(&ipt_ulog_reg) != 0) {
+ sock_release(nflognl->sk_socket);
+ return -EINVAL;
+ }
+ if (nflog)
+ nf_log_register(PF_INET, &ipt_logfn);
+
+ return 0;
+}
+
+static void __exit fini(void)
+{
+ ulog_buff_t *ub;
+ int i;
+
+ DEBUGP("ipt_ULOG: cleanup_module\n");
+
+ if (nflog)
+ nf_log_unregister(PF_INET, &ipt_logfn);
+ ipt_unregister_target(&ipt_ulog_reg);
+ sock_release(nflognl->sk_socket);
+
+ /* remove pending timers and free allocated skb's */
+ for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
+ ub = &ulog_buffers[i];
+ if (timer_pending(&ub->timer)) {
+ DEBUGP("timer was pending, deleting\n");
+ del_timer(&ub->timer);
+ }
+
+ if (ub->skb) {
+ kfree_skb(ub->skb);
+ ub->skb = NULL;
+ }
+ }
+
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
new file mode 100644
index 00000000000..f5909a4c3fc
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -0,0 +1,77 @@
+/*
+ * iptables module to match inet_addr_type() of an ip.
+ *
+ * Copyright (c) 2004 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/ip.h>
+#include <net/route.h>
+
+#include <linux/netfilter_ipv4/ipt_addrtype.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("iptables addrtype match");
+
+static inline int match_type(u_int32_t addr, u_int16_t mask)
+{
+ return !!(mask & (1 << inet_addr_type(addr)));
+}
+
+static int match(const struct sk_buff *skb, const struct net_device *in,
+ const struct net_device *out, const void *matchinfo,
+ int offset, int *hotdrop)
+{
+ const struct ipt_addrtype_info *info = matchinfo;
+ const struct iphdr *iph = skb->nh.iph;
+ int ret = 1;
+
+ if (info->source)
+ ret &= match_type(iph->saddr, info->source)^info->invert_source;
+ if (info->dest)
+ ret &= match_type(iph->daddr, info->dest)^info->invert_dest;
+
+ return ret;
+}
+
+static int checkentry(const char *tablename, const struct ipt_ip *ip,
+ void *matchinfo, unsigned int matchsize,
+ unsigned int hook_mask)
+{
+ if (matchsize != IPT_ALIGN(sizeof(struct ipt_addrtype_info))) {
+ printk(KERN_ERR "ipt_addrtype: invalid size (%u != %Zu)\n.",
+ matchsize, IPT_ALIGN(sizeof(struct ipt_addrtype_info)));
+ return 0;
+ }
+
+ return 1;
+}
+
+static struct ipt_match addrtype_match = {
+ .name = "addrtype",
+ .match = match,
+ .checkentry = checkentry,
+ .me = THIS_MODULE
+};
+
+static int __init init(void)
+{
+ return ipt_register_match(&addrtype_match);
+}
+
+static void __exit fini(void)
+{
+ ipt_unregister_match(&addrtype_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
new file mode 100644
index 00000000000..a0fea847cb7
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -0,0 +1,117 @@
+/* Kernel module to match AH parameters. */
+/* (C) 1999-2000 Yon Uriarte <yon@astaro.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+
+#include <linux/netfilter_ipv4/ipt_ah.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>");
+MODULE_DESCRIPTION("iptables AH SPI match module");
+
+#ifdef DEBUG_CONNTRACK
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+/* Returns 1 if the spi is matched by the range, 0 otherwise */
+static inline int
+spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert)
+{
+ int r=0;
+ duprintf("ah spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
+ min,spi,max);
+ r=(spi >= min && spi <= max) ^ invert;
+ duprintf(" result %s\n",r? "PASS" : "FAILED");
+ return r;
+}
+
+static int
+match(const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const void *matchinfo,
+ int offset,
+ int *hotdrop)
+{
+ struct ip_auth_hdr _ahdr, *ah;
+ const struct ipt_ah *ahinfo = matchinfo;
+
+ /* Must not be a fragment. */
+ if (offset)
+ return 0;
+
+ ah = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+ sizeof(_ahdr), &_ahdr);
+ if (ah == NULL) {
+ /* We've been asked to examine this packet, and we
+ * can't. Hence, no choice but to drop.
+ */
+ duprintf("Dropping evil AH tinygram.\n");
+ *hotdrop = 1;
+ return 0;
+ }
+
+ return spi_match(ahinfo->spis[0], ahinfo->spis[1],
+ ntohl(ah->spi),
+ !!(ahinfo->invflags & IPT_AH_INV_SPI));
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+checkentry(const char *tablename,
+ const struct ipt_ip *ip,
+ void *matchinfo,
+ unsigned int matchinfosize,
+ unsigned int hook_mask)
+{
+ const struct ipt_ah *ahinfo = matchinfo;
+
+ /* Must specify proto == AH, and no unknown invflags */
+ if (ip->proto != IPPROTO_AH || (ip->invflags & IPT_INV_PROTO)) {
+ duprintf("ipt_ah: Protocol %u != %u\n", ip->proto,
+ IPPROTO_AH);
+ return 0;
+ }
+ if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_ah))) {
+ duprintf("ipt_ah: matchsize %u != %u\n",
+ matchinfosize, IPT_ALIGN(sizeof(struct ipt_ah)));
+ return 0;
+ }
+ if (ahinfo->invflags & ~IPT_AH_INV_MASK) {
+ duprintf("ipt_ah: unknown flags %X\n",
+ ahinfo->invflags);
+ return 0;
+ }
+
+ return 1;
+}
+
+static struct ipt_match ah_match = {
+ .name = "ah",
+ .match = &match,
+ .checkentry = &checkentry,
+ .me = THIS_MODULE,
+};
+
+static int __init init(void)
+{
+ return ipt_register_match(&ah_match);
+}
+
+static void __exit cleanup(void)
+{
+ ipt_unregister_match(&ah_match);
+}
+
+module_init(init);
+module_exit(cleanup);
diff --git a/net/ipv4/netfilter/ipt_comment.c b/net/ipv4/netfilter/ipt_comment.c
new file mode 100644
index 00000000000..6b76a1ea524
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_comment.c
@@ -0,0 +1,59 @@
+/*
+ * Implements a dummy match to allow attaching comments to rules
+ *
+ * 2003-05-13 Brad Fisher (brad@info-link.net)
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_comment.h>
+
+MODULE_AUTHOR("Brad Fisher <brad@info-link.net>");
+MODULE_DESCRIPTION("iptables comment match module");
+MODULE_LICENSE("GPL");
+
+static int
+match(const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const void *matchinfo,
+ int offset,
+ int *hotdrop)
+{
+ /* We always match */
+ return 1;
+}
+
+static int
+checkentry(const char *tablename,
+ const struct ipt_ip *ip,
+ void *matchinfo,
+ unsigned int matchsize,
+ unsigned int hook_mask)
+{
+ /* Check the size */
+ if (matchsize != IPT_ALIGN(sizeof(struct ipt_comment_info)))
+ return 0;
+ return 1;
+}
+
+static struct ipt_match comment_match = {
+ .name = "comment",
+ .match = match,
+ .checkentry = checkentry,
+ .me = THIS_MODULE
+};
+
+static int __init init(void)
+{
+ return ipt_register_match(&comment_match);
+}
+
+static void __exit fini(void)
+{
+ ipt_unregister_match(&comment_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_connmark.c b/net/ipv4/netfilter/ipt_connmark.c
new file mode 100644
index 00000000000..2706f96cea5
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_connmark.c
@@ -0,0 +1,81 @@
+/* This kernel module matches connection mark values set by the
+ * CONNMARK target
+ *
+ * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
+ * by Henrik Nordstrom <hno@marasystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>");
+MODULE_DESCRIPTION("IP tables connmark match module");
+MODULE_LICENSE("GPL");
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_connmark.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+
+static int
+match(const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const void *matchinfo,
+ int offset,
+ int *hotdrop)
+{
+ const struct ipt_connmark_info *info = matchinfo;
+ enum ip_conntrack_info ctinfo;
+ struct ip_conntrack *ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
+ if (!ct)
+ return 0;
+
+ return ((ct->mark & info->mask) == info->mark) ^ info->invert;
+}
+
+static int
+checkentry(const char *tablename,
+ const struct ipt_ip *ip,
+ void *matchinfo,
+ unsigned int matchsize,
+ unsigned int hook_mask)
+{
+ if (matchsize != IPT_ALIGN(sizeof(struct ipt_connmark_info)))
+ return 0;
+
+ return 1;
+}
+
+static struct ipt_match connmark_match = {
+ .name = "connmark",
+ .match = &match,
+ .checkentry = &checkentry,
+ .me = THIS_MODULE
+};
+
+static int __init init(void)
+{
+ return ipt_register_match(&connmark_match);
+}
+
+static void __exit fini(void)
+{
+ ipt_unregister_match(&connmark_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_conntrack.c b/net/ipv4/netfilter/ipt_conntrack.c
new file mode 100644
index 00000000000..c1d22801b7c
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_conntrack.c
@@ -0,0 +1,136 @@
+/* Kernel module to match connection tracking information.
+ * Superset of Rusty's minimalistic state match.
+ *
+ * (C) 2001 Marc Boucher (marc@mbsi.ca).
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_conntrack.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
+MODULE_DESCRIPTION("iptables connection tracking match module");
+
+static int
+match(const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const void *matchinfo,
+ int offset,
+ int *hotdrop)
+{
+ const struct ipt_conntrack_info *sinfo = matchinfo;
+ struct ip_conntrack *ct;
+ enum ip_conntrack_info ctinfo;
+ unsigned int statebit;
+
+ ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
+
+#define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg))
+
+ if (ct == &ip_conntrack_untracked)
+ statebit = IPT_CONNTRACK_STATE_UNTRACKED;
+ else if (ct)
+ statebit = IPT_CONNTRACK_STATE_BIT(ctinfo);
+ else
+ statebit = IPT_CONNTRACK_STATE_INVALID;
+
+ if(sinfo->flags & IPT_CONNTRACK_STATE) {
+ if (ct) {
+ if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip !=
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip)
+ statebit |= IPT_CONNTRACK_STATE_SNAT;
+
+ if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip !=
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip)
+ statebit |= IPT_CONNTRACK_STATE_DNAT;
+ }
+
+ if (FWINV((statebit & sinfo->statemask) == 0, IPT_CONNTRACK_STATE))
+ return 0;
+ }
+
+ if(sinfo->flags & IPT_CONNTRACK_PROTO) {
+ if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, IPT_CONNTRACK_PROTO))
+ return 0;
+ }
+
+ if(sinfo->flags & IPT_CONNTRACK_ORIGSRC) {
+ if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, IPT_CONNTRACK_ORIGSRC))
+ return 0;
+ }
+
+ if(sinfo->flags & IPT_CONNTRACK_ORIGDST) {
+ if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, IPT_CONNTRACK_ORIGDST))
+ return 0;
+ }
+
+ if(sinfo->flags & IPT_CONNTRACK_REPLSRC) {
+ if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, IPT_CONNTRACK_REPLSRC))
+ return 0;
+ }
+
+ if(sinfo->flags & IPT_CONNTRACK_REPLDST) {
+ if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, IPT_CONNTRACK_REPLDST))
+ return 0;
+ }
+
+ if(sinfo->flags & IPT_CONNTRACK_STATUS) {
+ if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, IPT_CONNTRACK_STATUS))
+ return 0;
+ }
+
+ if(sinfo->flags & IPT_CONNTRACK_EXPIRES) {
+ unsigned long expires;
+
+ if(!ct)
+ return 0;
+
+ expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0;
+
+ if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), IPT_CONNTRACK_EXPIRES))
+ return 0;
+ }
+
+ return 1;
+}
+
+static int check(const char *tablename,
+ const struct ipt_ip *ip,
+ void *matchinfo,
+ unsigned int matchsize,
+ unsigned int hook_mask)
+{
+ if (matchsize != IPT_ALIGN(sizeof(struct ipt_conntrack_info)))
+ return 0;
+
+ return 1;
+}
+
+static struct ipt_match conntrack_match = {
+ .name = "conntrack",
+ .match = &match,
+ .checkentry = &check,
+ .me = THIS_MODULE,
+};
+
+static int __init init(void)
+{
+ need_ip_conntrack();
+ return ipt_register_match(&conntrack_match);
+}
+
+static void __exit fini(void)
+{
+ ipt_unregister_match(&conntrack_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_dscp.c b/net/ipv4/netfilter/ipt_dscp.c
new file mode 100644
index 00000000000..5df52a64a5d
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_dscp.c
@@ -0,0 +1,63 @@
+/* IP tables module for matching the value of the IPv4 DSCP field
+ *
+ * ipt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp
+ *
+ * (C) 2002 by Harald Welte <laforge@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter_ipv4/ipt_dscp.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("iptables DSCP matching module");
+MODULE_LICENSE("GPL");
+
+static int match(const struct sk_buff *skb, const struct net_device *in,
+ const struct net_device *out, const void *matchinfo,
+ int offset, int *hotdrop)
+{
+ const struct ipt_dscp_info *info = matchinfo;
+ const struct iphdr *iph = skb->nh.iph;
+
+ u_int8_t sh_dscp = ((info->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK);
+
+ return ((iph->tos&IPT_DSCP_MASK) == sh_dscp) ^ info->invert;
+}
+
+static int checkentry(const char *tablename, const struct ipt_ip *ip,
+ void *matchinfo, unsigned int matchsize,
+ unsigned int hook_mask)
+{
+ if (matchsize != IPT_ALIGN(sizeof(struct ipt_dscp_info)))
+ return 0;
+
+ return 1;
+}
+
+static struct ipt_match dscp_match = {
+ .name = "dscp",
+ .match = &match,
+ .checkentry = &checkentry,
+ .me = THIS_MODULE,
+};
+
+static int __init init(void)
+{
+ return ipt_register_match(&dscp_match);
+}
+
+static void __exit fini(void)
+{
+ ipt_unregister_match(&dscp_match);
+
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
new file mode 100644
index 00000000000..b6f7181e89c
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -0,0 +1,131 @@
+/* IP tables module for matching the value of the IPv4 and TCP ECN bits
+ *
+ * ipt_ecn.c,v 1.3 2002/05/29 15:09:00 laforge Exp
+ *
+ * (C) 2002 by Harald Welte <laforge@gnumonks.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/tcp.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_ecn.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("iptables ECN matching module");
+MODULE_LICENSE("GPL");
+
+static inline int match_ip(const struct sk_buff *skb,
+ const struct ipt_ecn_info *einfo)
+{
+ return ((skb->nh.iph->tos&IPT_ECN_IP_MASK) == einfo->ip_ect);
+}
+
+static inline int match_tcp(const struct sk_buff *skb,
+ const struct ipt_ecn_info *einfo,
+ int *hotdrop)
+{
+ struct tcphdr _tcph, *th;
+
+ /* In practice, TCP match does this, so can't fail. But let's
+ * be good citizens.
+ */
+ th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+ sizeof(_tcph), &_tcph);
+ if (th == NULL) {
+ *hotdrop = 0;
+ return 0;
+ }
+
+ if (einfo->operation & IPT_ECN_OP_MATCH_ECE) {
+ if (einfo->invert & IPT_ECN_OP_MATCH_ECE) {
+ if (th->ece == 1)
+ return 0;
+ } else {
+ if (th->ece == 0)
+ return 0;
+ }
+ }
+
+ if (einfo->operation & IPT_ECN_OP_MATCH_CWR) {
+ if (einfo->invert & IPT_ECN_OP_MATCH_CWR) {
+ if (th->cwr == 1)
+ return 0;
+ } else {
+ if (th->cwr == 0)
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+static int match(const struct sk_buff *skb, const struct net_device *in,
+ const struct net_device *out, const void *matchinfo,
+ int offset, int *hotdrop)
+{
+ const struct ipt_ecn_info *info = matchinfo;
+
+ if (info->operation & IPT_ECN_OP_MATCH_IP)
+ if (!match_ip(skb, info))
+ return 0;
+
+ if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
+ if (skb->nh.iph->protocol != IPPROTO_TCP)
+ return 0;
+ if (!match_tcp(skb, info, hotdrop))
+ return 0;
+ }
+
+ return 1;
+}
+
+static int checkentry(const char *tablename, const struct ipt_ip *ip,
+ void *matchinfo, unsigned int matchsize,
+ unsigned int hook_mask)
+{
+ const struct ipt_ecn_info *info = matchinfo;
+
+ if (matchsize != IPT_ALIGN(sizeof(struct ipt_ecn_info)))
+ return 0;
+
+ if (info->operation & IPT_ECN_OP_MATCH_MASK)
+ return 0;
+
+ if (info->invert & IPT_ECN_OP_MATCH_MASK)
+ return 0;
+
+ if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)
+ && ip->proto != IPPROTO_TCP) {
+ printk(KERN_WARNING "ipt_ecn: can't match TCP bits in rule for"
+ " non-tcp packets\n");
+ return 0;
+ }
+
+ return 1;
+}
+
+static struct ipt_match ecn_match = {
+ .name = "ecn",
+ .match = &match,
+ .checkentry = &checkentry,
+ .me = THIS_MODULE,
+};
+
+static int __init init(void)
+{
+ return ipt_register_match(&ecn_match);
+}
+
+static void __exit fini(void)
+{
+ ipt_unregister_match(&ecn_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_esp.c b/net/ipv4/netfilter/ipt_esp.c
new file mode 100644
index 00000000000..e1d0dd31e11
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_esp.c
@@ -0,0 +1,118 @@
+/* Kernel module to match ESP parameters. */
+
+/* (C) 1999-2000 Yon Uriarte <yon@astaro.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+
+#include <linux/netfilter_ipv4/ipt_esp.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>");
+MODULE_DESCRIPTION("iptables ESP SPI match module");
+
+#ifdef DEBUG_CONNTRACK
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+/* Returns 1 if the spi is matched by the range, 0 otherwise */
+static inline int
+spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert)
+{
+ int r=0;
+ duprintf("esp spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
+ min,spi,max);
+ r=(spi >= min && spi <= max) ^ invert;
+ duprintf(" result %s\n",r? "PASS" : "FAILED");
+ return r;
+}
+
+static int
+match(const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const void *matchinfo,
+ int offset,
+ int *hotdrop)
+{
+ struct ip_esp_hdr _esp, *eh;
+ const struct ipt_esp *espinfo = matchinfo;
+
+ /* Must not be a fragment. */
+ if (offset)
+ return 0;
+
+ eh = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+ sizeof(_esp), &_esp);
+ if (eh == NULL) {
+ /* We've been asked to examine this packet, and we
+ * can't. Hence, no choice but to drop.
+ */
+ duprintf("Dropping evil ESP tinygram.\n");
+ *hotdrop = 1;
+ return 0;
+ }
+
+ return spi_match(espinfo->spis[0], espinfo->spis[1],
+ ntohl(eh->spi),
+ !!(espinfo->invflags & IPT_ESP_INV_SPI));
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+checkentry(const char *tablename,
+ const struct ipt_ip *ip,
+ void *matchinfo,
+ unsigned int matchinfosize,
+ unsigned int hook_mask)
+{
+ const struct ipt_esp *espinfo = matchinfo;
+
+ /* Must specify proto == ESP, and no unknown invflags */
+ if (ip->proto != IPPROTO_ESP || (ip->invflags & IPT_INV_PROTO)) {
+ duprintf("ipt_esp: Protocol %u != %u\n", ip->proto,
+ IPPROTO_ESP);
+ return 0;
+ }
+ if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_esp))) {
+ duprintf("ipt_esp: matchsize %u != %u\n",
+ matchinfosize, IPT_ALIGN(sizeof(struct ipt_esp)));
+ return 0;
+ }
+ if (espinfo->invflags & ~IPT_ESP_INV_MASK) {
+ duprintf("ipt_esp: unknown flags %X\n",
+ espinfo->invflags);
+ return 0;
+ }
+
+ return 1;
+}
+
+static struct ipt_match esp_match = {
+ .name = "esp",
+ .match = &match,
+ .checkentry = &checkentry,
+ .me = THIS_MODULE,
+};
+
+static int __init init(void)
+{
+ return ipt_register_match(&esp_match);
+}
+
+static void __exit cleanup(void)
+{
+ ipt_unregister_match(&esp_match);
+}
+
+module_init(init);
+module_exit(cleanup);
diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c
new file mode 100644
index 00000000000..f1937190cd7
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_hashlimit.c
@@ -0,0 +1,731 @@
+/* iptables match extension to limit the number of packets per second
+ * seperately for each hashbucket (sourceip/sourceport/dstip/dstport)
+ *
+ * (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
+ *
+ * $Id: ipt_hashlimit.c 3244 2004-10-20 16:24:29Z laforge@netfilter.org $
+ *
+ * Development of this code was funded by Astaro AG, http://www.astaro.com/
+ *
+ * based on ipt_limit.c by:
+ * Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr>
+ * Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr>
+ * Rusty Russell <rusty@rustcorp.com.au>
+ *
+ * The general idea is to create a hash table for every dstip and have a
+ * seperate limit counter per tuple. This way you can do something like 'limit
+ * the number of syn packets for each of my internal addresses.
+ *
+ * Ideally this would just be implemented as a general 'hash' match, which would
+ * allow us to attach any iptables target to it's hash buckets. But this is
+ * not possible in the current iptables architecture. As always, pkttables for
+ * 2.7.x will help ;)
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/random.h>
+#include <linux/jhash.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/sctp.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/list.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_hashlimit.h>
+#include <linux/netfilter_ipv4/lockhelp.h>
+
+/* FIXME: this is just for IP_NF_ASSERRT */
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("iptables match for limiting per hash-bucket");
+
+/* need to declare this at the top */
+static struct proc_dir_entry *hashlimit_procdir;
+static struct file_operations dl_file_ops;
+
+/* hash table crap */
+
+struct dsthash_dst {
+ u_int32_t src_ip;
+ u_int32_t dst_ip;
+ /* ports have to be consecutive !!! */
+ u_int16_t src_port;
+ u_int16_t dst_port;
+};
+
+struct dsthash_ent {
+ /* static / read-only parts in the beginning */
+ struct hlist_node node;
+ struct dsthash_dst dst;
+
+ /* modified structure members in the end */
+ unsigned long expires; /* precalculated expiry time */
+ struct {
+ unsigned long prev; /* last modification */
+ u_int32_t credit;
+ u_int32_t credit_cap, cost;
+ } rateinfo;
+};
+
+struct ipt_hashlimit_htable {
+ struct hlist_node node; /* global list of all htables */
+ atomic_t use;
+
+ struct hashlimit_cfg cfg; /* config */
+
+ /* used internally */
+ spinlock_t lock; /* lock for list_head */
+ u_int32_t rnd; /* random seed for hash */
+ struct timer_list timer; /* timer for gc */
+ atomic_t count; /* number entries in table */
+
+ /* seq_file stuff */
+ struct proc_dir_entry *pde;
+
+ struct hlist_head hash[0]; /* hashtable itself */
+};
+
+static DECLARE_LOCK(hashlimit_lock); /* protects htables list */
+static DECLARE_MUTEX(hlimit_mutex); /* additional checkentry protection */
+static HLIST_HEAD(hashlimit_htables);
+static kmem_cache_t *hashlimit_cachep;
+
+static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b)
+{
+ return (ent->dst.dst_ip == b->dst_ip
+ && ent->dst.dst_port == b->dst_port
+ && ent->dst.src_port == b->src_port
+ && ent->dst.src_ip == b->src_ip);
+}
+
+static inline u_int32_t
+hash_dst(const struct ipt_hashlimit_htable *ht, const struct dsthash_dst *dst)
+{
+ return (jhash_3words(dst->dst_ip, (dst->dst_port<<16 | dst->src_port),
+ dst->src_ip, ht->rnd) % ht->cfg.size);
+}
+
+static inline struct dsthash_ent *
+__dsthash_find(const struct ipt_hashlimit_htable *ht, struct dsthash_dst *dst)
+{
+ struct dsthash_ent *ent;
+ struct hlist_node *pos;
+ u_int32_t hash = hash_dst(ht, dst);
+
+ if (!hlist_empty(&ht->hash[hash]))
+ hlist_for_each_entry(ent, pos, &ht->hash[hash], node) {
+ if (dst_cmp(ent, dst)) {
+ return ent;
+ }
+ }
+
+ return NULL;
+}
+
+/* allocate dsthash_ent, initialize dst, put in htable and lock it */
+static struct dsthash_ent *
+__dsthash_alloc_init(struct ipt_hashlimit_htable *ht, struct dsthash_dst *dst)
+{
+ struct dsthash_ent *ent;
+
+ /* initialize hash with random val at the time we allocate
+ * the first hashtable entry */
+ if (!ht->rnd)
+ get_random_bytes(&ht->rnd, 4);
+
+ if (ht->cfg.max &&
+ atomic_read(&ht->count) >= ht->cfg.max) {
+ /* FIXME: do something. question is what.. */
+ if (net_ratelimit())
+ printk(KERN_WARNING
+ "ipt_hashlimit: max count of %u reached\n",
+ ht->cfg.max);
+ return NULL;
+ }
+
+ ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC);
+ if (!ent) {
+ if (net_ratelimit())
+ printk(KERN_ERR
+ "ipt_hashlimit: can't allocate dsthash_ent\n");
+ return NULL;
+ }
+
+ atomic_inc(&ht->count);
+
+ ent->dst.dst_ip = dst->dst_ip;
+ ent->dst.dst_port = dst->dst_port;
+ ent->dst.src_ip = dst->src_ip;
+ ent->dst.src_port = dst->src_port;
+
+ hlist_add_head(&ent->node, &ht->hash[hash_dst(ht, dst)]);
+
+ return ent;
+}
+
+static inline void
+__dsthash_free(struct ipt_hashlimit_htable *ht, struct dsthash_ent *ent)
+{
+ hlist_del(&ent->node);
+ kmem_cache_free(hashlimit_cachep, ent);
+ atomic_dec(&ht->count);
+}
+static void htable_gc(unsigned long htlong);
+
+static int htable_create(struct ipt_hashlimit_info *minfo)
+