diff -urN linux-2.6.11/Documentation/networking/ip-sysctl.txt x1/Documentation/networking/ip-sysctl.txt --- linux-2.6.11/Documentation/networking/ip-sysctl.txt 2005-03-02 08:38:07.000000000 +0100 +++ x1/Documentation/networking/ip-sysctl.txt 2004-11-04 14:07:37.000000000 +0100 @@ -661,7 +661,7 @@ TRUE: disable IPv4-mapped address feature FALSE: enable IPv4-mapped address feature - Default: FALSE (as specified in RFC2553bis) + Default: FALSE (as specified in RFC3493) IPv6 Fragmentation: diff -urN linux-2.6.11/include/linux/icmpv6.h x1/include/linux/icmpv6.h --- linux-2.6.11/include/linux/icmpv6.h 2005-03-02 08:37:50.000000000 +0100 +++ x1/include/linux/icmpv6.h 2004-08-04 02:07:47.000000000 +0200 @@ -40,14 +40,18 @@ struct icmpv6_nd_ra { __u8 hop_limit; #if defined(__LITTLE_ENDIAN_BITFIELD) - __u8 reserved:6, + __u8 reserved:3, + router_pref:2, + home_agent:1, other:1, managed:1; #elif defined(__BIG_ENDIAN_BITFIELD) __u8 managed:1, other:1, - reserved:6; + home_agent:1, + router_pref:2, + reserved:3; #else #error "Please fix " #endif @@ -70,6 +74,8 @@ #define icmp6_addrconf_managed icmp6_dataun.u_nd_ra.managed #define icmp6_addrconf_other icmp6_dataun.u_nd_ra.other #define icmp6_rt_lifetime icmp6_dataun.u_nd_ra.rt_lifetime +#define icmp6_home_agent icmp6_dataun.u_nd_ra.home_agent +#define icmp6_router_pref icmp6_dataun.u_nd_ra.router_pref }; diff -urN linux-2.6.11/include/linux/in6.h x1/include/linux/in6.h --- linux-2.6.11/include/linux/in6.h 2005-03-02 08:38:12.000000000 +0100 +++ x1/include/linux/in6.h 2005-02-03 07:02:41.000000000 +0100 @@ -40,14 +40,14 @@ #define s6_addr32 in6_u.u6_addr32 }; -/* IPv6 Wildcard Address (::) and Loopback Address (::1) defined in RFC2553 +/* IPv6 Wildcard Address (::) and Loopback Address (::1) defined in RFC3493 * NOTE: Be aware the IN6ADDR_* constants and in6addr_* externals are defined * in network byte order, not in host byte order as are the IPv4 equivalents */ #if 0 extern const struct in6_addr in6addr_any; -#define 
IN6ADDR_ANY_INIT { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } } } #endif +#define IN6ADDR_ANY_INIT { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } } } extern const struct in6_addr in6addr_loopback; #define IN6ADDR_LOOPBACK_INIT { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 } } } @@ -56,7 +56,7 @@ __u16 sin6_port; /* Transport layer port # */ __u32 sin6_flowinfo; /* IPv6 flow information */ struct in6_addr sin6_addr; /* IPv6 address */ - __u32 sin6_scope_id; /* scope id (new in RFC2553) */ + __u32 sin6_scope_id; /* scope id */ }; struct ipv6_mreq { @@ -198,4 +198,7 @@ * MCAST_MSFILTER 48 */ +/* Netfilter */ +#define IPV6_NF_ORIGINAL_DST 80 + #endif diff -urN linux-2.6.11/include/linux/ip.h x1/include/linux/ip.h --- linux-2.6.11/include/linux/ip.h 2005-03-02 08:37:52.000000000 +0100 +++ x1/include/linux/ip.h 2005-02-11 17:24:31.000000000 +0100 @@ -152,6 +152,7 @@ }; #define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ +#define IPCORK_ALLFRAG 2 /* IPv6: always fragment */ static inline struct inet_sock *inet_sk(const struct sock *sk) { diff -urN linux-2.6.11/include/linux/ipv6.h x1/include/linux/ipv6.h --- linux-2.6.11/include/linux/ipv6.h 2005-03-02 08:38:13.000000000 +0100 +++ x1/include/linux/ipv6.h 2005-02-09 16:31:39.000000000 +0100 @@ -145,6 +145,9 @@ __s32 max_desync_factor; #endif __s32 max_addresses; +#ifdef CONFIG_IPV6_MROUTE + __s32 mc_forwarding; +#endif void *sysctl; }; @@ -167,6 +170,9 @@ DEVCONF_MAX_DESYNC_FACTOR, DEVCONF_MAX_ADDRESSES, DEVCONF_FORCE_MLD_VERSION, +#ifdef CONFIG_IPV6_MROUTE + DEVCONF_MCFORWARDING, +#endif DEVCONF_MAX }; diff -urN linux-2.6.11/include/linux/ipv6_route.h x1/include/linux/ipv6_route.h --- linux-2.6.11/include/linux/ipv6_route.h 2005-03-02 08:37:50.000000000 +0100 +++ x1/include/linux/ipv6_route.h 2004-11-25 06:33:04.000000000 +0100 @@ -19,6 +19,12 @@ #define RTF_ADDRCONF 0x00040000 /* addrconf route - RA */ #define RTF_PREFIX_RT 0x00080000 /* A prefix only route - RA */ +#define RTF_PREF_HIGH 0x08000000 +#define RTF_PREF_LOW 
0x18000000 +#define RTF_PREF_INVAL 0x10000000 +#define RTF_PREF_MASK 0x18000000 +#define RTF_PREF(pref) (((pref)&3)<<27) + #define RTF_NONEXTHOP 0x00200000 /* route with no nexthop */ #define RTF_EXPIRES 0x00400000 @@ -28,6 +34,11 @@ #define RTF_LOCAL 0x80000000 +#ifdef __KERNEL__ +#define IPV6_UNSHIFT_PREF(flag) (((flag)&RTF_PREF_MASK)>>27) +#define IPV6_SIGNEDPREF(pref) ((((pref)+2)&3)-2) +#endif + struct in6_rtmsg { struct in6_addr rtmsg_dst; struct in6_addr rtmsg_src; diff -urN linux-2.6.11/include/linux/mroute6.h x1/include/linux/mroute6.h --- linux-2.6.11/include/linux/mroute6.h 1970-01-01 01:00:00.000000000 +0100 +++ x1/include/linux/mroute6.h 2005-02-09 16:31:39.000000000 +0100 @@ -0,0 +1,296 @@ +#ifndef __LINUX_MROUTE6_H +#define __LINUX_MROUTE6_H + +#include + +/* + * Based on the MROUTING 3.5 defines primarily to keep + * source compatibility with BSD. + * + * See the pim6sd code for the original history. + * + * Protocol Independent Multicast (PIM) data structures included + * Carlos Picoto (cap@di.fc.ul.pt) + * + */ + +#define MRT6_BASE 200 +#define MRT6_INIT (MRT6_BASE) /* Activate the kernel mroute code */ +#define MRT6_DONE (MRT6_BASE+1) /* Shutdown the kernel mroute */ +#define MRT6_ADD_MIF (MRT6_BASE+2) /* Add a virtual interface */ +#define MRT6_DEL_MIF (MRT6_BASE+3) /* Delete a virtual interface */ +#define MRT6_ADD_MFC (MRT6_BASE+4) /* Add a multicast forwarding entry */ +#define MRT6_DEL_MFC (MRT6_BASE+5) /* Delete a multicast forwarding entry */ +#define MRT6_VERSION (MRT6_BASE+6) /* Get the kernel multicast version */ +#define MRT6_ASSERT (MRT6_BASE+7) /* Activate PIM assert mode */ +#define MRT6_PIM (MRT6_BASE+8) /* enable PIM code */ + +#define SIOCGETMIFCNT_IN6 SIOCPROTOPRIVATE /* IP protocol privates */ +#define SIOCGETSGCNT_IN6 (SIOCPROTOPRIVATE+1) +#define SIOCGETRPF (SIOCPROTOPRIVATE+2) + +#define MAXMIFS 32 +typedef unsigned long mifbitmap_t; /* User mode code depends on this lot */ +typedef unsigned short mifi_t; +#define ALL_MIFS 
((mifi_t)(-1)) + +#ifndef IF_SETSIZE +#define IF_SETSIZE 256 +#endif + +typedef u_int32_t if_mask; +#define NIFBITS (sizeof(if_mask) * 8) /* bits per mask */ + +#ifndef howmany +#define howmany(x, y) (((x) + ((y) - 1)) / (y)) +#endif + +typedef struct if_set { + if_mask ifs_bits[howmany(IF_SETSIZE, NIFBITS)]; +} if_set; + +#define IF_SET(n, p) ((p)->ifs_bits[(n)/NIFBITS] |= ((if_mask)1 << ((n) % NIFBITS))) +#define IF_CLR(n, p) ((p)->ifs_bits[(n)/NIFBITS] &= ~((if_mask)1 << ((n) % NIFBITS))) +#define IF_ISSET(n, p) ((p)->ifs_bits[(n)/NIFBITS] & ((if_mask)1 << ((n) % NIFBITS))) +#define IF_COPY(f, t) bcopy(f, t, sizeof(*(f))) +#define IF_ZERO(p) bzero(p, sizeof(*(p))) + +/* + Same idea as select + +#define VIFM_SET(n,m) ((m)|=(1<<(n))) +#define VIFM_CLR(n,m) ((m)&=~(1<<(n))) +#define VIFM_ISSET(n,m) ((m)&(1<<(n))) +#define VIFM_CLRALL(m) ((m)=0) +#define VIFM_COPY(mfrom,mto) ((mto)=(mfrom)) +#define VIFM_SAME(m1,m2) ((m1)==(m2)) +*/ + +/* + * Passed by mrouted for an MRT6_ADD_MIF - again we use the + * mrouted 3.6 structures for compatibility + */ + +struct mif6ctl { + mifi_t mif6c_mifi; /* Index of MIF */ + unsigned char mif6c_flags; /* MIFF_ flags */ + unsigned char vifc_threshold; /* ttl limit */ + unsigned int vifc_rate_limit; /* Rate limiter values (NI) */ + u_short mif6c_pifi; /* the index of the physical IF */ +}; + +#define MIFF_REGISTER 0x1 /* register vif */ + +/* + * Cache manipulation structures for mrouted and PIMd + */ + +struct mf6cctl +{ + struct sockaddr_in6 mf6cc_origin; /* Origin of mcast */ + struct sockaddr_in6 mf6cc_mcastgrp; /* Group in question */ + mifi_t mf6cc_parent; /* Where it arrived */ + struct if_set mf6cc_ifset; /* Where it is going */ + unsigned int mfcc_pkt_cnt; /* pkt count for src-grp */ + unsigned int mfcc_byte_cnt; + unsigned int mfcc_wrong_if; + int mfcc_expire; +}; + +/* + * Group count retrieval for pim6sd + */ + +struct sioc_sg_req6 +{ + struct sockaddr_in6 src; + struct sockaddr_in6 grp; + unsigned long pktcnt; + unsigned long bytecnt; + unsigned 
long wrong_if; +}; + +/* + * To get vif packet counts + */ + +struct sioc_mif_req6 +{ + mifi_t mifi; /* Which iface */ + unsigned long icount; /* In packets */ + unsigned long ocount; /* Out packets */ + unsigned long ibytes; /* In bytes */ + unsigned long obytes; /* Out bytes */ +}; + +/* + * That's all usermode folks + */ + +#ifdef __KERNEL__ +struct inet6_dev * ipv6_find_idev(struct net_device *dev); +#include + +extern int ip6_mroute_setsockopt(struct sock *, int, char __user *, int); +extern int ip6_mroute_getsockopt(struct sock *, int, char __user *, int __user *); +extern int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg); +extern void ip6_mr_init(void); + +struct mif_device +{ + struct net_device *dev; /* Device we are using */ + unsigned long bytes_in,bytes_out; + unsigned long pkt_in,pkt_out; /* Statistics */ + unsigned long rate_limit; /* Traffic shaping (NI) */ + unsigned char threshold; /* TTL threshold */ + unsigned short flags; /* Control flags */ + int link; /* Physical interface index */ +}; + +#define VIFF_STATIC 0x8000 + +struct mfc6_cache +{ + struct mfc6_cache *next; /* Next entry on cache line */ + struct in6_addr mf6c_mcastgrp; /* Group the entry belongs to */ + struct in6_addr mf6c_origin; /* Source of packet */ + mifi_t mf6c_parent; /* Source interface */ + int mfc_flags; /* Flags on line */ + + union { + struct { + unsigned long expires; + struct sk_buff_head unresolved; /* Unresolved buffers */ + } unres; + struct { + unsigned long last_assert; + int minvif; + int maxvif; + unsigned long bytes; + unsigned long pkt; + unsigned long wrong_if; + unsigned char ttls[MAXMIFS]; /* TTL thresholds */ + } res; + } mfc_un; +}; + +#define MFC_STATIC 1 +#define MFC_NOTIFY 2 + +#define MFC6_LINES 64 + +#if (MFC6_LINES & (MFC6_LINES -1 )) == 0 +#define MF6CHASHMOD(h) ((h) & (MFC6_LINES -1)) +#else +#define MF6CHASHMOD(h) ((h) % MFC6_LINES) +#endif + +#define MFC6_HASH(a, g) MF6CHASHMOD((a).s6_addr32[0] ^ (a).s6_addr32[1] ^ \ + (a).s6_addr32[2] 
^ (a).s6_addr32[3] ^ \ + (g).s6_addr32[0] ^ (g).s6_addr32[1] ^ \ + (g).s6_addr32[2] ^ (g).s6_addr32[3]) + +#endif + + + +#define MFC_ASSERT_THRESH (3*HZ) /* Maximal freq. of asserts */ + +/* + * Pseudo messages used by mrouted + */ + +#define IGMPMSG_NOCACHE 1 /* Kern cache fill request to mrouted */ +#define IGMPMSG_WRONGVIF 2 /* For PIM assert processing (unused) */ +#define IGMPMSG_WHOLEPKT 3 /* For PIM Register processing */ + +#define PIM_REGISTER 1 + +#ifdef __KERNEL__ + +#define PIM_V1_VERSION __constant_htonl(0x10000000) +#define PIM_V1_REGISTER 1 + +#define PIM_VERSION 2 + +#define PIM_NULL_REGISTER __constant_htonl(0x40000000) + +/* PIMv2 register message header layout (ietf-draft-idmr-pimvsm-v2-00.ps) */ + +struct pim6reghdr +{ + __u8 type; + __u8 reserved; + __u16 csum; + __u32 flags; +}; + + +struct rtmsg; +extern int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait); +#endif + +#ifdef __KERNEL__ + +extern struct sock *mroute6_socket; + +#define IN6_ARE_ADDR_EQUAL(a,b) \ + (memcmp(&(a)->s6_addr[0], &(b)->s6_addr[0], sizeof(struct in6_addr)) == 0) +#endif + +/* + * Structure used to communicate from kernel to multicast router. + * We'll overlay the structure onto an MLD header (not an IPv6 header like igmpmsg{} + * used for IPv4 implementation). This is because this structure will be passed via an + * IPv6 raw socket, on which an application will only receive the payload i.e. the data after + * the IPv6 header and all the extension headers. 
(See section 3 of RFC 3542) + */ + +struct mrt6msg { +#define MRT6MSG_NOCACHE 1 +#define MRT6MSG_WRONGMIF 2 +#define MRT6MSG_WHOLEPKT 3 /* used for user level encap */ + u_char im6_mbz; /* must be zero */ + u_char im6_msgtype; /* what type of message */ + u_int16_t im6_mif; /* mif rec'd on */ + u_int32_t im6_pad; /* padding for 64 bit arch */ + struct in6_addr im6_src, im6_dst; +}; + +/* + * PIM packet header + */ +#define PIM_VERSION 2 +struct pim { +#if defined(BYTE_ORDER) && (BYTE_ORDER == LITTLE_ENDIAN) + u_char pim_type:4, /* the PIM message type, currently they are: + * Hello, Register, Register-Stop, Join/Prune, + * Bootstrap, Assert, Graft (PIM-DM only), + * Graft-Ack (PIM-DM only), C-RP-Adv + */ + pim_ver:4; /* PIM version number; 2 for PIMv2 */ +#else + u_char pim_ver:4, /* PIM version */ + pim_type:4; /* PIM type */ +#endif + u_char pim_rsv; /* Reserved */ + u_short pim_cksum; /* IP style check sum */ +}; + +#define PIM_MINLEN 8 /* The header min. length is 8 */ +#define PIM6_REG_MINLEN (PIM_MINLEN+40) /* Register message + inner IP6 header */ + +#define IPV6_VERSION 0x60 +#define IPV6_VERSION_MASK 0xf0 + +/* XXX: this should not be here */ +#include + +struct mld_hdr { + struct icmp6hdr mld_icmp6_hdr; + struct in6_addr mld_addr; +}; + +#define mld_type mld_icmp6_hdr.icmp6_type + +#endif diff -urN linux-2.6.11/include/linux/netfilter.h x1/include/linux/netfilter.h --- linux-2.6.11/include/linux/netfilter.h 2005-03-02 08:38:09.000000000 +0100 +++ x1/include/linux/netfilter.h 2005-02-03 05:44:11.000000000 +0100 @@ -175,6 +175,10 @@ extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *); extern void nf_ct_attach(struct sk_buff *, struct sk_buff *); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +extern void (*ip6_ct_attach)(struct sk_buff *, struct sk_buff *); +#endif + /* FIXME: Before cache is ever used, this must be implemented for real. 
*/ extern void nf_invalidate_cache(int pf); diff -urN linux-2.6.11/include/linux/netfilter_ipv6/ip6_conntrack.h x1/include/linux/netfilter_ipv6/ip6_conntrack.h --- linux-2.6.11/include/linux/netfilter_ipv6/ip6_conntrack.h 1970-01-01 01:00:00.000000000 +0100 +++ x1/include/linux/netfilter_ipv6/ip6_conntrack.h 2004-10-01 11:24:38.000000000 +0200 @@ -0,0 +1,264 @@ +/* + * Copyright (C)2003 USAGI/WIDE Project + * + * Authors: + * Yasuyuki Kozakai + * + * Based on: include/linux/netfilter_ipv4/ip_conntrack.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _IP6_CONNTRACK_H +#define _IP6_CONNTRACK_H +/* Connection state tracking for netfilter. This is separated from, + but required by, the NAT layer; it can also be used by an iptables + extension. */ + +#include +#include +#include +#include +#include + +enum ip6_conntrack_info +{ + /* Part of an established connection (either direction). */ + IP6_CT_ESTABLISHED, + + /* Like NEW, but related to an existing connection, or ICMP error + (in either direction). */ + IP6_CT_RELATED, + + /* Started a new connection to track (only + IP6_CT_DIR_ORIGINAL); may be a retransmission. */ + IP6_CT_NEW, + + /* >= this indicates reply direction */ + IP6_CT_IS_REPLY, + + /* Number of distinct IP6_CT types (no NEW in reply dirn). */ + IP6_CT_NUMBER = IP6_CT_IS_REPLY * 2 - 1 +}; + +/* Bitset representing status of connection. */ +enum ip6_conntrack_status { + /* It's an expected connection: bit 0 set. This bit never changed */ + IP6S_EXPECTED_BIT = 0, + IP6S_EXPECTED = (1 << IP6S_EXPECTED_BIT), + + /* We've seen packets both ways: bit 1 set. Can be set, not unset. */ + IP6S_SEEN_REPLY_BIT = 1, + IP6S_SEEN_REPLY = (1 << IP6S_SEEN_REPLY_BIT), + + /* Conntrack should never be early-expired. 
*/ + IP6S_ASSURED_BIT = 2, + IP6S_ASSURED = (1 << IP6S_ASSURED_BIT), + + /* Connection is confirmed: originating packet has left box */ + IP6S_CONFIRMED_BIT = 3, + IP6S_CONFIRMED = (1 << IP6S_CONFIRMED_BIT), +}; + +#include +#include + +/* per conntrack: protocol private data */ +union ip6_conntrack_proto { + /* insert conntrack proto private data here */ + struct ip6_ct_tcp tcp; + struct ip6_ct_icmpv6 icmpv6; +}; + +union ip6_conntrack_expect_proto { + /* insert expect proto private data here */ +}; + +/* Add protocol helper include file here */ +#include + +/* per expectation: application helper private data */ +union ip6_conntrack_expect_help { + /* insert conntrack helper private data (expect) here */ + struct ip6_ct_ftp_expect exp_ftp_info; +}; + +/* per conntrack: application helper private data */ +union ip6_conntrack_help { + /* insert conntrack helper private data (master) here */ + struct ip6_ct_ftp_master ct_ftp_info; +}; + +#ifdef __KERNEL__ + +#include +#include + +#ifdef CONFIG_NF_DEBUG +#define IP6_NF_ASSERT(x) \ +do { \ + if (!(x)) \ + /* Wooah! I'm tripping my conntrack in a frenzy of \ + netplay... */ \ + printk("NF_IP6_ASSERT: %s:%i(%s)\n", \ + __FILE__, __LINE__, __FUNCTION__); \ +} while(0) +#else +#define IP6_NF_ASSERT(x) +#endif + +struct ip6_conntrack_expect +{ + /* Internal linked list (global expectation list) */ + struct list_head list; + + /* reference count */ + atomic_t use; + + /* expectation list for this master */ + struct list_head expected_list; + + /* The conntrack of the master connection */ + struct ip6_conntrack *expectant; + + /* The conntrack of the sibling connection, set after + * expectation arrived */ + struct ip6_conntrack *sibling; + + /* IPv6 packet is never NATed */ + /* Tuple saved for conntrack */ +/* + struct ip6_conntrack_tuple ct_tuple; +*/ + + /* Timer function; deletes the expectation. 
*/ + struct timer_list timeout; + + /* Data filled out by the conntrack helpers follow: */ + + /* We expect this tuple, with the following mask */ + struct ip6_conntrack_tuple tuple, mask; + + /* Function to call after setup and insertion */ + int (*expectfn)(struct ip6_conntrack *new); + + /* At which sequence number did this expectation occur */ + u_int32_t seq; + + union ip6_conntrack_expect_proto proto; + + union ip6_conntrack_expect_help help; +}; + +#include +struct ip6_conntrack +{ + /* Usage count in here is 1 for hash table/destruct timer, 1 per skb, + plus 1 for any connection(s) we are `master' for */ + struct nf_conntrack ct_general; + + /* These are my tuples; original and reply */ + struct ip6_conntrack_tuple_hash tuplehash[IP6_CT_DIR_MAX]; + + /* Have we seen traffic both ways yet? (bitset) */ + unsigned long status; + + /* Timer function; drops refcnt when it goes off. */ + struct timer_list timeout; + + /* If we're expecting another related connection, this will be + in expected linked list */ + struct list_head sibling_list; + + /* Current number of expected connections */ + unsigned int expecting; + + /* If we were expected by an expectation, this will be it */ + struct ip6_conntrack_expect *master; + + /* Helper, if any. */ + struct ip6_conntrack_helper *helper; + + /* Storage reserved for other modules: */ + union ip6_conntrack_proto proto; + + union ip6_conntrack_help help; +}; + +/* get master conntrack via master expectation */ +#define master_ct6(conntr) (conntr->master ? conntr->master->expectant : NULL) + +/* Alter reply tuple (maybe alter helper). If it's already taken, + return 0 and don't do alteration. */ +extern int +ip6_conntrack_alter_reply(struct ip6_conntrack *conntrack, + const struct ip6_conntrack_tuple *newreply); + +/* Is this tuple taken? (ignoring any belonging to the given + conntrack). 
*/ +extern int +ip6_conntrack_tuple_taken(const struct ip6_conntrack_tuple *tuple, + const struct ip6_conntrack *ignored_conntrack); + +/* Return conntrack_info and tuple hash for given skb. */ +static inline struct ip6_conntrack * +ip6_conntrack_get(const struct sk_buff *skb, enum ip6_conntrack_info *ctinfo) +{ + *ctinfo = skb->nfctinfo; + return (struct ip6_conntrack *)skb->nfct; +} + +/* decrement reference count on a conntrack */ +extern inline void ip6_conntrack_put(struct ip6_conntrack *ct); + +/* find unconfirmed expectation based on tuple */ +struct ip6_conntrack_expect * +ip6_conntrack_expect_find_get(const struct ip6_conntrack_tuple *tuple); + +/* decrement reference count on an expectation */ +void ip6_conntrack_expect_put(struct ip6_conntrack_expect *exp); + +/* call to create an explicit dependency on ip6_conntrack. */ +extern void need_ip6_conntrack(void); + +extern int ip6_invert_tuplepr(struct ip6_conntrack_tuple *inverse, + const struct ip6_conntrack_tuple *orig); + +/* Refresh conntrack for this many jiffies */ +extern void ip6_ct_refresh(struct ip6_conntrack *ct, + unsigned long extra_jiffies); + +/* Call me when a conntrack is destroyed. */ +extern void (*ip6_conntrack_destroyed)(struct ip6_conntrack *conntrack); + +/* Returns new sk_buff, or NULL */ +struct sk_buff * +ip6_ct_gather_frags(struct sk_buff *skb); + +/* Delete all conntracks which match. */ +extern void +ip6_ct_selective_cleanup(int (*kill)(const struct ip6_conntrack *i, void *data), + void *data); + +/* It's confirmed if it is, or has been in the hash table. */ +static inline int is_confirmed(struct ip6_conntrack *ct) +{ + return test_bit(IP6S_CONFIRMED_BIT, &ct->status); +} + +extern unsigned int ip6_conntrack_htable_size; + +/* eg. PROVIDES_CONNTRACK6(ftp); */ +#define PROVIDES_CONNTRACK6(name) \ + int needs_ip6_conntrack_##name; \ + EXPORT_SYMBOL(needs_ip6_conntrack_##name) + +/*. eg. 
NEEDS_CONNTRACK6(ftp); */ +#define NEEDS_CONNTRACK6(name) \ + extern int needs_ip6_conntrack_##name; \ + static int *need_ip6_conntrack_##name __attribute_used__ = &needs_ip6_conntrack_##name + +#endif /* __KERNEL__ */ +#endif /* _IP6_CONNTRACK_H */ diff -urN linux-2.6.11/include/linux/netfilter_ipv6/ip6_conntrack_core.h x1/include/linux/netfilter_ipv6/ip6_conntrack_core.h --- linux-2.6.11/include/linux/netfilter_ipv6/ip6_conntrack_core.h 1970-01-01 01:00:00.000000000 +0100 +++ x1/include/linux/netfilter_ipv6/ip6_conntrack_core.h 2004-10-01 11:24:38.000000000 +0200 @@ -0,0 +1,69 @@ +/* + * Copyright (C)2003 USAGI/WIDE Project + * + * Authors: + * Yasuyuki Kozakai + * + * Based on: include/linux/netfilter_ipv4/ip_conntrack_core.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _IP6_CONNTRACK_CORE_H +#define _IP6_CONNTRACK_CORE_H +#include +#include + +/* This header is used to share core functionality between the + standalone connection tracking module, and the compatibility layer's use + of connection tracking. */ +extern unsigned int ip6_conntrack_in(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)); + +extern int ip6_conntrack_init(void); +extern void ip6_conntrack_cleanup(void); + +struct ip6_conntrack_protocol; +extern struct ip6_conntrack_protocol *ip6_ct_find_proto(u_int8_t protocol); +/* Like above, but you already have conntrack read lock. 
*/ +extern struct ip6_conntrack_protocol *__ip6_ct_find_proto(u_int8_t protocol); +extern struct list_head ip6_protocol_list; + +/* Returns conntrack if it dealt with ICMP, and filled in skb->nfct */ +extern struct ip6_conntrack *icmp6_error_track(struct sk_buff *skb, + unsigned int icmp6off, + enum ip6_conntrack_info *ctinfo, + unsigned int hooknum); +extern int ip6_get_tuple(const struct ipv6hdr *ipv6h, + const struct sk_buff *skb, + unsigned int protoff, + u_int8_t protonum, + struct ip6_conntrack_tuple *tuple, + const struct ip6_conntrack_protocol *protocol); + +/* Find a connection corresponding to a tuple. */ +struct ip6_conntrack_tuple_hash * +ip6_conntrack_find_get(const struct ip6_conntrack_tuple *tuple, + const struct ip6_conntrack *ignored_conntrack); + +extern int __ip6_conntrack_confirm(struct sk_buff *skb); + +/* Confirm a connection: returns NF_DROP if packet must be dropped. */ +static inline int ip6_conntrack_confirm(struct sk_buff *skb) +{ + if (skb->nfct + && !is_confirmed((struct ip6_conntrack *)skb->nfct)) + return __ip6_conntrack_confirm(skb); + return NF_ACCEPT; +} + +extern struct list_head *ip6_conntrack_hash; +extern struct list_head ip6_conntrack_expect_list; +DECLARE_RWLOCK_EXTERN(ip6_conntrack_lock); +#endif /* _IP6_CONNTRACK_CORE_H */ + diff -urN linux-2.6.11/include/linux/netfilter_ipv6/ip6_conntrack_ftp.h x1/include/linux/netfilter_ipv6/ip6_conntrack_ftp.h --- linux-2.6.11/include/linux/netfilter_ipv6/ip6_conntrack_ftp.h 1970-01-01 01:00:00.000000000 +0100 +++ x1/include/linux/netfilter_ipv6/ip6_conntrack_ftp.h 2003-09-18 12:48:46.000000000 +0200 @@ -0,0 +1,57 @@ +/* + * Copyright (C)2003 USAGI/WIDE Project + * + * Authors: + * Yasuyuki Kozakai + * + * Based on: include/linux/netfilter_ipv4/ip_conntrack_ftp.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at 
your option) any later version. + */ +#ifndef _IP6_CONNTRACK_FTP_H +#define _IP6_CONNTRACK_FTP_H +/* FTP tracking. */ + +#ifdef __KERNEL__ + +#include + +/* Protects ftp part of conntracks */ +DECLARE_LOCK_EXTERN(ip6_ftp_lock); + +#define FTP_PORT 21 + +#endif /* __KERNEL__ */ + +enum ip6_ct_ftp_type +{ + /* EPRT command from client */ + IP6_CT_FTP_EPRT, + /* EPSV response from server */ + IP6_CT_FTP_EPSV, +}; + +/* This structure is per expected connection */ +struct ip6_ct_ftp_expect +{ + /* We record seq number and length of ftp ip/port text here: all in + * host order. */ + + /* sequence number of IP address in packet is in ip_conntrack_expect */ + u_int32_t len; /* length of IPv6 address */ + enum ip6_ct_ftp_type ftptype; /* EPRT or EPSV ? */ + u_int16_t port; /* Port that was to be used */ +}; + +/* This structure exists only once per master */ +struct ip6_ct_ftp_master { + /* Next valid seq position for cmd matching after newline */ + u_int32_t seq_aft_nl[IP6_CT_DIR_MAX]; + /* 0 means seq_match_aft_nl not set */ + int seq_aft_nl_set[IP6_CT_DIR_MAX]; +}; + +#endif /* _IP6_CONNTRACK_FTP_H */ diff -urN linux-2.6.11/include/linux/netfilter_ipv6/ip6_conntrack_helper.h x1/include/linux/netfilter_ipv6/ip6_conntrack_helper.h --- linux-2.6.11/include/linux/netfilter_ipv6/ip6_conntrack_helper.h 1970-01-01 01:00:00.000000000 +0100 +++ x1/include/linux/netfilter_ipv6/ip6_conntrack_helper.h 2003-09-18 12:48:46.000000000 +0200 @@ -0,0 +1,57 @@ +/* + * Copyright (C)2003 USAGI/WIDE Project + * + * Authors: + * Yasuyuki Kozakai + * + * Based on: include/linux/netfilter_ipv4/ip_conntrack_helper.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +/* IP6 connection tracking helpers. 
*/ +#ifndef _IP6_CONNTRACK_HELPER_H +#define _IP6_CONNTRACK_HELPER_H +#include + +struct module; + +/* Reuse expectation when max_expected reached */ +#define IP6_CT_HELPER_F_REUSE_EXPECT 0x01 + +struct ip6_conntrack_helper +{ + struct list_head list; /* Internal use. */ + + const char *name; /* name of the module */ + unsigned char flags; /* Flags (see above) */ + struct module *me; /* pointer to self */ + unsigned int max_expected; /* Maximum number of concurrent + * expected connections */ + unsigned int timeout; /* timeout for expecteds */ + + /* Mask of things we will help (compared against server response) */ + struct ip6_conntrack_tuple tuple; + struct ip6_conntrack_tuple mask; + + /* Function to call when data passes; return verdict, or -1 to + invalidate. */ + int (*help)(const struct sk_buff *skb, + unsigned int protoff, + struct ip6_conntrack *ct, + enum ip6_conntrack_info conntrackinfo); +}; + +extern int ip6_conntrack_helper_register(struct ip6_conntrack_helper *); +extern void ip6_conntrack_helper_unregister(struct ip6_conntrack_helper *); + +extern struct ip6_conntrack_helper *ip6_ct_find_helper(const struct ip6_conntrack_tuple *tuple); + +/* Add an expected connection: can have more than one per connection */ +extern int ip6_conntrack_expect_related(struct ip6_conntrack *related_to, + struct ip6_conntrack_expect *exp); +extern void ip6_conntrack_unexpect_related(struct ip6_conntrack_expect *exp); + +#endif /*_IP6_CONNTRACK_HELPER_H*/ diff -urN linux-2.6.11/include/linux/netfilter_ipv6/ip6_conntrack_icmpv6.h x1/include/linux/netfilter_ipv6/ip6_conntrack_icmpv6.h --- linux-2.6.11/include/linux/netfilter_ipv6/ip6_conntrack_icmpv6.h 1970-01-01 01:00:00.000000000 +0100 +++ x1/include/linux/netfilter_ipv6/ip6_conntrack_icmpv6.h 2003-09-18 12:48:46.000000000 +0200 @@ -0,0 +1,24 @@ +/* + * Copyright (C)2003 USAGI/WIDE Project + * + * Authors: + * Yasuyuki Kozakai + * + * Based on: include/linux/netfilter_ipv4/ip_conntrack_icmp.h + * + * This program is free 
software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _IP6_CONNTRACK_ICMPV6_H +#define _IP6_CONNTRACK_ICMPV6_H +/* ICMPv6 tracking. */ +#include + +struct ip6_ct_icmpv6 +{ + /* Optimization: when number in == number out, forget immediately. */ + atomic_t count; +}; +#endif /* _IP6_CONNTRACK_ICMPv6_H */ diff -urN linux-2.6.11/include/linux/netfilter_ipv6/ip6_conntrack_protocol.h x1/include/linux/netfilter_ipv6/ip6_conntrack_protocol.h --- linux-2.6.11/include/linux/netfilter_ipv6/ip6_conntrack_protocol.h 1970-01-01 01:00:00.000000000 +0100 +++ x1/include/linux/netfilter_ipv6/ip6_conntrack_protocol.h 2003-09-18 12:48:46.000000000 +0200 @@ -0,0 +1,83 @@ +/* + * Copyright (C)2003 USAGI/WIDE Project + * + * Authors: + * Yasuyuki Kozakai + * + * Based on: include/linux/netfilter_ipv4/ip_conntrack_protocol.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +/* Header for use in defining a given protocol for connection tracking. */ +#ifndef _IP6_CONNTRACK_PROTOCOL_H +#define _IP6_CONNTRACK_PROTOCOL_H +#include +#include + +struct ip6_conntrack_protocol +{ + /* Next pointer. */ + struct list_head list; + + /* Protocol number. */ + u_int8_t proto; + + /* Protocol name */ + const char *name; + + /* Try to fill in the third arg: dataoff is offset past IPv6 + hdr and IPv6 ext hdrs. Return true if possible. */ + int (*pkt_to_tuple)(const struct sk_buff *skb, + unsigned int dataoff, + struct ip6_conntrack_tuple *tuple); + + /* Invert the per-proto part of the tuple: ie. turn xmit into reply. + * Some packets can't be inverted: return 0 in that case. 
+ */ + int (*invert_tuple)(struct ip6_conntrack_tuple *inverse, + const struct ip6_conntrack_tuple *orig); + + /* Print out the per-protocol part of the tuple. */ + unsigned int (*print_tuple)(char *buffer, + const struct ip6_conntrack_tuple *); + + /* Print out the private part of the conntrack. */ + unsigned int (*print_conntrack)(char *buffer, + const struct ip6_conntrack *); + + /* Returns verdict for packet, or -1 for invalid. */ + int (*packet)(struct ip6_conntrack *conntrack, + const struct sk_buff *skb, + unsigned int dataoff, + enum ip6_conntrack_info ctinfo); + + /* Called when a new connection for this protocol found; + * returns TRUE if it's OK. If so, packet() called next. */ + int (*new)(struct ip6_conntrack *conntrack, const struct sk_buff *skb, + unsigned int dataoff); + + /* Called when a conntrack entry is destroyed */ + void (*destroy)(struct ip6_conntrack *conntrack); + + /* Has to decide if a expectation matches one packet or not */ + int (*exp_matches_pkt)(struct ip6_conntrack_expect *exp, + const struct sk_buff *skb, + unsigned int dataoff); + + /* Module (if any) which this is connected to. */ + struct module *me; +}; + +/* Protocol registration. 
*/ +extern int ip6_conntrack_protocol_register(struct ip6_conntrack_protocol *proto); +extern void ip6_conntrack_protocol_unregister(struct ip6_conntrack_protocol *proto); + +/* Existing built-in protocols */ +extern struct ip6_conntrack_protocol ip6_conntrack_protocol_tcp; +extern struct ip6_conntrack_protocol ip6_conntrack_protocol_udp; +extern struct ip6_conntrack_protocol ip6_conntrack_protocol_icmpv6; +extern int ip6_conntrack_protocol_tcp_init(void); +#endif /*_IP6_CONNTRACK_PROTOCOL_H*/ diff -urN linux-2.6.11/include/linux/netfilter_ipv6/ip6_conntrack_reasm.h x1/include/linux/netfilter_ipv6/ip6_conntrack_reasm.h --- linux-2.6.11/include/linux/netfilter_ipv6/ip6_conntrack_reasm.h 1970-01-01 01:00:00.000000000 +0100 +++ x1/include/linux/netfilter_ipv6/ip6_conntrack_reasm.h 2003-09-18 12:48:46.000000000 +0200 @@ -0,0 +1,28 @@ +/* + * Copyright (C)2003 USAGI/WIDE Project + * + * Authors: + * Yasuyuki Kozakai + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#ifndef _IP6_CONNTRACK_REASM_H +#define _IP6_CONNTRACK_REASM_H + +#include +extern struct sk_buff * +ip6_ct_gather_frags(struct sk_buff *skb); + +extern int +ip6_ct_output_frags(struct sk_buff *skb, struct nf_info *info); + +extern int ip6_ct_kfree_frags(struct sk_buff *skb); + +extern int ip6_ct_frags_init(void); +extern void ip6_ct_frags_cleanup(void); + +#endif /* _IP6_CONNTRACK_REASM_H */ + diff -urN linux-2.6.11/include/linux/netfilter_ipv6/ip6_conntrack_tcp.h x1/include/linux/netfilter_ipv6/ip6_conntrack_tcp.h --- linux-2.6.11/include/linux/netfilter_ipv6/ip6_conntrack_tcp.h 1970-01-01 01:00:00.000000000 +0100 +++ x1/include/linux/netfilter_ipv6/ip6_conntrack_tcp.h 2003-09-18 12:48:46.000000000 +0200 @@ -0,0 +1,41 @@ +/* + * Copyright (C)2003 USAGI/WIDE Project + * + * Authors: + * Yasuyuki Kozakai + * + * Based on: include/linux/netfilter_ipv4/ip_conntrack_tcp.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _IP6_CONNTRACK_TCP_H +#define _IP6_CONNTRACK_TCP_H +/* TCP tracking. 
*/ + +enum tcp_conntrack { + TCP_CONNTRACK_NONE, + TCP_CONNTRACK_ESTABLISHED, + TCP_CONNTRACK_SYN_SENT, + TCP_CONNTRACK_SYN_RECV, + TCP_CONNTRACK_FIN_WAIT, + TCP_CONNTRACK_TIME_WAIT, + TCP_CONNTRACK_CLOSE, + TCP_CONNTRACK_CLOSE_WAIT, + TCP_CONNTRACK_LAST_ACK, + TCP_CONNTRACK_LISTEN, + TCP_CONNTRACK_MAX +}; + +struct ip6_ct_tcp +{ + enum tcp_conntrack state; + + /* Poor man's window tracking: sequence number of valid ACK + handshake completion packet */ + u_int32_t handshake_ack; +}; + +#endif /* _IP6_CONNTRACK_TCP_H */ diff -urN linux-2.6.11/include/linux/netfilter_ipv6/ip6_conntrack_tuple.h x1/include/linux/netfilter_ipv6/ip6_conntrack_tuple.h --- linux-2.6.11/include/linux/netfilter_ipv6/ip6_conntrack_tuple.h 1970-01-01 01:00:00.000000000 +0100 +++ x1/include/linux/netfilter_ipv6/ip6_conntrack_tuple.h 2003-09-18 12:48:46.000000000 +0200 @@ -0,0 +1,131 @@ +/* + * Copyright (C)2003 USAGI/WIDE Project + * + * Authors: + * Yasuyuki Kozakai + * + * Based on: include/linux/netfilter_ipv4/ip_conntrack_tuple.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _IP6_CONNTRACK_TUPLE_H +#define _IP6_CONNTRACK_TUPLE_H + +#ifdef __KERNEL__ +#include +#include +#endif + +/* A `tuple' is a structure containing the information to uniquely + identify a connection. ie. if two packets have the same tuple, they + are in the same connection; if not, they are not. + + We divide the structure along "manipulatable" and + "non-manipulatable" lines, for the benefit of the NAT code. +*/ + +/* The protocol-specific manipulable parts of the tuple: always in + network order! */ +union ip6_conntrack_manip_proto +{ + /* Add other protocols here. 
*/ + u_int16_t all; + + struct { + u_int16_t port; + } tcp; + struct { + u_int16_t port; + } udp; + struct { + u_int16_t id; + } icmpv6; +}; + +/* The manipulable part of the tuple. */ +struct ip6_conntrack_manip +{ + struct in6_addr ip; + union ip6_conntrack_manip_proto u; +}; + +/* This contains the information to distinguish a connection. */ +struct ip6_conntrack_tuple +{ + struct ip6_conntrack_manip src; + + /* These are the parts of the tuple which are fixed. */ + struct { + struct in6_addr ip; + union { + /* Add other protocols here. */ + u_int16_t all; + + struct { + u_int16_t port; + } tcp; + struct { + u_int16_t port; + } udp; + struct { + u_int8_t type, code; + } icmpv6; + } u; + + /* The protocol. */ + u_int16_t protonum; + } dst; +}; + +enum ip6_conntrack_dir +{ + IP6_CT_DIR_ORIGINAL, + IP6_CT_DIR_REPLY, + IP6_CT_DIR_MAX +}; + +#ifdef __KERNEL__ + +#define DUMP_TUPLE(tp) \ +{ \ + DEBUGP("tuple %p: %u %x:%x:%x:%x:%x:%x:%x:%x, %hu -> %x:%x:%x:%x:%x:%x:%x:%x, %hu\n", \ + (tp), (tp)->dst.protonum, \ + NIP6((tp)->src.ip), ntohs((tp)->src.u.all), \ + NIP6((tp)->dst.ip), ntohs((tp)->dst.u.all)); \ +} + +#define CTINFO2DIR(ctinfo) ((ctinfo) >= IP6_CT_IS_REPLY ? IP6_CT_DIR_REPLY : IP6_CT_DIR_ORIGINAL) + +/* If we're the first tuple, it's the original dir. */ +#define DIRECTION(h) ((enum ip6_conntrack_dir)(&(h)->ctrack->tuplehash[1] == (h))) + +/* Connections have two entries in the hash table: one for each way */ +struct ip6_conntrack_tuple_hash +{ + struct list_head list; + + struct ip6_conntrack_tuple tuple; + + /* this == &ctrack->tuplehash[DIRECTION(this)]. 
*/ + struct ip6_conntrack *ctrack; +}; + +#endif /* __KERNEL__ */ + +extern int ip6_ct_tuple_src_equal(const struct ip6_conntrack_tuple *t1, + const struct ip6_conntrack_tuple *t2); + +extern int ip6_ct_tuple_dst_equal(const struct ip6_conntrack_tuple *t1, + const struct ip6_conntrack_tuple *t2); + +extern int ip6_ct_tuple_equal(const struct ip6_conntrack_tuple *t1, + const struct ip6_conntrack_tuple *t2); + +extern int ip6_ct_tuple_mask_cmp(const struct ip6_conntrack_tuple *t, + const struct ip6_conntrack_tuple *tuple, + const struct ip6_conntrack_tuple *mask); + +#endif /* _IP6_CONNTRACK_TUPLE_H */ diff -urN linux-2.6.11/include/linux/netfilter_ipv6/ip6t_REJECT.h x1/include/linux/netfilter_ipv6/ip6t_REJECT.h --- linux-2.6.11/include/linux/netfilter_ipv6/ip6t_REJECT.h 1970-01-01 01:00:00.000000000 +0100 +++ x1/include/linux/netfilter_ipv6/ip6t_REJECT.h 2004-09-20 03:55:57.000000000 +0200 @@ -0,0 +1,18 @@ +#ifndef _IP6T_REJECT_H +#define _IP6T_REJECT_H + +enum ip6t_reject_with { + IP6T_ICMP6_NO_ROUTE, + IP6T_ICMP6_ADM_PROHIBITED, + IP6T_ICMP6_NOT_NEIGHBOUR, + IP6T_ICMP6_ADDR_UNREACH, + IP6T_ICMP6_PORT_UNREACH, + IP6T_ICMP6_ECHOREPLY, + IP6T_TCP_RESET +}; + +struct ip6t_reject_info { + enum ip6t_reject_with with; /* reject type */ +}; + +#endif /*_IP6T_REJECT_H*/ diff -urN linux-2.6.11/include/linux/netfilter_ipv6/ip6t_state.h x1/include/linux/netfilter_ipv6/ip6t_state.h --- linux-2.6.11/include/linux/netfilter_ipv6/ip6t_state.h 1970-01-01 01:00:00.000000000 +0100 +++ x1/include/linux/netfilter_ipv6/ip6t_state.h 2003-09-18 12:48:46.000000000 +0200 @@ -0,0 +1,24 @@ +/* + * Copyright (C)2003 USAGI/WIDE Project + * + * Authors: + * Yasuyuki Kozakai + * + * Based on: include/linux/netfilter_ipv4/ipt_state.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#ifndef _IP6T_STATE_H +#define _IP6T_STATE_H + +#define IP6T_STATE_BIT(ctinfo) (1 << ((ctinfo)%IP6_CT_IS_REPLY+1)) +#define IP6T_STATE_INVALID (1 << 0) + +struct ip6t_state_info +{ + unsigned int statemask; +}; +#endif /*_IP6T_STATE_H*/ diff -urN linux-2.6.11/include/linux/rtnetlink.h x1/include/linux/rtnetlink.h --- linux-2.6.11/include/linux/rtnetlink.h 2005-03-02 08:38:18.000000000 +0100 +++ x1/include/linux/rtnetlink.h 2005-02-11 17:24:31.000000000 +0100 @@ -346,6 +346,7 @@ #define RTAX_FEATURE_ECN 0x00000001 #define RTAX_FEATURE_SACK 0x00000002 #define RTAX_FEATURE_TIMESTAMP 0x00000004 +#define RTAX_FEATURE_ALLFRAG 0x00000008 struct rta_session { diff -urN linux-2.6.11/include/linux/sysctl.h x1/include/linux/sysctl.h --- linux-2.6.11/include/linux/sysctl.h 2005-03-02 08:38:10.000000000 +0100 +++ x1/include/linux/sysctl.h 2005-02-28 07:45:54.000000000 +0100 @@ -455,7 +455,8 @@ NET_IPV6_ROUTE_GC_INTERVAL=6, NET_IPV6_ROUTE_GC_ELASTICITY=7, NET_IPV6_ROUTE_MTU_EXPIRES=8, - NET_IPV6_ROUTE_MIN_ADVMSS=9 + NET_IPV6_ROUTE_MIN_ADVMSS=9, + NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS=10 }; enum { @@ -475,7 +476,8 @@ NET_IPV6_REGEN_MAX_RETRY=14, NET_IPV6_MAX_DESYNC_FACTOR=15, NET_IPV6_MAX_ADDRESSES=16, - NET_IPV6_FORCE_MLD_VERSION=17 + NET_IPV6_FORCE_MLD_VERSION=17, + NET_IPV6_MC_FORWARDING=18 }; /* /proc/sys/net/ipv6/icmp */ @@ -488,8 +490,8 @@ NET_NEIGH_MCAST_SOLICIT=1, NET_NEIGH_UCAST_SOLICIT=2, NET_NEIGH_APP_SOLICIT=3, - NET_NEIGH_RETRANS_TIME=4, - NET_NEIGH_REACHABLE_TIME=5, + NET_NEIGH_RETRANS_TIME=4, /* deprecated */ + NET_NEIGH_REACHABLE_TIME=5, /* deprecated */ NET_NEIGH_DELAY_PROBE_TIME=6, NET_NEIGH_GC_STALE_TIME=7, NET_NEIGH_UNRES_QLEN=8, @@ -500,7 +502,10 @@ NET_NEIGH_GC_INTERVAL=13, NET_NEIGH_GC_THRESH1=14, NET_NEIGH_GC_THRESH2=15, - NET_NEIGH_GC_THRESH3=16 + NET_NEIGH_GC_THRESH3=16, + NET_NEIGH_RETRANS_TIME_MS=17, + NET_NEIGH_REACHABLE_TIME_MS=18, + __NET_NEIGH_MAX }; /* /proc/sys/net/ipx */ diff -urN linux-2.6.11/include/linux/xfrm.h x1/include/linux/xfrm.h --- 
linux-2.6.11/include/linux/xfrm.h 2005-03-02 08:38:37.000000000 +0100 +++ x1/include/linux/xfrm.h 2005-01-21 06:15:37.000000000 +0100 @@ -90,8 +90,12 @@ { XFRM_POLICY_IN = 0, XFRM_POLICY_OUT = 1, +#ifdef CONFIG_USE_POLICY_FWD XFRM_POLICY_FWD = 2, XFRM_POLICY_MAX = 3 +#else + XFRM_POLICY_MAX = 2 +#endif }; enum diff -urN linux-2.6.11/include/net/addrconf.h x1/include/net/addrconf.h --- linux-2.6.11/include/net/addrconf.h 2005-03-02 08:38:18.000000000 +0100 +++ x1/include/net/addrconf.h 2005-02-03 15:43:50.000000000 +0100 @@ -102,6 +102,8 @@ extern void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len); +extern int ipv6_get_hoplimit(struct net_device *dev); + /* * anycast prototypes (anycast.c) */ diff -urN linux-2.6.11/include/net/dst.h x1/include/net/dst.h --- linux-2.6.11/include/net/dst.h 2005-03-02 08:38:38.000000000 +0100 +++ x1/include/net/dst.h 2005-02-28 07:45:54.000000000 +0100 @@ -125,6 +125,18 @@ } static inline int +ip6_dst_allfrag(const struct dst_entry *dst) +{ +#ifdef CONFIG_IPV6_ALLFRAG + int ret = dst_path_metric(dst, RTAX_FEATURES) & RTAX_FEATURE_ALLFRAG; + barrier(); + return ret; +#else + return 0; +#endif +}; + +static inline int dst_metric_locked(struct dst_entry *dst, int metric) { return dst_metric(dst, RTAX_LOCK) & (1<stats.ipv6_statistics, field); \ + SNMP_INC_STATS(ipv6_statistics, field); \ +}) +#define IP6_INC_STATS_BH(idev, field) ({ \ + struct inet6_dev *_idev = (idev); \ + if (likely(_idev != NULL)) \ + SNMP_INC_STATS_BH((_idev)->stats.ipv6_statistics, field); \ + SNMP_INC_STATS_BH(ipv6_statistics, field); \ +}) +#define IP6_INC_STATS_USER(idev, field) ({ \ + struct inet6_dev *_idev = (idev); \ + if (likely(_idev != NULL)) \ + SNMP_INC_STATS_USER(_idev->stats.ipv6_statistics, field); \ + SNMP_INC_STATS_USER(ipv6_statistics, field); \ +}) +#else #define IP6_INC_STATS(field) SNMP_INC_STATS(ipv6_statistics, field) #define IP6_INC_STATS_BH(field) SNMP_INC_STATS_BH(ipv6_statistics, field) #define IP6_INC_STATS_USER(field) 
SNMP_INC_STATS_USER(ipv6_statistics, field) +#endif DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics); #define ICMP6_INC_STATS(idev, field) ({ \ struct inet6_dev *_idev = (idev); \ @@ -142,9 +163,30 @@ SNMP_INC_STATS_OFFSET_BH(icmpv6_statistics, field, _offset); \ }) DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6); +#if 0 +#define UDP6_INC_STATS(idev, field) ({ \ + struct inet6_dev *_idev = (idev); \ + if (likely(_idev != NULL)) \ + SNMP_INC_STATS(idev->stats.udp_stats_in6, field); \ + SNMP_INC_STATS(udp_stats_in6, field); \ +}) +#define UDP6_INC_STATS_BH(idev, field) ({ \ + struct inet6_dev *_idev = (idev); \ + if (likely(_idev != NULL)) \ + SNMP_INC_STATS_BH((_idev)->stats.udp_stats_in6, field); \ + SNMP_INC_STATS_BH(udp_stats_in6, field); \ +}) +#define UDP6_INC_STATS_USER(idev, field) ({ \ + struct inet6_dev *_idev = (idev); \ + if (likely(_idev != NULL)) \ + SNMP_INC_STATS_USER(_idev->stats.udp_stats_in6, field); \ + SNMP_INC_STATS_USER(udp_stats_in6, field); \ +}) +#else #define UDP6_INC_STATS(field) SNMP_INC_STATS(udp_stats_in6, field) #define UDP6_INC_STATS_BH(field) SNMP_INC_STATS_BH(udp_stats_in6, field) -#define UDP6_INC_STATS_USER(field) SNMP_INC_STATS_USER(udp_stats_in6, field) +#define UDP6_INC_STATS_USER(field) SNMP_INC_STATS_USER(udp_stats_in6, field) +#endif extern atomic_t inet6_sock_nr; int snmp6_register_dev(struct inet6_dev *idev); @@ -231,7 +273,7 @@ void (*destructor)(struct sock *)); -extern int ipv6_parse_hopopts(struct sk_buff *skb, int); +extern int ipv6_parse_hopopts(struct sk_buff **skb, unsigned int *nhoffp); extern struct ipv6_txoptions * ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt); @@ -249,12 +291,28 @@ char *, unsigned int, unsigned int); - -extern int ipv6_addr_type(const struct in6_addr *addr); +/* + * Address manipulation functions + */ +extern int __ipv6_addr_type(const struct in6_addr *addr); +static inline int ipv6_addr_type(const struct in6_addr *addr) +{ + return __ipv6_addr_type(addr) & 0xffff; +} 
static inline int ipv6_addr_scope(const struct in6_addr *addr) { - return ipv6_addr_type(addr) & IPV6_ADDR_SCOPE_MASK; + return __ipv6_addr_type(addr) & IPV6_ADDR_SCOPE_MASK; +} + +static inline int __ipv6_addr_src_scope(int type) +{ + return type == IPV6_ADDR_ANY ? __IPV6_ADDR_SCOPE_INVALID : type>>16; +} + +static inline int ipv6_addr_src_scope(const struct in6_addr *addr) +{ + return __ipv6_addr_src_scope(__ipv6_addr_type(addr)); } static inline int ipv6_addr_cmp(const struct in6_addr *a1, const struct in6_addr *a2) @@ -305,6 +363,33 @@ a1->s6_addr32[3] == a2->s6_addr32[3]); } +/* compare "prefix length" bits of an address */ +static inline int __ipv6_prefix_equal(const u32 *a1, const u32 *a2, + unsigned int prefixlen) +{ + unsigned pdw, pbi; + + /* check complete u32 in prefix */ + pdw = prefixlen >> 5; + if (pdw && memcmp(a1, a2, pdw << 2)) + return 0; + + /* check incomplete u32 in prefix */ + pbi = prefixlen & 0x1f; + if (pbi && ((a1[pdw] ^ a2[pdw]) & htonl((0xffffffff) << (32 - pbi)))) + return 0; + + return 1; +} + +static inline int ipv6_prefix_equal(const struct in6_addr *a1, + const struct in6_addr *a2, + unsigned int prefixlen) +{ + return __ipv6_prefix_equal(a1->s6_addr32, a2->s6_addr32, + prefixlen); +} + static inline int ipv6_addr_any(const struct in6_addr *a) { return ((a->s6_addr32[0] | a->s6_addr32[1] | @@ -368,6 +453,7 @@ extern int ip6_forward(struct sk_buff *skb); extern int ip6_input(struct sk_buff *skb); extern int ip6_mc_input(struct sk_buff *skb); +extern int ip6_mr_input(struct sk_buff *skb); /* * Extension header (options) processing diff -urN linux-2.6.11/include/net/neighbour.h x1/include/net/neighbour.h --- linux-2.6.11/include/net/neighbour.h 2005-03-02 08:38:32.000000000 +0100 +++ x1/include/net/neighbour.h 2005-02-10 06:31:25.000000000 +0100 @@ -54,6 +54,8 @@ #include #include +#include + #include #include @@ -158,7 +160,8 @@ struct pneigh_entry { struct pneigh_entry *next; - struct net_device *dev; + struct net_device *dev; + 
struct neigh_table *tbl; u8 key[0]; }; @@ -205,6 +208,27 @@ #endif }; +struct neigh_notifier_parms { + void (*link_notifier)(void *); + void *link_notifier_data; +}; + +static __inline__ char * neigh_state(int state) +{ + switch (state) { + case NUD_NONE: return "NONE"; + case NUD_INCOMPLETE: return "INCOMPLETE"; + case NUD_REACHABLE: return "REACHABLE"; + case NUD_STALE: return "STALE"; + case NUD_DELAY: return "DELAY"; + case NUD_PROBE: return "PROBE"; + case NUD_FAILED: return "FAILED"; + case NUD_NOARP: return "NOARP"; + case NUD_PERMANENT: return "PERMANENT"; + default: return "???"; + } +} + /* flags for neigh_update() */ #define NEIGH_UPDATE_F_OVERRIDE 0x00000001 #define NEIGH_UPDATE_F_WEAK_OVERRIDE 0x00000002 @@ -274,7 +298,8 @@ struct neigh_parms *p, int p_id, int pdev_id, char *p_name, - proc_handler *proc_handler); + proc_handler *proc_handler, + void (*notifier)(void *)); extern void neigh_sysctl_unregister(struct neigh_parms *p); static inline void __neigh_parms_put(struct neigh_parms *parms) @@ -300,18 +325,35 @@ static inline void neigh_release(struct neighbour *neigh) { +#ifdef CONFIG_IPV6_NDISC_DEBUG + printk(KERN_DEBUG "%s(neigh=%p): refcnt=%d\n", + __FUNCTION__, neigh, atomic_read(&neigh->refcnt)-1); +#endif if (atomic_dec_and_test(&neigh->refcnt)) neigh_destroy(neigh); } static inline struct neighbour * neigh_clone(struct neighbour *neigh) { +#ifdef CONFIG_IPV6_NDISC_DEBUG + printk(KERN_DEBUG "%s(neigh=%p): refcnt=%d\n", + __FUNCTION__, neigh, neigh ? 
atomic_read(&neigh->refcnt)+1 : 0); +#endif if (neigh) atomic_inc(&neigh->refcnt); return neigh; } +#ifdef CONFIG_IPV6_NDISC_DEBUG +#define neigh_hold(n) ({ \ + struct neighbour *_n = (n); \ + printk(KERN_DEBUG "%s(neigh=%p): refcnt=%d\n", \ + __FUNCTION__, _n, atomic_read(&_n->refcnt)+1); \ + atomic_inc(&_n->refcnt); \ +}) +#else #define neigh_hold(n) atomic_inc(&(n)->refcnt) +#endif static inline void neigh_confirm(struct neighbour *neigh) { @@ -331,6 +373,11 @@ static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { +#ifdef CONFIG_IPV6_NDISC_DEBUG + printk(KERN_DEBUG + "%s(neigh=%p, skb=%p): %s, refcnt=%d\n", + __FUNCTION__, neigh, skb, neigh_state(neigh->nud_state), atomic_read(&neigh->refcnt)); +#endif neigh->used = jiffies; if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) return __neigh_event_send(neigh, skb); diff -urN linux-2.6.11/include/net/xfrm.h x1/include/net/xfrm.h --- linux-2.6.11/include/net/xfrm.h 2005-03-02 08:38:25.000000000 +0100 +++ x1/include/net/xfrm.h 2005-02-03 13:45:59.000000000 +0100 @@ -471,8 +471,12 @@ static inline int __xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl) { - return addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) && - addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) && + return __ipv6_prefix_equal(fl->fl6_dst.s6_addr32, + sel->daddr.a6, + sel->prefixlen_d) && + __ipv6_prefix_equal(fl->fl6_src.s6_addr32, + sel->saddr.a6, + sel->prefixlen_s) && !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) && !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) && (fl->proto == sel->proto || !sel->proto) && @@ -863,7 +867,7 @@ extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard); extern void xfrm_input_init(void); -extern int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq); +extern int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi); extern void xfrm_probe_algs(void); extern int 
xfrm_count_auth_supported(void); diff -urN linux-2.6.11/include/video/tgafb.h x1/include/video/tgafb.h --- linux-2.6.11/include/video/tgafb.h 2005-03-02 08:38:19.000000000 +0100 +++ x1/include/video/tgafb.h 2004-12-07 11:12:39.000000000 +0100 @@ -47,7 +47,6 @@ #define TGA_VALID_REG 0x0070 #define TGA_CURSOR_XY_REG 0x0074 #define TGA_INTR_STAT_REG 0x007c -#define TGA_DATA_REG 0x0080 #define TGA_RAMDAC_SETUP_REG 0x00c0 #define TGA_BLOCK_COLOR0_REG 0x0140 #define TGA_BLOCK_COLOR1_REG 0x0144 diff -urN linux-2.6.11/net/Kconfig x1/net/Kconfig --- linux-2.6.11/net/Kconfig 2005-03-02 08:38:34.000000000 +0100 +++ x1/net/Kconfig 2005-02-28 12:50:45.000000000 +0100 @@ -81,6 +81,18 @@ Say Y unless you know what you are doing. +config USE_POLICY_FWD + bool "Use xfrm policy fwd" + default y + ---help--- + Use XFRM_POLICY_FWD, which corresponds to fwd in setkey, + when specifying inbound forwarding policy. + This option is an original USAGI change. If you want the original + kernel behavior of IPsec, say Y. If you want behavior + which is similar to KAME IPsec stack, say N. + + If unsure, say Y. + config INET bool "TCP/IP networking" ---help--- @@ -107,12 +119,12 @@ # IPv6 as module will cause a CRASH if you try to unload it config IPV6 - tristate "The IPv6 protocol (EXPERIMENTAL)" - depends on INET && EXPERIMENTAL - select CRYPTO if IPV6_PRIVACY - select CRYPTO_MD5 if IPV6_PRIVACY + tristate "The IPv6 protocol" + depends on INET + select CRYPTO if IPV6_PRIVACY_MD5 + select CRYPTO_MD5 if IPV6_PRIVACY_MD5 ---help--- - This is experimental support for the IP version 6 (formerly called + This is additional support for the IP version 6 (formerly called IPng "IP next generation"). You will still be able to do regular IPv4 networking as well. @@ -127,8 +139,6 @@ To compile this protocol support as a module, choose M here: the module will be called ipv6. - It is safe to say N here for now. 
- source "net/ipv6/Kconfig" menuconfig NETFILTER diff -urN linux-2.6.11/net/core/neighbour.c x1/net/core/neighbour.c --- linux-2.6.11/net/core/neighbour.c 2005-03-02 08:37:47.000000000 +0100 +++ x1/net/core/neighbour.c 2005-02-20 04:42:00.000000000 +0100 @@ -33,13 +33,18 @@ #include #include +#ifdef CONFIG_IPV6_NDISC_DEBUG +#define NEIGH_DEBUG 3 +#else #define NEIGH_DEBUG 1 +#endif #define NEIGH_PRINTK(x...) printk(x) #define NEIGH_NOPRINTK(x...) do { ; } while(0) #define NEIGH_PRINTK0 NEIGH_PRINTK #define NEIGH_PRINTK1 NEIGH_NOPRINTK #define NEIGH_PRINTK2 NEIGH_NOPRINTK +#define NEIGH_PRINTK3 NEIGH_NOPRINTK #if NEIGH_DEBUG >= 1 #undef NEIGH_PRINTK1 @@ -49,6 +54,10 @@ #undef NEIGH_PRINTK2 #define NEIGH_PRINTK2 NEIGH_PRINTK #endif +#if NEIGH_DEBUG >= 3 +#undef NEIGH_PRINTK3 +#define NEIGH_PRINTK3 NEIGH_PRINTK +#endif #define PNEIGH_HASHMASK 0xF @@ -118,6 +127,10 @@ int shrunk = 0; int i; + NEIGH_PRINTK3(KERN_DEBUG + "%s(tbl=%p)\n", + __FUNCTION__, tbl); + NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs); write_lock_bh(&tbl->lock); @@ -156,9 +169,21 @@ { if ((n->nud_state & NUD_IN_TIMER) && del_timer(&n->timer)) { + + NEIGH_PRINTK3(KERN_DEBUG + "%s(n=%p): %s, refcnt=%d\n", + __FUNCTION__, + n, neigh_state(n->nud_state), atomic_read(&n->refcnt) -1); + neigh_release(n); return 1; } + + NEIGH_PRINTK3(KERN_DEBUG + "%s(n=%p): %s, refcnt=%d\n", + __FUNCTION__, + n, neigh_state(n->nud_state), atomic_read(&n->refcnt)); + return 0; } @@ -203,6 +228,11 @@ { int i; + NEIGH_PRINTK3(KERN_DEBUG + "%s(tbl=%p, dev=%p)\n", + __FUNCTION__, + tbl, dev); + write_lock_bh(&tbl->lock); for (i = 0; i <= tbl->hash_mask; i++) { @@ -255,18 +285,30 @@ unsigned long now = jiffies; int entries; + NEIGH_PRINTK3(KERN_DEBUG + "%s(tbl=%p)\n", + __FUNCTION__, tbl); + entries = atomic_inc_return(&tbl->entries) - 1; if (entries >= tbl->gc_thresh3 || (entries >= tbl->gc_thresh2 && time_after(now, tbl->last_flush + 5 * HZ))) { if (!neigh_forced_gc(tbl) && - entries >= tbl->gc_thresh3) + entries > 
tbl->gc_thresh3) { + NEIGH_PRINTK3(KERN_DEBUG + "%s(): failed to shrink table\n", + __FUNCTION__); goto out_entries; + } } n = kmem_cache_alloc(tbl->kmem_cachep, SLAB_ATOMIC); - if (!n) + if (!n) { + NEIGH_PRINTK3(KERN_DEBUG + "%s(): failed to allocate memory\n", + __FUNCTION__); goto out_entries; + } memset(n, 0, tbl->entry_size); @@ -364,11 +406,20 @@ NEIGH_CACHE_STAT_INC(tbl, lookups); + NEIGH_PRINTK3(KERN_DEBUG + "%s(tbl=%p, pkey=%p, dev=%p)\n", + __FUNCTION__, + tbl, pkey, dev); + read_lock_bh(&tbl->lock); for (n = tbl->hash_buckets[hash_val]; n; n = n->next) { if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) { neigh_hold(n); NEIGH_CACHE_STAT_INC(tbl, hits); + NEIGH_PRINTK3(KERN_DEBUG + "%s() => %p (state=%s, refcnt=%d)\n", + __FUNCTION__, + n, neigh_state(n->nud_state), atomic_read(&n->refcnt)); break; } } @@ -384,11 +435,20 @@ NEIGH_CACHE_STAT_INC(tbl, lookups); + NEIGH_PRINTK3(KERN_DEBUG + "%s(tbl=%p, pkey=%p)\n", + __FUNCTION__, + tbl, pkey); + read_lock_bh(&tbl->lock); for (n = tbl->hash_buckets[hash_val]; n; n = n->next) { if (!memcmp(n->primary_key, pkey, key_len)) { neigh_hold(n); NEIGH_CACHE_STAT_INC(tbl, hits); + NEIGH_PRINTK3(KERN_DEBUG + "%s() => %p (state=%s, refcnt=%d)\n", + __FUNCTION__, + n, neigh_state(n->nud_state), atomic_read(&n->refcnt)); break; } } @@ -402,7 +462,14 @@ u32 hash_val; int key_len = tbl->key_len; int error; - struct neighbour *n1, *rc, *n = neigh_alloc(tbl); + struct neighbour *n1, *rc, *n; + + NEIGH_PRINTK3(KERN_DEBUG + "%s(tbl=%p, pkey=%p, dev=%p)\n", + __FUNCTION__, + tbl, pkey, dev); + + n = neigh_alloc(tbl); if (!n) { rc = ERR_PTR(-ENOBUFS); @@ -453,6 +520,10 @@ n->dead = 0; neigh_hold(n); write_unlock_bh(&tbl->lock); + NEIGH_PRINTK3(KERN_DEBUG + "%s(): => %p (state=%s, refcnt=%d)\n", + __FUNCTION__, + n, neigh_state(n->nud_state), atomic_read(&n->refcnt)); NEIGH_PRINTK2("neigh %p is created.\n", n); rc = n; out: @@ -496,6 +567,7 @@ memcpy(n->key, pkey, key_len); n->dev = dev; + n->tbl = tbl; if (dev) 
dev_hold(dev); @@ -578,6 +650,11 @@ { struct hh_cache *hh; + NEIGH_PRINTK3(KERN_DEBUG + "%s(neigh=%p): %s, refcnt=%d, dead=%d\n", + __FUNCTION__, + neigh, neigh_state(neigh->nud_state), atomic_read(&neigh->refcnt), neigh->dead); + NEIGH_CACHE_STAT_INC(neigh->tbl, destroys); if (!neigh->dead) { @@ -623,6 +700,10 @@ { struct hh_cache *hh; + NEIGH_PRINTK3(KERN_DEBUG + "%s(neigh=%p): %s, refcnt=%d\n", + __FUNCTION__, + neigh, neigh_state(neigh->nud_state), atomic_read(&neigh->refcnt)); NEIGH_PRINTK2("neigh %p is suspected.\n", neigh); neigh->output = neigh->ops->output; @@ -640,6 +721,10 @@ { struct hh_cache *hh; + NEIGH_PRINTK3(KERN_DEBUG + "%s(neigh=%p): %s, refcnt=%d\n", + __FUNCTION__, + neigh, neigh_state(neigh->nud_state), atomic_read(&neigh->refcnt)); NEIGH_PRINTK2("neigh %p is connected.\n", neigh); neigh->output = neigh->ops->connected_output; @@ -654,6 +739,11 @@ struct neighbour *n, **np; unsigned long expire, now = jiffies; + NEIGH_PRINTK3(KERN_DEBUG + "%s(arg=%p)\n", + __FUNCTION__, + tbl); + NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs); write_lock(&tbl->lock); @@ -678,6 +768,11 @@ write_lock(&n->lock); + NEIGH_PRINTK3(KERN_DEBUG + "%s(): - %p, state=%s, refcnt=%d\n", + __FUNCTION__, + n, neigh_state(n->nud_state), atomic_read(&n->refcnt)); + state = n->nud_state; if (state & (NUD_PERMANENT | NUD_IN_TIMER)) { write_unlock(&n->lock); @@ -733,17 +828,28 @@ struct neighbour *neigh = (struct neighbour *)arg; unsigned state; int notify = 0; + int refcnt; write_lock(&neigh->lock); - state = neigh->nud_state; + + NEIGH_PRINTK3(KERN_DEBUG + "%s(arg=%p): %s, refcnt=%d\n", + __FUNCTION__, + neigh, neigh_state(neigh->nud_state), atomic_read(&neigh->refcnt)); + now = jiffies; next = now + HZ; + if (!(state & NUD_IN_TIMER)) { #ifndef CONFIG_SMP printk(KERN_WARNING "neigh: timer & !nud_in_timer\n"); #endif + refcnt = atomic_read(&neigh->refcnt) - 1; + NEIGH_PRINTK3(KERN_DEBUG + "%s(): => state=%s, refcnt=%d\n", + __FUNCTION__, neigh_state(state), refcnt); goto out; } @@ 
-788,6 +894,7 @@ neigh->nud_state = NUD_FAILED; notify = 1; NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed); + NEIGH_PRINTK2("neigh %p is failed.\n", neigh); /* It is very thin place. report_unreachable is very complicated @@ -840,6 +947,11 @@ write_lock_bh(&neigh->lock); + NEIGH_PRINTK3(KERN_DEBUG + "%s(neigh=%p, skb=%p): %s\n", + __FUNCTION__, + neigh, skb, neigh_state(neigh->nud_state)); + rc = 0; if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)) goto out_unlock_bh; @@ -936,17 +1048,33 @@ struct net_device *dev; int update_isrouter = 0; + NEIGH_PRINTK3(KERN_DEBUG + "%s(neigh=%p, lladdr=%p, new=%u, flags=%08x): %s\n", + __FUNCTION__, + neigh, lladdr, new, flags, neigh_state(neigh->nud_state)); + + if (!neigh) { + NEIGH_PRINTK1(KERN_WARNING "neigh_update(): neigh==NULL\n"); + return -EINVAL; + } + write_lock_bh(&neigh->lock); dev = neigh->dev; old = neigh->nud_state; err = -EPERM; + if (!dev) { + NEIGH_PRINTK1(KERN_WARNING "neigh_update(): neigh->dev==NULL\n"); + return -EINVAL; + } + if (!(flags & NEIGH_UPDATE_F_ADMIN) && (old & (NUD_NOARP | NUD_PERMANENT))) goto out; if (!(new & NUD_VALID)) { + /* NONE,INCOMPLETE,FAILED */ neigh_del_timer(neigh); if (old & NUD_CONNECTED) neigh_suspect(neigh); @@ -1021,11 +1149,11 @@ } if (lladdr != neigh->ha) { + neigh->updated = jiffies; memcpy(&neigh->ha, lladdr, dev->addr_len); neigh_update_hhs(neigh); if (!(new & NUD_CONNECTED)) - neigh->confirmed = jiffies - - (neigh->parms->base_reachable_time << 1); + neigh->confirmed = jiffies - (neigh->parms->base_reachable_time<<1); #ifdef CONFIG_ARPD notify = 1; #endif @@ -1040,7 +1168,6 @@ struct sk_buff *skb; /* Again: avoid dead loop if something went wrong */ - while (neigh->nud_state & NUD_VALID && (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) { struct neighbour *n1 = neigh; @@ -1071,8 +1198,15 @@ u8 *lladdr, void *saddr, struct net_device *dev) { - struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev, - lladdr || !dev->addr_len); + struct neighbour *neigh; + + 
NEIGH_PRINTK3(KERN_DEBUG + "%s(tbl=%p, lladdr=%p, saddr=%p, dev=%p)\n", + __FUNCTION__, + tbl, lladdr, saddr, dev); + + neigh = __neigh_lookup(tbl, saddr, dev, + lladdr || !dev->addr_len); if (neigh) neigh_update(neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_OVERRIDE); @@ -1123,6 +1257,11 @@ { struct net_device *dev = skb->dev; + NEIGH_PRINTK3(KERN_DEBUG + "%s(skb=%p)\n", + __FUNCTION__, + skb); + __skb_pull(skb, skb->nh.raw - skb->data); if (dev->hard_header && @@ -1142,6 +1281,11 @@ struct neighbour *neigh; int rc = 0; + NEIGH_PRINTK3(KERN_DEBUG + "%s(skb=%p)\n", + __FUNCTION__, + skb); + if (!dst || !(neigh = dst->neighbour)) goto discard; @@ -1188,6 +1332,11 @@ struct neighbour *neigh = dst->neighbour; struct net_device *dev = neigh->dev; + NEIGH_PRINTK3(KERN_DEBUG + "%s(skb=%p)\n", + __FUNCTION__, + skb); + __skb_pull(skb, skb->nh.raw - skb->data); read_lock_bh(&neigh->lock); @@ -1424,6 +1573,11 @@ struct net_device *dev = NULL; int err = -ENODEV; + NEIGH_PRINTK3(KERN_DEBUG + "%s(skb=%p, nlh=%p, arg=%p)\n", + __FUNCTION__, + skb, nlh, arg); + if (ndm->ndm_ifindex && (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL) goto out; @@ -1476,6 +1630,11 @@ struct net_device *dev = NULL; int err = -ENODEV; + NEIGH_PRINTK3(KERN_DEBUG + "%s(skb=%p, nlh=%p, arg=%p)\n", + __FUNCTION__, + skb, nlh, arg); + if (ndm->ndm_ifindex && (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL) goto out; @@ -1544,6 +1703,40 @@ return err; } +/* + * XXX: based on neigh_fill_info() + */ +static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, + u32 pid, u32 seq, int event) +{ + int locked = 0; + unsigned char *b = skb->tail; + struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq, event, + sizeof(struct ndmsg)); + struct ndmsg *ndm = NLMSG_DATA(nlh); + + read_lock_bh(&pn->tbl->lock); + locked = 1; + ndm->ndm_family = pn->tbl->family; + ndm->ndm_flags = NTF_PROXY; + ndm->ndm_type = 0; + ndm->ndm_ifindex = pn->dev->ifindex; + RTA_PUT(skb, NDA_DST, pn->tbl->key_len, pn->key); + 
read_unlock_bh(&pn->tbl->lock); + locked = 0; + ndm->ndm_state = 0; + + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + if (locked) + read_unlock_bh(&pn->tbl->lock); + + skb_trim(skb, b - skb->data); + return -1; +} static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n, u32 pid, u32 seq, int event) @@ -1585,6 +1778,42 @@ return -1; } +/* + * XXX: based on neigh_dump_table() + */ +static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct pneigh_entry *pn; + int rc, h, s_h = cb->args[1]; + int idx, s_idx = idx = cb->args[2]; + + for (h = 0; h <= PNEIGH_HASHMASK; h++) { + if (h < s_h) + continue; + if (h > s_h) + s_idx = 0; + read_lock_bh(&tbl->lock); + for (pn = tbl->phash_buckets[h], idx = 0; pn; pn = pn->next, idx++) { + if (idx < s_idx) + continue; + if (pneigh_fill_info(skb, pn, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + RTM_NEWNEIGH) <= 0) { + read_unlock_bh(&tbl->lock); + rc = -1; + goto out; + } + } + read_unlock_bh(&tbl->lock); + } + + rc = skb->len; +out: + cb->args[1] = h; + cb->args[2] = idx; + return rc; +} static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb) @@ -1623,10 +1852,15 @@ { struct neigh_table *tbl; int t, family, s_t; + long a1, a2, pa1, pa2; read_lock(&neigh_tbl_lock); family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family; s_t = cb->args[0]; + a1 = cb->args[1]; + a2 = cb->args[2]; + pa1 = cb->args[1]; + pa2 = cb->args[2]; for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) { if (t < s_t || (family && tbl->family != family)) @@ -1634,12 +1868,28 @@ if (t > s_t) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); + cb->args[1] = a1; + cb->args[2] = a2; if (neigh_dump_table(tbl, skb, cb) < 0) break; + a1 = cb->args[1]; + a2 = cb->args[2]; + + cb->args[1] = pa1; + cb->args[2] = pa2; + if (pneigh_dump_table(tbl, skb, cb) < 0) + break; + pa1 = 
cb->args[1]; + pa2 = cb->args[2]; } read_unlock(&neigh_tbl_lock); cb->args[0] = t; + /* + * XXX: Fix me! currently only neigh's status is reported. + */ + cb->args[1] = a1; + cb->args[2] = a2; return skb->len; } @@ -2044,14 +2294,80 @@ #endif /* CONFIG_ARPD */ #ifdef CONFIG_SYSCTL +static int +ndisc_proc_dointvec_ms_jiffies(struct ctl_table *ctl, + int write, + struct file *filp, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct neigh_notifier_parms *np = ctl->extra1; + int ret = proc_dointvec_ms_jiffies(ctl, write, filp, buffer, lenp, ppos); + + if (write && np && np->link_notifier) + (np->link_notifier)(np->link_notifier_data); + return ret; +} + +static int +ndisc_proc_rtime_dointvec_ms_jiffies(struct ctl_table *ctl, + int write, + struct file *filp, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct neigh_notifier_parms *np = ctl->extra1; + int ret = proc_dointvec_ms_jiffies(ctl, write, filp, buffer, lenp, ppos); + + if (write) { + if (ctl->extra2) + *((unsigned int *)ctl->extra2) = neigh_rand_reach_time(*(unsigned int *)ctl->data); + if (np && np->link_notifier) + (np->link_notifier)(np->link_notifier_data); + } + return ret; +} + +static int +ndisc_sysctl_ms_jiffies(ctl_table *ctl, int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen, void **context) +{ + struct neigh_notifier_parms *np = ctl->extra1; + int ret = sysctl_ms_jiffies(ctl, name, nlen, + oldval, oldlenp, newval, newlen, + context); + if (newval && newlen && ret > 0 && np && np->link_notifier) + (np->link_notifier)(np->link_notifier_data); + return ret; +} + +static int +ndisc_rtime_sysctl_ms_jiffies(ctl_table *ctl, int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen, void **context) +{ + struct neigh_notifier_parms *np = ctl->extra1; + int ret = sysctl_ms_jiffies(ctl, name, nlen, + oldval, oldlenp, newval, newlen, + context); + if (newval && newlen && 
ret > 0) { /* refresh reachable_time even when no notifier is registered */ + if (ctl->extra2) + *((unsigned int *)ctl->extra2) = neigh_rand_reach_time(*(unsigned int *)ctl->data); + if (np && np->link_notifier) + (np->link_notifier)(np->link_notifier_data); + } + return ret; +} static struct neigh_sysctl_table { struct ctl_table_header *sysctl_header; - ctl_table neigh_vars[17]; + ctl_table neigh_vars[__NET_NEIGH_MAX]; ctl_table neigh_dev[2]; ctl_table neigh_neigh_dir[2]; ctl_table neigh_proto_dir[2]; ctl_table neigh_root_dir[2]; + struct neigh_notifier_parms notifier; } neigh_sysctl_template = { .neigh_vars = { { @@ -2170,6 +2486,23 @@ .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = NET_NEIGH_RETRANS_TIME_MS, + .procname = "retrans_time_ms", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &ndisc_proc_dointvec_ms_jiffies, + .strategy = &ndisc_sysctl_ms_jiffies, + }, + { + .ctl_name = NET_NEIGH_REACHABLE_TIME_MS, + .procname = "base_reachable_time_ms", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &ndisc_proc_rtime_dointvec_ms_jiffies, + .strategy = &ndisc_rtime_sysctl_ms_jiffies, + }, + }, .neigh_dev = { { @@ -2200,7 +2533,8 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, int p_id, int pdev_id, char *p_name, - proc_handler *handler) + proc_handler *handler, + void (*neigh_notifier)(void *)) { struct neigh_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL); const char *dev_name_source = NULL; @@ -2210,6 +2544,10 @@ if (!t) return -ENOBUFS; memcpy(t, &neigh_sysctl_template, sizeof(*t)); + + t->notifier.link_notifier = neigh_notifier; + t->notifier.link_notifier_data = dev; + t->neigh_vars[0].data = &p->mcast_probes; t->neigh_vars[1].data = &p->ucast_probes; t->neigh_vars[2].data = &p->app_probes; @@ -2231,7 +2569,10 @@ if (dev) { dev_name_source = dev->name; t->neigh_dev[0].ctl_name = dev->ifindex; - memset(&t->neigh_vars[12], 0, sizeof(ctl_table)); + t->neigh_vars[12].procname = NULL; + t->neigh_vars[13].procname = NULL; + 
t->neigh_vars[14].procname = NULL; + t->neigh_vars[15].procname = NULL; } else { t->neigh_vars[12].data = (int *)(p + 1); t->neigh_vars[13].data = (int *)(p + 1) + 1; @@ -2239,6 +2580,13 @@ t->neigh_vars[15].data = (int *)(p + 1) + 3; } + t->neigh_vars[16].data = &p->retrans_time; + t->neigh_vars[16].extra1 = &t->notifier; + + t->neigh_vars[17].data = &p->base_reachable_time; + t->neigh_vars[17].extra1 = &t->notifier; + t->neigh_vars[17].extra2 = &p->reachable_time; + dev_name = net_sysctl_strdup(dev_name_source); if (!dev_name) { err = -ENOBUFS; @@ -2313,6 +2661,7 @@ #ifdef CONFIG_ARPD EXPORT_SYMBOL(neigh_app_ns); +EXPORT_SYMBOL(neigh_app_notify); #endif #ifdef CONFIG_SYSCTL EXPORT_SYMBOL(neigh_sysctl_register); diff -urN linux-2.6.11/net/core/netfilter.c x1/net/core/netfilter.c --- linux-2.6.11/net/core/netfilter.c 2005-03-02 08:38:08.000000000 +0100 +++ x1/net/core/netfilter.c 2005-02-03 05:44:13.000000000 +0100 @@ -806,6 +806,9 @@ tracking in use: without this, connection may not be in hash table, and hence manufactured ICMP or RST packets will not be associated with it. 
*/ void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +void (*ip6_ct_attach)(struct sk_buff *, struct sk_buff *); +#endif void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) { @@ -828,6 +831,9 @@ } EXPORT_SYMBOL(ip_ct_attach); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +EXPORT_SYMBOL(ip6_ct_attach); +#endif EXPORT_SYMBOL(nf_ct_attach); EXPORT_SYMBOL(nf_getsockopt); EXPORT_SYMBOL(nf_hook_slow); diff -urN linux-2.6.11/net/ipv4/ah4.c x1/net/ipv4/ah4.c --- linux-2.6.11/net/ipv4/ah4.c 2005-03-02 08:38:25.000000000 +0100 +++ x1/net/ipv4/ah4.c 2005-02-03 06:35:55.000000000 +0100 @@ -128,6 +128,8 @@ goto out; ah = (struct ip_auth_hdr*)skb->data; + if (x->props.replay_window && xfrm_replay_check(x, ah->seq_no)) + goto out; ahp = x->data; ah_hlen = (ah->hdrlen + 2) << 2; @@ -171,6 +173,8 @@ goto out; } } + if (x->props.replay_window) + xfrm_replay_advance(x, ah->seq_no); ((struct iphdr*)work_buf)->protocol = ah->nexthdr; skb->nh.raw = skb_pull(skb, ah_hlen); memcpy(skb->nh.raw, work_buf, iph->ihl*4); diff -urN linux-2.6.11/net/ipv4/arp.c x1/net/ipv4/arp.c --- linux-2.6.11/net/ipv4/arp.c 2005-03-02 08:38:25.000000000 +0100 +++ x1/net/ipv4/arp.c 2005-02-10 06:31:25.000000000 +0100 @@ -1243,7 +1243,7 @@ arp_proc_init(); #ifdef CONFIG_SYSCTL neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4, - NET_IPV4_NEIGH, "ipv4", NULL); + NET_IPV4_NEIGH, "ipv4", NULL, NULL); #endif register_netdevice_notifier(&arp_netdev_notifier); } diff -urN linux-2.6.11/net/ipv4/devinet.c x1/net/ipv4/devinet.c --- linux-2.6.11/net/ipv4/devinet.c 2005-03-02 08:37:50.000000000 +0100 +++ x1/net/ipv4/devinet.c 2005-02-10 06:31:25.000000000 +0100 @@ -153,7 +153,7 @@ dev_hold(dev); #ifdef CONFIG_SYSCTL neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4, - NET_IPV4_NEIGH, "ipv4", NULL); + NET_IPV4_NEIGH, "ipv4", NULL, NULL); #endif /* Account for reference dev->ip_ptr */ @@ -992,7 +992,7 @@ 
devinet_sysctl_unregister(&in_dev->cnf); neigh_sysctl_unregister(in_dev->arp_parms); neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4, - NET_IPV4_NEIGH, "ipv4", NULL); + NET_IPV4_NEIGH, "ipv4", NULL, NULL); devinet_sysctl_register(in_dev, &in_dev->cnf); #endif break; diff -urN linux-2.6.11/net/ipv4/esp4.c x1/net/ipv4/esp4.c --- linux-2.6.11/net/ipv4/esp4.c 2005-03-02 08:38:10.000000000 +0100 +++ x1/net/ipv4/esp4.c 2005-02-03 06:35:55.000000000 +0100 @@ -153,6 +153,7 @@ if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr))) goto out; + esph = (struct ip_esp_hdr*)skb->data; if (elen <= 0 || (elen & (blksize-1))) goto out; @@ -161,7 +162,8 @@ if (esp->auth.icv_full_len) { u8 sum[esp->auth.icv_full_len]; u8 sum1[alen]; - + if (x->props.replay_window && xfrm_replay_check(x, esph->seq_no)) + goto out; esp->auth.icv(esp, skb, 0, skb->len-alen, sum); if (skb_copy_bits(skb, skb->len-alen, sum1, alen)) @@ -171,6 +173,9 @@ x->stats.integrity_failed++; goto out; } + + if (x->props.replay_window) + xfrm_replay_advance(x, esph->seq_no); } if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0) @@ -178,7 +183,6 @@ skb->ip_summed = CHECKSUM_NONE; - esph = (struct ip_esp_hdr*)skb->data; iph = skb->nh.iph; /* Get ivec. This can be wrong, check against another impls. 
*/ @@ -373,7 +377,7 @@ if (x->aalg->alg_key_len > 512) goto error; } - if (x->ealg == NULL) + if (x->ealg == NULL || (x->ealg->alg_key_len == 0 && x->props.ealgo != SADB_EALG_NULL)) goto error; esp = kmalloc(sizeof(*esp), GFP_KERNEL); @@ -412,11 +416,13 @@ goto error; } esp->conf.key = x->ealg->alg_key; - esp->conf.key_len = (x->ealg->alg_key_len+7)/8; - if (x->props.ealgo == SADB_EALG_NULL) + if (x->props.ealgo == SADB_EALG_NULL) { + esp->conf.key_len = 0; esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_ECB); - else + } else { + esp->conf.key_len = (x->ealg->alg_key_len+7)/8; esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_CBC); + } if (esp->conf.tfm == NULL) goto error; esp->conf.ivlen = crypto_tfm_alg_ivsize(esp->conf.tfm); diff -urN linux-2.6.11/net/ipv4/ip_forward.c x1/net/ipv4/ip_forward.c --- linux-2.6.11/net/ipv4/ip_forward.c 2005-03-02 08:37:30.000000000 +0100 +++ x1/net/ipv4/ip_forward.c 2005-01-21 06:15:37.000000000 +0100 @@ -60,8 +60,13 @@ struct rtable *rt; /* Route we use */ struct ip_options * opt = &(IPCB(skb)->opt); +#ifdef CONFIG_USE_POLICY_FWD if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb)) goto drop; +#else + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; +#endif if (IPCB(skb)->opt.router_alert && ip_call_ra_chain(skb)) return NET_RX_SUCCESS; diff -urN linux-2.6.11/net/ipv4/xfrm4_input.c x1/net/ipv4/xfrm4_input.c --- linux-2.6.11/net/ipv4/xfrm4_input.c 2005-03-02 08:37:48.000000000 +0100 +++ x1/net/ipv4/xfrm4_input.c 2004-11-25 06:33:09.000000000 +0100 @@ -31,30 +31,29 @@ IP_ECN_set_ce(inner_iph); } -static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq) +static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi) { switch (nexthdr) { case IPPROTO_IPIP: if (!pskb_may_pull(skb, sizeof(struct iphdr))) return -EINVAL; *spi = skb->nh.iph->saddr; - *seq = 0; return 0; } - return xfrm_parse_spi(skb, nexthdr, spi, seq); + return xfrm_parse_spi(skb, 
nexthdr, spi); } int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) { int err; - u32 spi, seq; + u32 spi; struct sec_decap_state xfrm_vec[XFRM_MAX_DEPTH]; struct xfrm_state *x; int xfrm_nr = 0; int decaps = 0; - if ((err = xfrm4_parse_spi(skb, skb->nh.iph->protocol, &spi, &seq)) != 0) + if ((err = xfrm4_parse_spi(skb, skb->nh.iph->protocol, &spi)) != 0) goto drop; do { @@ -71,9 +70,6 @@ if (unlikely(x->km.state != XFRM_STATE_VALID)) goto drop_unlock; - if (x->props.replay_window && xfrm_replay_check(x, seq)) - goto drop_unlock; - if (xfrm_state_check_expire(x)) goto drop_unlock; @@ -84,9 +80,6 @@ /* only the first xfrm gets the encap type */ encap_type = 0; - if (x->props.replay_window) - xfrm_replay_advance(x, seq); - x->curlft.bytes += skb->len; x->curlft.packets++; @@ -116,8 +109,9 @@ break; } - if ((err = xfrm_parse_spi(skb, skb->nh.iph->protocol, &spi, &seq)) < 0) + if ((err = xfrm_parse_spi(skb, skb->nh.iph->protocol, &spi)) < 0) goto drop; + } while (!err); /* Allocate new secpath or COW existing one. */ diff -urN linux-2.6.11/net/ipv6/Kconfig x1/net/ipv6/Kconfig --- linux-2.6.11/net/ipv6/Kconfig 2005-03-02 08:38:09.000000000 +0100 +++ x1/net/ipv6/Kconfig 2005-02-28 12:50:45.000000000 +0100 @@ -2,7 +2,7 @@ # IPv6 configuration # config IPV6_PRIVACY - bool "IPv6: Privacy Extensions (RFC 3041) support" + bool "IPv6: Privacy Extensions support" depends on IPV6 ---help--- Privacy Extensions for Stateless Address Autoconfiguration in IPv6 @@ -17,6 +17,22 @@ See for details. +config IPV6_PRIVACY_MD5 + bool "IPv6: Use RFC 3041 randomized interface identifiers" + depends on IPV6_PRIVACY + ---help--- + Instead of standard pseudo random generator, use + traditional algorithm described in the original RFC 3041. 
+ +config IPV6_ROUTER_PREF + bool "IPv6: default router preference" + depends on IPV6 + +config IPV6_NEW_ROUNDROBIN + bool + depends on IPV6_ROUTER_PREF + default y + config INET6_AH tristate "IPv6: AH transformation" depends on IPV6 @@ -77,3 +93,51 @@ If unsure, say N. +config IPV6_MROUTE + bool "IPv6: multicast routing (EXPERIMENTAL)" + depends on IPV6 && EXPERIMENTAL + ---help--- + Experimental support for IPv6 multicast forwarding. + If unsure, say N. + +config IPV6_PIMSM_V2 + bool "IPv6: PIM-SM version 2 support (EXPERIMENTAL)" + depends on IPV6_MROUTE + ---help--- + Support for the IPv6 PIM multicast routing protocol PIM-SMv2. + If unsure, say N. + +config IPV6_STATISTICS + bool "IPv6: per-interface statistics for SNMP" + depends on IPV6 + +config IPV6_DELPREFIX + bool "IPv6: delete prefix route on manual address deletion" + depends on IPV6 + ---help--- + When an address is configured manually, the kernel + automatically adds a prefix route for the address. + However, the kernel does not delete that route when the + address is deleted. + + This is not a problem for prefixes of auto-configured addresses, + but it can be confusing for prefixes of manual addresses. + (Note: the prefix route for an auto-configured address is managed + by its timer.) + + With this option, the prefix route is deleted if no other address + with that prefix remains on the device. In addition, if all the + remaining addresses are auto-configured, the route is changed to + a dynamic prefix route. + + This is experimental. + +config IPV6_ALLFRAG + bool "IPv6: fragment < 1280" + depends on IPV6 + ---help--- + According to RFC 2460, always add a fragment header after + receiving a Packet Too Big message with an MTU below 1280. + + This is experimental. 
+ diff -urN linux-2.6.11/net/ipv6/Makefile x1/net/ipv6/Makefile --- linux-2.6.11/net/ipv6/Makefile 2005-03-02 08:38:17.000000000 +0100 +++ x1/net/ipv6/Makefile 2005-02-09 16:31:39.000000000 +0100 @@ -10,6 +10,8 @@ exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \ ip6_flowlabel.o ipv6_syms.o +ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o + ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ xfrm6_output.o ipv6-objs += $(ipv6-y) diff -urN linux-2.6.11/net/ipv6/addrconf.c x1/net/ipv6/addrconf.c --- linux-2.6.11/net/ipv6/addrconf.c 2005-03-02 08:38:26.000000000 +0100 +++ x1/net/ipv6/addrconf.c 2005-03-02 17:30:59.000000000 +0100 @@ -35,6 +35,10 @@ * YOSHIFUJI Hideaki @USAGI : ARCnet support * YOSHIFUJI Hideaki @USAGI : convert /proc/net/if_inet6 to * seq_file. + * YOSHIFUJI Hideaki @USAGI : improved source address + * selection; consider scope, + * status etc. + * Hoerdt Mickael : Added Multicast routing support. */ #include @@ -105,8 +109,10 @@ static void ipv6_regen_rndid(unsigned long data); static int desync_factor = MAX_DESYNC_FACTOR * HZ; +#ifdef CONFIG_IPV6_PRIVACY_MD5 static struct crypto_tfm *md5_tfm; static DEFINE_SPINLOCK(md5_tfm_lock); +#endif /* CONFIG_IPV6_PRIVACY_MD5 */ #endif static int ipv6_count_addresses(struct inet6_dev *idev); @@ -144,6 +150,8 @@ static struct notifier_block *inet6addr_chain; +static u32 ipv6_addrselect_label_lookup(const struct in6_addr *addr, int ifindex); + struct ipv6_devconf ipv6_devconf = { .forwarding = 0, .hop_limit = IPV6_DEFAULT_HOPLIMIT, @@ -164,6 +172,9 @@ .max_desync_factor = MAX_DESYNC_FACTOR, #endif .max_addresses = IPV6_MAX_ADDRESSES, +#ifdef CONFIG_IPV6_MROUTE + .mc_forwarding = 0, +#endif }; static struct ipv6_devconf ipv6_devconf_dflt = { @@ -185,54 +196,58 @@ .max_desync_factor = MAX_DESYNC_FACTOR, #endif .max_addresses = IPV6_MAX_ADDRESSES, +#ifdef CONFIG_IPV6_MROUTE + .mc_forwarding = 0, +#endif }; -/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ +/* IPv6 Wildcard Address and Loopback 
Address defined by RFC3493 */ #if 0 const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; #endif const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; -int ipv6_addr_type(const struct in6_addr *addr) +int __ipv6_addr_type(const struct in6_addr *addr) { - int type; - u32 st; + u32 st = addr->s6_addr32[0]; - st = addr->s6_addr32[0]; + /* Consider all addresses with the first three bits different of + 000 and 111 as unicasts. + */ + if ((st & htonl(0xE0000000)) != htonl(0x00000000) && + (st & htonl(0xE0000000)) != htonl(0xE0000000)) + return (IPV6_ADDR_UNICAST | + IPV6_ADDR_SCOPE_GLOBAL<<16); if ((st & htonl(0xFF000000)) == htonl(0xFF000000)) { - type = IPV6_ADDR_MULTICAST; + /* multicast */ + /* addr-select 3.1 */ + int type = IPV6_ADDR_MC_SCOPE(addr)<<16; - switch((st & htonl(0x00FF0000))) { - case __constant_htonl(0x00010000): + switch(type) { + case IPV6_ADDR_SCOPE_NODELOCAL<<16: type |= IPV6_ADDR_LOOPBACK; break; - case __constant_htonl(0x00020000): + case IPV6_ADDR_SCOPE_LINKLOCAL<<16: type |= IPV6_ADDR_LINKLOCAL; break; - case __constant_htonl(0x00050000): + case IPV6_ADDR_SCOPE_SITELOCAL<<16: type |= IPV6_ADDR_SITELOCAL; break; }; + type |= IPV6_ADDR_MULTICAST; return type; } - type = IPV6_ADDR_UNICAST; - - /* Consider all addresses with the first three bits different of - 000 and 111 as finished. 
- */ - if ((st & htonl(0xE0000000)) != htonl(0x00000000) && - (st & htonl(0xE0000000)) != htonl(0xE0000000)) - return type; - if ((st & htonl(0xFFC00000)) == htonl(0xFE800000)) - return (IPV6_ADDR_LINKLOCAL | type); + return (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST | + IPV6_ADDR_SCOPE_LINKLOCAL<<16); /* addr-select 3.1 */ if ((st & htonl(0xFFC00000)) == htonl(0xFEC00000)) - return (IPV6_ADDR_SITELOCAL | type); + return (IPV6_ADDR_SITELOCAL | IPV6_ADDR_UNICAST | + IPV6_ADDR_SCOPE_SITELOCAL<<16); /* addr-select 3.1 */ if ((addr->s6_addr32[0] | addr->s6_addr32[1]) == 0) { if (addr->s6_addr32[2] == 0) { @@ -240,24 +255,50 @@ return IPV6_ADDR_ANY; if (addr->s6_addr32[3] == htonl(0x00000001)) - return (IPV6_ADDR_LOOPBACK | type); + return (IPV6_ADDR_LOOPBACK | IPV6_ADDR_UNICAST | + IPV6_ADDR_SCOPE_LINKLOCAL<<16); /* addr-select 3.4 */ - return (IPV6_ADDR_COMPATv4 | type); + return (IPV6_ADDR_COMPATv4 | IPV6_ADDR_UNICAST | + IPV6_ADDR_SCOPE_GLOBAL<<16); /* addr-select 3.3 */ } if (addr->s6_addr32[2] == htonl(0x0000ffff)) - return IPV6_ADDR_MAPPED; + return (IPV6_ADDR_MAPPED | + IPV6_ADDR_SCOPE_GLOBAL<<16); /* addr-select 3.3 */ + } + + return (IPV6_ADDR_RESERVED | + IPV6_ADDR_SCOPE_GLOBAL<<16); /* addr-select 3.4 */ +} + +/* find 1st bit in difference between the 2 addrs */ +static inline int addr_diff(const void *__a1, const void *__a2, int addrlen) +{ + /* find 1st bit in difference between the 2 addrs. + * bit may be an invalid value, + * but if it is >= plen, the value is ignored in any case. 
+ */ + const u32 *a1 = __a1; + const u32 *a2 = __a2; + int i; + + addrlen >>= 2; + for (i = 0; i < addrlen; i++) { + u32 xb = a1[i] ^ a2[i]; + if (xb) { + int j = 31; + xb = ntohl(xb); + while ((xb & (1 << j)) == 0) + j--; + return (i * 32 + 31 - j); + } } + return addrlen<<5; +} - st &= htonl(0xFF000000); - if (st == 0) - return IPV6_ADDR_RESERVED; - st &= htonl(0xFE000000); - if (st == htonl(0x02000000)) - return IPV6_ADDR_RESERVED; /* for NSAP */ - if (st == htonl(0x04000000)) - return IPV6_ADDR_RESERVED; /* for IPX */ - return type; +static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_addr *a2) +{ + return addr_diff(a1->s6_addr, a2->s6_addr, sizeof(struct in6_addr)); } static void addrconf_del_timer(struct inet6_ifaddr *ifp) @@ -366,7 +407,9 @@ #ifdef CONFIG_IPV6_PRIVACY get_random_bytes(ndev->rndid, sizeof(ndev->rndid)); +#ifdef CONFIG_IPV6_PRIVACY_MD5 get_random_bytes(ndev->entropy, sizeof(ndev->entropy)); +#endif /* CONFIG_IPV6_PRIVACY_MD5 */ init_timer(&ndev->regen_timer); ndev->regen_timer.function = ipv6_regen_rndid; ndev->regen_timer.data = (unsigned long) ndev; @@ -391,14 +434,14 @@ ndev->tstamp = jiffies; #ifdef CONFIG_SYSCTL neigh_sysctl_register(dev, ndev->nd_parms, NET_IPV6, - NET_IPV6_NEIGH, "ipv6", &ndisc_ifinfo_sysctl_change); + NET_IPV6_NEIGH, "ipv6", &ndisc_ifinfo_sysctl_change, NULL); addrconf_sysctl_register(ndev, &ndev->cnf); #endif } return ndev; } -static struct inet6_dev * ipv6_find_idev(struct net_device *dev) +struct inet6_dev * ipv6_find_idev(struct net_device *dev) { struct inet6_dev *idev; @@ -589,6 +632,10 @@ struct inet6_ifaddr *ifa, **ifap; struct inet6_dev *idev = ifp->idev; int hash; +#ifdef CONFIG_IPV6_DELPREFIX + int deleted = 0, onlink = 0; + unsigned long expires = jiffies; +#endif hash = ipv6_addr_hash(&ifp->addr); @@ -631,7 +678,32 @@ *ifap = ifa->if_next; __in6_ifa_put(ifp); ifa->if_next = NULL; +#ifndef CONFIG_IPV6_DELPREFIX break; +#else + if (!(ifp->flags & IFA_F_PERMANENT) || + onlink > 0) + 
break; + deleted = 1; + } else { + if (ipv6_prefix_equal(&ifa->addr, &ifp->addr, + ifp->prefix_len)) { + if (ifa->flags & IFA_F_PERMANENT) { + onlink = 1; + if (deleted) + break; + } else if (onlink < 1) { /* consider every dynamic address, not only the first */ + unsigned long lifetime; + onlink = -1; + + lifetime = ifa->valid_lft; + if (lifetime > 0x7fffffffUL/HZ) /* clamp so lifetime * HZ cannot overflow */ + lifetime = 0x7fffffffUL/HZ; + if (time_before(expires, ifa->tstamp + lifetime * HZ)) + expires = ifa->tstamp + lifetime * HZ; + } + } +#endif } } write_unlock_bh(&idev->lock); @@ -642,6 +714,52 @@ addrconf_del_timer(ifp); +#ifdef CONFIG_IPV6_DELPREFIX + /* + * Clean-up on-link route. + * We need to respect prefix lifetime. + * + * 1) if the address was not permanent, don't delete it. + * - timer for fib entry will delete itself. + * 2) if there're other permanent addresses with same prefix, don't + * delete it. + * 3) if there're only dynamic addresses, change prefix route to + * dynamic. + * - lifetime will be set to the longest valid lifetime among the + * addresses with same prefix on the device + * - subsequent RA will update lifetime. + * 4) otherwise, delete it. + * + * --yoshfuji + */ + if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) { + struct in6_addr prefix; + struct rt6_info *rt; + + ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len); + rt = rt6_lookup(&prefix, NULL, ifp->idev->dev->ifindex, 1); + + if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { + if (onlink == 0) { + ip6_del_rt(rt, NULL, NULL); + rt = NULL; + } else if (!(rt->rt6i_flags & RTF_EXPIRES)) { + /* + * prefix lifetime and valid lifetime are + * (almost) the same. + * subsequent RA will update appropriately. 
+ * --yoshfuji + */ + if (time_before(expires, jiffies)) /* wrap-safe jiffies comparison */ + expires = jiffies; + rt->rt6i_expires = expires; + rt->rt6i_flags |= RTF_EXPIRES; + } + } + if (rt) dst_release(&rt->u.dst); /* rt is NULL after ip6_del_rt() or a failed lookup */ + } +#endif + in6_ifa_put(ifp); } @@ -743,147 +861,215 @@ /* * Choose an appropriate source address + * draft-ietf-ipv6-default-addr-select-09.txt * should do: * i) get an address with an appropriate scope * ii) see if there is a specific route for the destination and use * an address of the attached interface * iii) don't use deprecated addresses */ -static int inline ipv6_saddr_pref(const struct inet6_ifaddr *ifp, u8 invpref) +#define IPV6_SADDRSELECT_SELF 0x01 +#define IPV6_SADDRSELECT_PREFERRED 0x02 +#define IPV6_SADDRSELECT_HOME 0x04 +#define IPV6_SADDRSELECT_PUBLIC 0x08 +#define IPV6_SADDRSELECT_INTERFACE 0x10 +#define IPV6_SADDRSELECT_LABEL 0x20 + +struct addrselect_attrs { + struct inet6_ifaddr *ifp; + u16 flags; + s16 matchlen; + u8 scope; +}; + +static int __inline__ ipv6_addrselect_preferred(int type) { - int pref; - pref = ifp->flags&IFA_F_DEPRECATED ? 0 : 2; -#ifdef CONFIG_IPV6_PRIVACY - pref |= (ifp->flags^invpref)&IFA_F_TEMPORARY ? 
0 : 1; -#endif - return pref; + /* section 3.3, 3.4 */ + if (type&(IPV6_ADDR_MAPPED|IPV6_ADDR_COMPATv4| + IPV6_ADDR_LOOPBACK|IPV6_ADDR_RESERVED)) + return 1; + return 0; } -#ifdef CONFIG_IPV6_PRIVACY -#define IPV6_GET_SADDR_MAXSCORE(score) ((score) == 3) -#else -#define IPV6_GET_SADDR_MAXSCORE(score) (score) -#endif - -int ipv6_dev_get_saddr(struct net_device *dev, +int ipv6_dev_get_saddr(struct net_device *daddr_dev, struct in6_addr *daddr, struct in6_addr *saddr) { - struct inet6_ifaddr *ifp = NULL; - struct inet6_ifaddr *match = NULL; + int daddr_type, daddr_scope; + u32 daddr_label; + struct inet6_ifaddr *ifp0, *ifp = NULL; + struct net_device *dev; struct inet6_dev *idev; - int scope; + int err; - int hiscore = -1, score; + int update; + struct addrselect_attrs candidate = {NULL,0,0,0}; +#if defined(CONFIG_IPV6_PRIVACY) + u16 invpref = 0; +#endif - scope = ipv6_addr_scope(daddr); +#ifdef CONFIG_IPV6_PRIVACY + if (ipv6_devconf.use_tempaddr > 1) + invpref |= IPV6_SADDRSELECT_PUBLIC; +#endif - /* - * known dev - * search dev and walk through dev addresses - */ + daddr_type = __ipv6_addr_type(daddr); + daddr_scope = __ipv6_addr_src_scope(daddr_type); + daddr_label = ipv6_addrselect_label_lookup(daddr, + daddr_dev?daddr_dev->ifindex:0); - if (dev) { - if (dev->flags & IFF_LOOPBACK) - scope = IFA_HOST; + read_lock(&dev_base_lock); + read_lock(&addrconf_lock); + for (dev = dev_base; dev; dev=dev->next) { + /* Rule 0: Candidate Source Address (section 4) + * - multicast and link-local destination address, + * the set of candidate source address MUST only + * include addresses assigned to interfaces + * belonging to the same link as the outgoing + * interface. + * (- For site-local destination addresses, the + * set of candidate source addresses MUST only + * include addresses assigned to interfaces + * belonging to the same site as the outgoing + * interface.) 
+ */ + if ((daddr_type&IPV6_ADDR_MULTICAST || + daddr_scope <= IPV6_ADDR_SCOPE_LINKLOCAL) && + daddr_dev && dev != daddr_dev) + continue; - read_lock(&addrconf_lock); idev = __in6_dev_get(dev); - if (idev) { - read_lock_bh(&idev->lock); - for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { - if (ifp->scope == scope) { - if (ifp->flags&IFA_F_TENTATIVE) - continue; -#ifdef CONFIG_IPV6_PRIVACY - score = ipv6_saddr_pref(ifp, idev->cnf.use_tempaddr > 1 ? IFA_F_TEMPORARY : 0); -#else - score = ipv6_saddr_pref(ifp, 0); -#endif - if (score <= hiscore) - continue; + if (!idev) + continue; - if (match) - in6_ifa_put(match); - match = ifp; - hiscore = score; - in6_ifa_hold(ifp); + read_lock_bh(&idev->lock); + ifp0 = idev->addr_list; + for (ifp=ifp0; ifp; ifp=ifp->if_next) { + struct addrselect_attrs temp = {NULL,0,0,0}; + int addr_type; + update = 0; + + /* Rule 0: Candidate Source Address (section 4) + * - In any case, anycast addresses, multicast + * addresses, and the unspecified address MUST + * NOT be included in a candidate set. + */ + addr_type = __ipv6_addr_type(&ifp->addr); + if (addr_type == IPV6_ADDR_ANY || + addr_type&IPV6_ADDR_MULTICAST) + continue; - if (IPV6_GET_SADDR_MAXSCORE(score)) { - read_unlock_bh(&idev->lock); - read_unlock(&addrconf_lock); - goto out; - } + /* Rule 1: Prefer same address */ + if (ipv6_addr_cmp(&ifp->addr, daddr) == 0) + temp.flags |= IPV6_SADDRSELECT_SELF; + if ((temp.flags^candidate.flags)&IPV6_SADDRSELECT_SELF) { + update = temp.flags&IPV6_SADDRSELECT_SELF; + if (!update) { + continue; } } - read_unlock_bh(&idev->lock); - } - read_unlock(&addrconf_lock); - } - if (scope == IFA_LINK) - goto out; + /* Rule 2: Prefer appropriate scope */ + temp.scope = __ipv6_addr_src_scope(addr_type); + if (!update) { + update = temp.scope - candidate.scope; + if (update > 0) { + update = candidate.scope < daddr_scope ? 1 : -1; + } else if (update < 0) { + update = temp.scope < daddr_scope ? 
-1 : 1; + } + if (update < 0) { + continue; + } + } - /* - * dev == NULL or search failed for specified dev - */ + /* Rule 3: Avoid deprecated address */ + if (ipv6_addrselect_preferred(addr_type) || + !(ifp->flags & IFA_F_DEPRECATED)) + temp.flags |= IPV6_SADDRSELECT_PREFERRED; + if (!update && (temp.flags^candidate.flags)&IPV6_SADDRSELECT_PREFERRED) { + update = temp.flags&IPV6_SADDRSELECT_PREFERRED; + if (!update) { + continue; + } + } - read_lock(&dev_base_lock); - read_lock(&addrconf_lock); - for (dev = dev_base; dev; dev=dev->next) { - idev = __in6_dev_get(dev); - if (idev) { - read_lock_bh(&idev->lock); - for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { - if (ifp->scope == scope) { - if (ifp->flags&IFA_F_TENTATIVE) - continue; + /* XXX: Rule 4: Prefer home address */ + + /* Rule 5: Prefer outgoing interface */ + if (daddr_dev == NULL || daddr_dev == dev) + temp.flags |= IPV6_SADDRSELECT_INTERFACE; + if (!update && (temp.flags^candidate.flags)&IPV6_SADDRSELECT_INTERFACE) { + update = temp.flags&IPV6_SADDRSELECT_INTERFACE; + if (!update) { + continue; + } + } + + /* XXX: Rule 6: Prefer matching label */ + if (ipv6_addrselect_label_lookup(&ifp->addr, dev->ifindex) == daddr_label) + temp.flags |= IPV6_SADDRSELECT_LABEL; + if (!update && (temp.flags^candidate.flags)&IPV6_SADDRSELECT_LABEL) { + update = temp.flags&IPV6_SADDRSELECT_LABEL; + if (!update) { + continue; + } + } + + /* XXX: Rule 7: Prefer public address */ #ifdef CONFIG_IPV6_PRIVACY - score = ipv6_saddr_pref(ifp, idev->cnf.use_tempaddr > 1 ? 
IFA_F_TEMPORARY : 0); -#else - score = ipv6_saddr_pref(ifp, 0); + if (!(ifp->flags & IFA_F_TEMPORARY)) + temp.flags |= IPV6_SADDRSELECT_PUBLIC; + if (!update && (temp.flags^candidate.flags)&IPV6_SADDRSELECT_PUBLIC) { + update = (temp.flags^invpref)&IPV6_SADDRSELECT_PUBLIC; + if (!update) { + continue; + } + } #endif - if (score <= hiscore) - continue; - - if (match) - in6_ifa_put(match); - match = ifp; - hiscore = score; - in6_ifa_hold(ifp); - if (IPV6_GET_SADDR_MAXSCORE(score)) { - read_unlock_bh(&idev->lock); - goto out_unlock_base; - } + /* Rule 8: Use longest matching prefix */ + temp.matchlen = ipv6_addr_diff(&ifp->addr, daddr); + if (!update) { + update = temp.matchlen - candidate.matchlen; + if (update < 0) { + continue; } } - read_unlock_bh(&idev->lock); + + /* Final Rule */ + if (!update && candidate.ifp) { + continue; + } + + /* update candidate */ + temp.ifp = ifp; + in6_ifa_hold(ifp); + if (candidate.ifp) + in6_ifa_put(candidate.ifp); + candidate = temp; } + read_unlock_bh(&idev->lock); } - -out_unlock_base: read_unlock(&addrconf_lock); read_unlock(&dev_base_lock); -out: - err = -EADDRNOTAVAIL; - if (match) { - ipv6_addr_copy(saddr, &match->addr); + if (candidate.ifp) { + ipv6_addr_copy(saddr, &candidate.ifp->addr); + in6_ifa_put(candidate.ifp); err = 0; - in6_ifa_put(match); + } else { + err = -EADDRNOTAVAIL; } - return err; } - int ipv6_get_saddr(struct dst_entry *dst, struct in6_addr *daddr, struct in6_addr *saddr) { - return ipv6_dev_get_saddr(dst ? dst->dev : NULL, daddr, saddr); + return ipv6_dev_get_saddr(dst ? 
((struct rt6_info *)dst)->rt6i_dev : NULL, + daddr, saddr); } - int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr) { struct inet6_dev *idev; @@ -972,6 +1158,70 @@ return ifp; } +/* address selection: default policy label */ +/* XXX: user level configuration */ +static struct ipv6_addrselect_label { + struct in6_addr addr; + u16 plen; + u32 ifindex; + u32 label; +} ipv6_addrselect_label_table[] = { + /* ::1/128, label = 0 */ + { + .addr = IN6ADDR_LOOPBACK_INIT, + .plen = 128, + .label = 0, + }, + /* ::/0, label = 1 */ + { + .addr = IN6ADDR_ANY_INIT, + .plen = 0, + .label = 1, + }, + /* 2002::/16, label = 2 */ + { + .addr = {{{ 0x20, 0x02 }}}, + .plen = 16, + .label = 2, + }, + /* ::/96, label = 3 */ + { + .plen = 96, + .label = 3, + }, + /* ::ffff:0:0/96, label = 4 */ + { + .addr = {{{ [10] = 0xff, [11] = 0xff }}}, + .plen = 96, + .label = 4, + }, + /* sentinel */ + { + .label = 0xffffffff, + } +}; + +static u32 ipv6_addrselect_label_lookup(const struct in6_addr *addr, + int ifindex) +{ + struct ipv6_addrselect_label *p; + int plen, matchlen = -1; + u32 label = 0xffffffff; + + for (p = ipv6_addrselect_label_table; + p->label != 0xffffffff; + p++) { + if (ifindex && p->ifindex && ifindex != p->ifindex) + continue; + plen = ipv6_addr_diff(addr, &p->addr); + if (plen < p->plen || plen < matchlen) + continue; + matchlen = plen; + label = p->label; + } + return label; +} + int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; @@ -1151,15 +1401,19 @@ /* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */ static int __ipv6_regen_rndid(struct inet6_dev *idev) { +#ifdef CONFIG_IPV6_PRIVACY_MD5 struct net_device *dev; - struct scatterlist sg[2]; - - sg[0].page = virt_to_page(idev->entropy); - sg[0].offset = offset_in_page(idev->entropy); - sg[0].length = 8; - sg[1].page = virt_to_page(idev->work_eui64); - sg[1].offset = offset_in_page(idev->work_eui64); - 
sg[1].length = 8; + struct scatterlist sg[] = { + { + .page = virt_to_page(idev->entropy), + .offset = offset_in_page(idev->entropy), + .length = 8, + },{ + .page = virt_to_page(idev->work_eui64), + .offset = offset_in_page(idev->work_eui64), + .length = 8, + } + }; dev = idev->dev; @@ -1169,7 +1423,11 @@ idev); get_random_bytes(idev->work_eui64, sizeof(idev->work_eui64)); } +#endif /* CONFIG_IPV6_PRIVACY_MD5 */ regen: +#ifndef CONFIG_IPV6_PRIVACY_MD5 + get_random_bytes(idev->rndid, sizeof(idev->rndid)); +#else /* CONFIG_IPV6_PRIVACY_MD5 */ spin_lock(&md5_tfm_lock); if (unlikely(md5_tfm == NULL)) { spin_unlock(&md5_tfm_lock); @@ -1181,9 +1439,14 @@ spin_unlock(&md5_tfm_lock); memcpy(idev->rndid, &idev->work_digest[0], 8); +#endif /* CONFIG_IPV6_PRIVACY_MD5 */ idev->rndid[0] &= ~0x02; +#ifndef CONFIG_IPV6_PRIVACY_MD5 + +#else /* CONFIG_IPV6_PRIVACY_MD5 */ memcpy(idev->entropy, &idev->work_digest[8], 8); +#endif /* CONFIG_IPV6_PRIVACY_MD5 */ /* * : * check if generated address is not inappropriate @@ -1982,7 +2245,10 @@ if (idev) { addrconf_sysctl_unregister(&idev->cnf); neigh_sysctl_unregister(idev->nd_parms); - neigh_sysctl_register(dev, idev->nd_parms, NET_IPV6, NET_IPV6_NEIGH, "ipv6", &ndisc_ifinfo_sysctl_change); + neigh_sysctl_register(dev, idev->nd_parms, + NET_IPV6, NET_IPV6_NEIGH, + "ipv6", + &ndisc_ifinfo_sysctl_change, NULL); addrconf_sysctl_register(idev, &idev->cnf); } #endif @@ -2427,6 +2693,10 @@ unsigned long regen_advance; #endif +#ifdef CONFIG_IPV6_PRIVACY + regen_advance = ifp->idev->cnf.regen_max_retry * ifp->idev->cnf.dad_transmits * ifp->idev->nd_parms->retrans_time / HZ; +#endif + if (ifp->flags & IFA_F_PERMANENT) continue; @@ -2469,28 +2739,33 @@ } #ifdef CONFIG_IPV6_PRIVACY } else if ((ifp->flags&IFA_F_TEMPORARY) && - !(ifp->flags&IFA_F_TENTATIVE)) { - if (age >= ifp->prefered_lft - regen_advance) { - struct inet6_ifaddr *ifpub = ifp->ifpub; - if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next)) - next = ifp->tstamp + 
ifp->prefered_lft * HZ; - if (!ifp->regen_count && ifpub) { - ifp->regen_count++; - in6_ifa_hold(ifp); - in6_ifa_hold(ifpub); - spin_unlock(&ifp->lock); - write_unlock(&addrconf_hash_lock); - ipv6_create_tempaddr(ifpub, ifp); - in6_ifa_put(ifpub); - in6_ifa_put(ifp); - goto restart; - } - } else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next)) - next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ; - spin_unlock(&ifp->lock); + !(ifp->flags&IFA_F_TENTATIVE) && + age >= ifp->prefered_lft - regen_advance) { + struct inet6_ifaddr *ifpub = ifp->ifpub; + if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next)) + next = ifp->tstamp + ifp->prefered_lft * HZ; + if (!ifp->regen_count && ifpub) { + ifp->regen_count++; + in6_ifa_hold(ifp); + in6_ifa_hold(ifpub); + spin_unlock(&ifp->lock); + write_unlock(&addrconf_hash_lock); + ipv6_create_tempaddr(ifpub, ifp); + in6_ifa_put(ifpub); + in6_ifa_put(ifp); + goto restart; + } else { + spin_unlock(&ifp->lock); + } #endif } else { /* ifp->prefered_lft <= ifp->valid_lft */ +#ifdef CONFIG_IPV6_PRIVACY + if (ifp->flags&IFA_F_TEMPORARY) { + if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next)) + next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ; + } else +#endif if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next)) next = ifp->tstamp + ifp->prefered_lft * HZ; spin_unlock(&ifp->lock); @@ -2835,6 +3110,9 @@ array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor; #endif array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses; +#ifdef CONFIG_IPV6_MROUTE + array[DEVCONF_MCFORWARDING] = cnf->mc_forwarding; +#endif } static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, @@ -3147,7 +3425,7 @@ static struct addrconf_sysctl_table { struct ctl_table_header *sysctl_header; - ctl_table addrconf_vars[18]; + ctl_table addrconf_vars[19]; ctl_table addrconf_dev[2]; ctl_table addrconf_conf_dir[2]; ctl_table addrconf_proto_dir[2]; @@ -3296,6 
+3574,16 @@ .mode = 0644, .proc_handler = &proc_dointvec, }, +#ifdef CONFIG_IPV6_MROUTE + { + .ctl_name = NET_IPV6_MC_FORWARDING, + .procname = "mc_forwarding", + .data = &ipv6_devconf.mc_forwarding, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif { .ctl_name = 0, /* sentinel */ } @@ -3440,6 +3728,9 @@ void __init addrconf_init(void) { +#ifdef CONFIG_IPV6_PRIVACY_MD5 + struct crypto_tfm *tfm; +#endif /* CONFIG_IPV6_PRIVACY_MD5 */ /* The addrconf netdev notifier requires that loopback_dev * has it's ipv6 private information allocated and setup * before it can bring up and give link-local addresses @@ -3465,12 +3756,22 @@ register_netdevice_notifier(&ipv6_dev_notf); -#ifdef CONFIG_IPV6_PRIVACY - md5_tfm = crypto_alloc_tfm("md5", 0); - if (unlikely(md5_tfm == NULL)) +#ifdef CONFIG_IPV6_PRIVACY_MD5 + tfm = crypto_alloc_tfm("md5", 0); + if (likely(tfm != NULL)) { + spin_lock(&md5_tfm_lock); + if (likely(md5_tfm == NULL)) { + md5_tfm = tfm; + spin_unlock(&md5_tfm_lock); + } else { + spin_unlock(&md5_tfm_lock); + crypto_free_tfm(tfm); + } + } else { printk(KERN_WARNING "failed to load transform for md5\n"); -#endif + } +#endif /* CONFIG_IPV6_PRIVACY_MD5 */ addrconf_verify(0); rtnetlink_links[PF_INET6] = inet6_rtnetlink_table; @@ -3487,6 +3788,9 @@ struct inet6_dev *idev; struct inet6_ifaddr *ifa; int i; +#ifdef CONFIG_IPV6_PRIVACY_MD5 + struct crypto_tfm *tfm; +#endif /* CONFIG_IPV6_PRIVACY_MD5 */ unregister_netdevice_notifier(&ipv6_dev_notf); @@ -3531,12 +3835,14 @@ rtnl_unlock(); -#ifdef CONFIG_IPV6_PRIVACY - if (likely(md5_tfm != NULL)) { - crypto_free_tfm(md5_tfm); - md5_tfm = NULL; - } -#endif +#ifdef CONFIG_IPV6_PRIVACY_MD5 + spin_lock(&md5_tfm_lock); + tfm = md5_tfm; + md5_tfm = NULL; + spin_unlock(&md5_tfm_lock); + if (likely(tfm)) + crypto_free_tfm(tfm); +#endif /* CONFIG_IPV6_PRIVACY_MD5 */ #ifdef CONFIG_PROC_FS proc_net_remove("if_inet6"); diff -urN linux-2.6.11/net/ipv6/af_inet6.c x1/net/ipv6/af_inet6.c --- 
linux-2.6.11/net/ipv6/af_inet6.c 2005-03-02 08:38:10.000000000 +0100 +++ x1/net/ipv6/af_inet6.c 2005-03-02 17:30:59.000000000 +0100 @@ -13,6 +13,7 @@ * piggy, Karl Knutson : Socket protocol table * Hideaki YOSHIFUJI : sin6_scope_id support * Arnaldo Melo : check proc_net_create return, cleanups + * Hoerdt Mickael : Added Multicast routing support. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -62,6 +63,9 @@ #include #include +#ifdef CONFIG_IPV6_MROUTE +#include +#endif MODULE_AUTHOR("Cast of dozens"); MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); @@ -758,6 +762,9 @@ err = icmpv6_init(&inet6_family_ops); if (err) goto icmp_fail; +#ifdef CONFIG_IPV6_MROUTE + ip6_mr_init(); +#endif err = ndisc_init(&inet6_family_ops); if (err) goto ndisc_fail; diff -urN linux-2.6.11/net/ipv6/ah6.c x1/net/ipv6/ah6.c --- linux-2.6.11/net/ipv6/ah6.c 2005-03-02 08:38:38.000000000 +0100 +++ x1/net/ipv6/ah6.c 2005-02-03 06:35:55.000000000 +0100 @@ -264,6 +264,8 @@ hdr_len = skb->data - skb->nh.raw; ah = (struct ipv6_auth_hdr*)skb->data; + if (x->props.replay_window && xfrm_replay_check(x, ah->seq_no)) + goto out; ahp = x->data; nexthdr = ah->nexthdr; ah_hlen = (ah->hdrlen + 2) << 2; @@ -302,6 +304,8 @@ } } + if (x->props.replay_window) + xfrm_replay_advance(x, ah->seq_no); skb->nh.raw = skb_pull(skb, ah_hlen); memcpy(skb->nh.raw, tmp_hdr, hdr_len); skb->nh.ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); diff -urN linux-2.6.11/net/ipv6/anycast.c x1/net/ipv6/anycast.c --- linux-2.6.11/net/ipv6/anycast.c 2005-03-02 08:38:34.000000000 +0100 +++ x1/net/ipv6/anycast.c 2005-02-03 15:43:50.000000000 +0100 @@ -48,32 +48,6 @@ /* Big ac list lock for all the sockets */ static DEFINE_RWLOCK(ipv6_sk_ac_lock); -/* XXX ip6_addr_match() and ip6_onlink() really belong in net/core.c */ - -static int -ip6_addr_match(struct in6_addr *addr1, struct in6_addr *addr2, int prefix) -{ - __u32 mask; - int i; - - 
if (prefix > 128 || prefix < 0) - return 0; - if (prefix == 0) - return 1; - for (i=0; i<4; ++i) { - if (prefix >= 32) - mask = ~0; - else - mask = htonl(~0 << (32 - prefix)); - if ((addr1->s6_addr32[i] ^ addr2->s6_addr32[i]) & mask) - return 0; - prefix -= 32; - if (prefix <= 0) - break; - } - return 1; -} - static int ip6_onlink(struct in6_addr *addr, struct net_device *dev) { @@ -87,8 +61,8 @@ if (idev) { read_lock_bh(&idev->lock); for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) { - onlink = ip6_addr_match(addr, &ifa->addr, - ifa->prefix_len); + onlink = ipv6_prefix_equal(addr, &ifa->addr, + ifa->prefix_len); if (onlink) break; } diff -urN linux-2.6.11/net/ipv6/esp6.c x1/net/ipv6/esp6.c --- linux-2.6.11/net/ipv6/esp6.c 2005-03-02 08:38:12.000000000 +0100 +++ x1/net/ipv6/esp6.c 2005-02-03 06:35:55.000000000 +0100 @@ -148,6 +148,7 @@ ret = -EINVAL; goto out_nofree; } + esph = (struct ipv6_esp_hdr*)skb->data; if (elen <= 0 || (elen & (blksize-1))) { ret = -EINVAL; @@ -166,6 +167,11 @@ u8 sum[esp->auth.icv_full_len]; u8 sum1[alen]; + if (x->props.replay_window && xfrm_replay_check(x, esph->seq_no)) { + ret = -EINVAL; + goto out; + } + esp->auth.icv(esp, skb, 0, skb->len-alen, sum); if (skb_copy_bits(skb, skb->len-alen, sum1, alen)) @@ -176,6 +182,10 @@ ret = -EINVAL; goto out; } + + if (x->props.replay_window) + xfrm_replay_advance(x, esph->seq_no); + } if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0) { @@ -185,7 +195,6 @@ skb->ip_summed = CHECKSUM_NONE; - esph = (struct ipv6_esp_hdr*)skb->data; iph = skb->nh.ipv6h; /* Get ivec. This can be wrong, check against another impls. 
*/ @@ -307,7 +316,7 @@ if (x->aalg->alg_key_len > 512) goto error; } - if (x->ealg == NULL) + if (x->ealg == NULL || (x->ealg->alg_key_len == 0 && x->props.ealgo != SADB_EALG_NULL)) goto error; if (x->encap) @@ -349,11 +358,13 @@ goto error; } esp->conf.key = x->ealg->alg_key; - esp->conf.key_len = (x->ealg->alg_key_len+7)/8; - if (x->props.ealgo == SADB_EALG_NULL) + if (x->props.ealgo == SADB_EALG_NULL) { + esp->conf.key_len = 0; esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_ECB); - else + } else { + esp->conf.key_len = (x->ealg->alg_key_len+7)/8; esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_CBC); + } if (esp->conf.tfm == NULL) goto error; esp->conf.ivlen = crypto_tfm_alg_ivsize(esp->conf.tfm); diff -urN linux-2.6.11/net/ipv6/exthdrs.c x1/net/ipv6/exthdrs.c --- linux-2.6.11/net/ipv6/exthdrs.c 2005-03-02 08:37:47.000000000 +0100 +++ x1/net/ipv6/exthdrs.c 2005-02-03 05:44:14.000000000 +0100 @@ -156,10 +156,18 @@ { struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); +#ifdef CONFIG_IPV6_STATISTICS + struct dst_entry *dst = skb->dst; + struct inet6_dev *idev = ((struct rt6_info *)dst)->rt6i_idev; +#endif if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) || !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); +#endif kfree_skb(skb); return -1; } @@ -172,7 +180,11 @@ return 1; } +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); +#endif return -1; } @@ -220,6 +232,10 @@ struct inet6_skb_parm *opt = IP6CB(skb); struct in6_addr *addr; struct in6_addr daddr; +#ifdef CONFIG_IPV6_STATISTICS + struct dst_entry *dst = skb->dst; + struct inet6_dev *idev = ((struct rt6_info *)dst)->rt6i_idev; +#endif int n, i; struct ipv6_rt_hdr *hdr; @@ -227,7 +243,11 @@ if (!pskb_may_pull(skb, 
(skb->h.raw-skb->data)+8) || !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); +#endif kfree_skb(skb); return -1; } @@ -236,7 +256,11 @@ if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr) || skb->pkt_type != PACKET_HOST) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INADDRERRORS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); +#endif kfree_skb(skb); return -1; } @@ -252,13 +276,21 @@ } if (hdr->type != IPV6_SRCRT_TYPE_0) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); +#endif icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw); return -1; } if (hdr->hdrlen & 0x01) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); +#endif icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw); return -1; } @@ -271,7 +303,11 @@ n = hdr->hdrlen >> 1; if (hdr->segments_left > n) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); +#endif icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->segments_left) - skb->nh.raw); return -1; } @@ -284,7 +320,11 @@ kfree_skb(skb); /* the copy is a forwarded packet */ if (skb2 == NULL) { - IP6_INC_STATS_BH(IPSTATS_MIB_OUTDISCARDS); +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_OUTDISCARDS); +#else + IP6_INC_STATS_BH(IPSTATS_MIB_OUTDISCARDS); +#endif return -1; } *skbp = skb = skb2; @@ -302,7 +342,11 @@ addr += i - 1; if (ipv6_addr_is_multicast(addr)) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INADDRERRORS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); +#endif kfree_skb(skb); return -1; } @@ -321,7 +365,11 @@ if 
(skb->dst->dev->flags&IFF_LOOPBACK) { if (skb->nh.ipv6h->hop_limit <= 1) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); +#endif icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0, skb->dev); kfree_skb(skb); @@ -434,29 +482,49 @@ static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff) { +#ifdef CONFIG_IPV6_STATISTICS + struct dst_entry *dst = skb->dst; + struct inet6_dev *idev = ((struct rt6_info *)dst)->rt6i_idev; +#endif u32 pkt_len; if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) { LIMIT_NETDEBUG( printk(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", skb->nh.raw[optoff+1])); +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); +#endif goto drop; } pkt_len = ntohl(*(u32*)(skb->nh.raw+optoff+2)); if (pkt_len <= IPV6_MAXPLEN) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); +#endif icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2); return 0; } if (skb->nh.ipv6h->payload_len) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); +#endif icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff); return 0; } if (pkt_len > skb->len - sizeof(struct ipv6hdr)) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INTRUNCATEDPKTS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS); +#endif goto drop; } if (pkt_len + sizeof(struct ipv6hdr) < skb->len) { @@ -483,11 +551,13 @@ { -1, } }; -int ipv6_parse_hopopts(struct sk_buff *skb, int nhoff) +int ipv6_parse_hopopts(struct sk_buff **skbp, unsigned int *nhoffp) { - IP6CB(skb)->hop = sizeof(struct ipv6hdr); - if (ip6_parse_tlv(tlvprochopopt_lst, skb)) - return sizeof(struct ipv6hdr); + IP6CB(*skbp)->hop = sizeof(struct ipv6hdr); + if 
(ip6_parse_tlv(tlvprochopopt_lst, *skbp)) { + *nhoffp = sizeof(struct ipv6hdr); + return 1; + } return -1; } @@ -543,14 +613,34 @@ ipv6_push_rthdr(skb, proto, opt->srcrt, daddr); if (opt->dst0opt) ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt); - if (opt->hopopt) + if (skb->len > IPV6_MAXPLEN - (opt->hopopt ? (opt->hopopt->hdrlen+2)<<2 : 0)) { + /* data is jumbogram */ + u8 *hopt = skb_push(skb, 8); + if (opt->hopopt) { + u8 *hlen; + ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt); + hlen = skb->data + 1; + (*hlen)++; + hopt[0] = IPV6_TLV_PADN; + hopt[1] = 0; + } else { + hopt[0] = *proto; + hopt[1] = 0; + } + hopt[2] = 0xC2; + hopt[3] = 4; + *(u32 *)&hopt[4] = htonl(skb->len); + *proto = NEXTHDR_HOP; + } else if (opt->hopopt) ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt); } void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto) { - if (opt->dst1opt) + if (opt->dst1opt) { ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst1opt); + skb->h.raw = skb->data; + } } struct ipv6_txoptions * diff -urN linux-2.6.11/net/ipv6/icmp.c x1/net/ipv6/icmp.c --- linux-2.6.11/net/ipv6/icmp.c 2005-03-02 08:37:52.000000000 +0100 +++ x1/net/ipv6/icmp.c 2005-02-03 05:44:15.000000000 +0100 @@ -176,7 +176,11 @@ */ dst = ip6_route_output(sk, fl); if (dst->error) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS((((struct rt6_info *)dst)->rt6i_idev), IPSTATS_MIB_OUTNOROUTES); +#else IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); +#endif } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) { res = 1; } else { @@ -381,6 +385,8 @@ hlimit = np->hop_limit; if (hlimit < 0) hlimit = dst_metric(dst, RTAX_HOPLIMIT); + if (hlimit < 0) + hlimit = ipv6_get_hoplimit(dst->dev); msg.skb = skb; msg.offset = skb->nh.raw - skb->data; @@ -467,6 +473,8 @@ hlimit = np->hop_limit; if (hlimit < 0) hlimit = dst_metric(dst, RTAX_HOPLIMIT); + if (hlimit < 0) + hlimit = ipv6_get_hoplimit(dst->dev); idev = in6_dev_get(skb->dev); diff -urN linux-2.6.11/net/ipv6/ip6_fib.c 
x1/net/ipv6/ip6_fib.c --- linux-2.6.11/net/ipv6/ip6_fib.c 2005-03-02 08:38:19.000000000 +0100 +++ x1/net/ipv6/ip6_fib.c 2005-02-03 13:45:59.000000000 +0100 @@ -18,6 +18,7 @@ * Yuji SEKIYA @USAGI: Support default route on router node; * remove ip6_null_entry from the top of * routing table. + * Ville Nuorvala: Fixes to source address sub trees */ #include #include @@ -49,6 +50,11 @@ struct rt6_statistics rt6_stats; +#if !defined(CONFIG_IPV6_NEW_ROUNDROBIN) +extern struct rt6_info *rt6_dflt_pointer; +extern spinlock_t rt6_dflt_lock; +#endif + static kmem_cache_t * fib6_node_kmem; enum fib_walk_state_t @@ -80,6 +86,7 @@ #define SUBTREE(fn) NULL #endif +static struct rt6_info * fib6_find_prefix(struct fib6_node *fn); static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt); static struct fib6_node * fib6_repair_tree(struct fib6_node *fn); @@ -117,36 +124,6 @@ */ /* - * compare "prefix length" bits of an address - */ - -static __inline__ int addr_match(void *token1, void *token2, int prefixlen) -{ - __u32 *a1 = token1; - __u32 *a2 = token2; - int pdw; - int pbi; - - pdw = prefixlen >> 5; /* num of whole __u32 in prefix */ - pbi = prefixlen & 0x1f; /* num of bits in incomplete u32 in prefix */ - - if (pdw) - if (memcmp(a1, a2, pdw << 2)) - return 0; - - if (pbi) { - __u32 mask; - - mask = htonl((0xffffffff) << (32 - pbi)); - - if ((a1[pdw] ^ a2[pdw]) & mask) - return 0; - } - - return 1; -} - -/* * test bit */ @@ -261,7 +238,7 @@ * Prefix match */ if (plen < fn->fn_bit || - !addr_match(&key->addr, addr, fn->fn_bit)) + !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) goto insert_above; /* @@ -513,6 +490,9 @@ { struct fib6_node *fn; int err = -ENOMEM; +#ifdef CONFIG_IPV6_SUBTREES + struct fib6_node *pn = NULL; +#endif fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr), rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst)); @@ -565,10 +545,6 @@ /* Now link new subtree to main tree */ sfn->parent = fn; fn->subtree = sfn; - if (fn->leaf == 
NULL) { - fn->leaf = rt; - atomic_inc(&rt->rt6i_ref); - } } else { sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, sizeof(struct in6_addr), rt->rt6i_src.plen, @@ -578,6 +554,13 @@ goto st_failure; } + /* fib6_add_1 might have cleared the old leaf pointer */ + if (fn->leaf == NULL) { + fn->leaf = rt; + atomic_inc(&rt->rt6i_ref); + } + + pn = fn; fn = sn; } #endif @@ -591,8 +574,29 @@ } out: - if (err) + if (err) { +#ifdef CONFIG_IPV6_SUBTREES + /* If fib6_add_1 has cleared the old leaf pointer in the + * super-tree leaf node, we have to find a new one for it. + * + * This situation will never arise in the sub-tree since + * the node will at least have the duplicate route that + * caused fib6_add_rt2node to fail in the first place. + */ + + if (pn && !(pn->fn_flags & RTN_RTINFO)) { + pn->leaf = fib6_find_prefix(pn); +#if RT6_DEBUG >= 2 + if (!pn->leaf) { + BUG_TRAP(pn->leaf); + pn->leaf = &ip6_null_entry; + } +#endif + atomic_inc(&pn->leaf->rt6i_ref); + } +#endif dst_free(&rt->u.dst); + } return err; #ifdef CONFIG_IPV6_SUBTREES @@ -667,7 +671,7 @@ key = (struct rt6key *) ((u8 *) fn->leaf + args->offset); - if (addr_match(&key->addr, args->addr, key->plen)) + if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) return fn; } @@ -680,16 +684,19 @@ struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr, struct in6_addr *saddr) { - struct lookup_args args[2]; struct fib6_node *fn; - - args[0].offset = offsetof(struct rt6_info, rt6i_dst); - args[0].addr = daddr; - + struct lookup_args args[2] = { + { + .offset = offsetof(struct rt6_info, rt6i_dst), + .addr = daddr, + }, #ifdef CONFIG_IPV6_SUBTREES - args[1].offset = offsetof(struct rt6_info, rt6i_src); - args[1].addr = saddr; + { + .offset = offsetof(struct rt6_info, rt6i_src), + .addr = saddr, + }, #endif + }; fn = fib6_lookup_1(root, args); @@ -718,7 +725,7 @@ * Prefix match */ if (plen < fn->fn_bit || - !addr_match(&key->addr, addr, fn->fn_bit)) + !ipv6_prefix_equal(&key->addr, addr, 
fn->fn_bit)) return NULL; if (plen == fn->fn_bit) @@ -747,10 +754,8 @@ #ifdef CONFIG_IPV6_SUBTREES if (src_len) { BUG_TRAP(saddr!=NULL); - if (fn == NULL) - fn = fn->subtree; if (fn) - fn = fib6_locate_1(fn, saddr, src_len, + fn = fib6_locate_1(fn->subtree, saddr, src_len, offsetof(struct rt6_info, rt6i_src)); } #endif @@ -1184,7 +1189,9 @@ if (rt->rt6i_flags&RTF_EXPIRES && rt->rt6i_expires) { if (time_after(now, rt->rt6i_expires)) { RT6_TRACE("expiring %p\n", rt); +#if !defined(CONFIG_IPV6_NEW_ROUNDROBIN) rt6_reset_dflt_pointer(rt); +#endif return -1; } gc_args.more++; diff -urN linux-2.6.11/net/ipv6/ip6_input.c x1/net/ipv6/ip6_input.c --- linux-2.6.11/net/ipv6/ip6_input.c 2005-03-02 08:38:17.000000000 +0100 +++ x1/net/ipv6/ip6_input.c 2005-02-13 19:37:56.000000000 +0100 @@ -19,6 +19,7 @@ * * Mitsuru KANDA @USAGI and * YOSHIFUJI Hideaki @USAGI: Remove ipv6_parse_exthdrs(). + * Hoerdt Mickael : Added Multicast routing support. */ #include @@ -59,15 +60,27 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { struct ipv6hdr *hdr; +#ifdef CONFIG_IPV6_STATISTICS + struct inet6_dev *idev = NULL; +#endif u32 pkt_len; if (skb->pkt_type == PACKET_OTHERHOST) goto drop; +#ifdef CONFIG_IPV6_STATISTICS + idev = in6_dev_get(dev); + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INRECEIVES); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INRECEIVES); +#endif if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INDISCARDS); +#endif goto out; } @@ -79,10 +92,8 @@ if (skb->len < sizeof(struct ipv6hdr)) goto err; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) { - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); - goto drop; - } + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + goto err; hdr = skb->nh.ipv6h; @@ -96,10 +107,8 @@ if (pkt_len + sizeof(struct ipv6hdr) > skb->len) goto truncated; if (pkt_len + sizeof(struct ipv6hdr) < skb->len) { - if 
(__pskb_trim(skb, pkt_len + sizeof(struct ipv6hdr))){ - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); - goto drop; - } + if (__pskb_trim(skb, pkt_len + sizeof(struct ipv6hdr))) + goto err; hdr = skb->nh.ipv6h; if (skb->ip_summed == CHECKSUM_HW) skb->ip_summed = CHECKSUM_NONE; @@ -107,22 +116,39 @@ } if (hdr->nexthdr == NEXTHDR_HOP) { + unsigned int nhoff = offsetof(struct ipv6hdr, nexthdr); skb->h.raw = (u8*)(hdr+1); - if (ipv6_parse_hopopts(skb, offsetof(struct ipv6hdr, nexthdr)) < 0) { - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); - return 0; + if (ipv6_parse_hopopts(&skb, &nhoff) < 0) { + skb = NULL; + goto err; } - hdr = skb->nh.ipv6h; } +#ifdef CONFIG_IPV6_STATISTICS + if (idev) + in6_dev_put(idev); +#endif return NF_HOOK(PF_INET6,NF_IP6_PRE_ROUTING, skb, dev, NULL, ip6_rcv_finish); truncated: +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INTRUNCATEDPKTS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS); +#endif err: +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); +#endif drop: - kfree_skb(skb); + if (skb) + kfree_skb(skb); out: +#ifdef CONFIG_IPV6_STATISTICS + if (idev) + in6_dev_put(idev); +#endif return 0; } @@ -135,17 +161,16 @@ { struct inet6_protocol *ipprot; struct sock *raw_sk; +#ifdef CONFIG_IPV6_STATISTICS + struct dst_entry *dst = skb->dst; + struct inet6_dev *idev = ((struct rt6_info *)dst)->rt6i_idev; +#endif unsigned int nhoff; int nexthdr; u8 hash; int cksum_sub = 0; skb->h.raw = skb->nh.raw + sizeof(struct ipv6hdr); - - /* - * Parse extension headers - */ - nexthdr = skb->nh.ipv6h->nexthdr; nhoff = offsetof(struct ipv6hdr, nexthdr); @@ -193,15 +218,27 @@ if (ret > 0) goto resubmit; else if (ret == 0) +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INDELIVERS); +#endif } else { if (!raw_sk) { if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { +#ifdef 
CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INUNKNOWNPROTOS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS); +#endif icmpv6_param_prob(skb, ICMPV6_UNK_NEXTHDR, nhoff); } } else { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INDELIVERS); +#endif kfree_skb(skb); } } @@ -209,7 +246,11 @@ return 0; discard: +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INDISCARDS); +#endif rcu_read_unlock(); kfree_skb(skb); return 0; @@ -224,18 +265,65 @@ int ip6_mc_input(struct sk_buff *skb) { struct ipv6hdr *hdr; +#if defined(CONFIG_IPV6_STATISTICS) || defined(CONFIG_IPV6_MROUTE) + struct dst_entry *dst = skb->dst; +#endif +#ifdef CONFIG_IPV6_STATISTICS + struct inet6_dev *idev = ((struct rt6_info *)dst)->rt6i_idev; +#endif int deliver; + int discard = 1; +#ifdef CONFIG_IPV6_MROUTE + skb->dev = dst->dev; +#endif + +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INMCASTPKTS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS); +#endif hdr = skb->nh.ipv6h; deliver = likely(!(skb->dev->flags & (IFF_PROMISC|IFF_ALLMULTI))) || ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL); +#ifdef CONFIG_IPV6_MROUTE /* - * IPv6 multicast router mode isnt currently supported. 
+ * IPv6 multicast router mode is now supported ;) */ + if (ipv6_devconf.mc_forwarding == 1) { + int addr_typed; + int addr_types; + + addr_typed = ipv6_addr_type(&hdr->daddr); + addr_types = ipv6_addr_type(&hdr->saddr); + + if (!(addr_typed & (IPV6_ADDR_LOOPBACK | IPV6_ADDR_LINKLOCAL))) { + struct sk_buff *skb2; + + /* check if this is a mld message */ + if(hdr->nexthdr == NEXTHDR_HOP) { + if(skb->h.raw[0] == IPPROTO_ICMPV6 ){ + ip6_input(skb); + return 0; + } + } + + if (deliver) { + skb2 = skb_clone(skb,GFP_ATOMIC); + } else { + discard = 0; + skb2 = skb; + } + ip6_mr_input(skb2); + } + } +#else #if 0 + /* + * IPv6 multicast router mode isnt currently supported. + */ if (ipv6_config.multicast_route) { int addr_type; @@ -257,13 +345,14 @@ } } #endif +#endif if (likely(deliver)) { + discard = 0; ip6_input(skb); - return 0; } /* discard */ - kfree_skb(skb); - + if (discard) + kfree_skb(skb); return 0; } diff -urN linux-2.6.11/net/ipv6/ip6_output.c x1/net/ipv6/ip6_output.c --- linux-2.6.11/net/ipv6/ip6_output.c 2005-03-02 08:37:47.000000000 +0100 +++ x1/net/ipv6/ip6_output.c 2005-02-11 17:24:31.000000000 +0100 @@ -75,6 +75,9 @@ struct dst_entry *dst = skb->dst; struct hh_cache *hh = dst->hh; +#ifdef CONFIG_IPV6_STATISTICS + struct inet6_dev *idev = ((struct rt6_info *)dst)->rt6i_idev; +#endif if (hh) { int hh_alen; @@ -88,14 +91,18 @@ } else if (dst->neighbour) return dst->neighbour->output(skb); +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_OUTNOROUTES); +#else IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); +#endif kfree_skb(skb); return -EINVAL; } /* dev_loopback_xmit for use with netfilter. 
*/ -static int ip6_dev_loopback_xmit(struct sk_buff *newskb) +int ip6_dev_loopback_xmit(struct sk_buff *newskb) { newskb->mac.raw = newskb->data; __skb_pull(newskb, newskb->nh.raw - newskb->data); @@ -112,6 +119,9 @@ { struct dst_entry *dst = skb->dst; struct net_device *dev = dst->dev; +#ifdef CONFIG_IPV6_STATISTICS + struct inet6_dev *idev = ((struct rt6_info *)dst)->rt6i_idev; +#endif skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -133,13 +143,21 @@ ip6_dev_loopback_xmit); if (skb->nh.ipv6h->hop_limit == 0) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); +#else IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); +#endif kfree_skb(skb); return 0; } } +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS); +#else IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS); +#endif } return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish); @@ -147,7 +165,7 @@ int ip6_output(struct sk_buff *skb) { - if (skb->len > dst_pmtu(skb->dst)) + if (skb->len > dst_pmtu(skb->dst) || ip6_dst_allfrag(skb->dst)) return ip6_fragment(skb, ip6_output2); else return ip6_output2(skb); @@ -166,11 +184,21 @@ .saddr = iph->saddr, } }, .proto = iph->nexthdr, }; +#ifdef CONFIG_IPV6_STATISTICS + struct inet6_dev *idev = NULL; +#endif dst = ip6_route_output(skb->sk, &fl); +#ifdef CONFIG_IPV6_STATISTICS + idev = ((struct rt6_info *)dst)->rt6i_idev; +#endif if (dst->error) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_OUTNOROUTES); +#else IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); +#endif LIMIT_NETDEBUG( printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n")); dst_release(dst); @@ -209,11 +237,16 @@ struct in6_addr *first_hop = &fl->fl6_dst; struct dst_entry *dst = skb->dst; struct ipv6hdr *hdr; +#ifdef CONFIG_IPV6_STATISTICS + struct inet6_dev *idev = ((struct rt6_info *)dst)->rt6i_idev; +#endif u8 proto = fl->proto; int seg_len = skb->len; int hlimit; u32 mtu; + skb->h.raw = skb->data; + if (opt) { int 
head_room; @@ -229,7 +262,11 @@ kfree_skb(skb); skb = skb2; if (skb == NULL) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); +#else IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); +#endif return -ENOBUFS; } if (sk) @@ -253,6 +290,8 @@ hlimit = np->hop_limit; if (hlimit < 0) hlimit = dst_metric(dst, RTAX_HOPLIMIT); + if (hlimit < 0) + hlimit = ipv6_get_hoplimit(dst->dev); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; @@ -263,15 +302,28 @@ mtu = dst_pmtu(dst); if ((skb->len <= mtu) || ipfragok) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); +#else IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); +#endif return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute); } if (net_ratelimit()) - printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n"); + printk(KERN_DEBUG "IPv6: sending pkt_too_big to self; " + "saddr=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x, " + "daddr=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x, " + "skb->len(%d) > mtu(%d)\n", + NIP6(hdr->saddr), NIP6(hdr->daddr), + skb->len, mtu); skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_FRAGFAILS); +#else IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS); +#endif kfree_skb(skb); return -EMSGSIZE; } @@ -348,14 +400,32 @@ struct dst_entry *dst = skb->dst; struct ipv6hdr *hdr = skb->nh.ipv6h; struct inet6_skb_parm *opt = IP6CB(skb); +#ifdef CONFIG_IPV6_STATISTICS + struct inet6_dev *idev = ((struct rt6_info *)dst)->rt6i_idev; +#endif if (ipv6_devconf.forwarding == 0) goto error; +#ifdef CONFIG_USE_POLICY_FWD if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_INDISCARDS); +#else IP6_INC_STATS(IPSTATS_MIB_INDISCARDS); +#endif goto drop; } +#else + if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_INDISCARDS); +#else + 
IP6_INC_STATS(IPSTATS_MIB_INDISCARDS); +#endif + goto drop; + } +#endif skb->ip_summed = CHECKSUM_NONE; @@ -392,7 +462,11 @@ } if (!xfrm6_route_forward(skb)) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_INDISCARDS); +#else IP6_INC_STATS(IPSTATS_MIB_INDISCARDS); +#endif goto drop; } @@ -430,14 +504,23 @@ /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_pmtu(dst), skb->dev); +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INTOOBIGERRORS); + IP6_INC_STATS_BH(idev, IPSTATS_MIB_FRAGFAILS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INTOOBIGERRORS); IP6_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS); +#endif kfree_skb(skb); return -EMSGSIZE; } if (skb_cow(skb, dst->dev->hard_header_len)) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); +#else IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); +#endif goto drop; } @@ -447,11 +530,19 @@ hdr->hop_limit--; +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_OUTFORWDATAGRAMS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS); +#endif return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish); error: +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INADDRERRORS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); +#endif drop: kfree_skb(skb); return -EINVAL; @@ -500,17 +591,21 @@ switch (**nexthdr) { case NEXTHDR_HOP: + break; case NEXTHDR_ROUTING: + found_rhdr = 1; + break; case NEXTHDR_DEST: - if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1; - if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset; - offset += ipv6_optlen(exthdr); - *nexthdr = &exthdr->nexthdr; - exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); + if (found_rhdr) + return offset; break; - default : + default: return offset; } + + offset += ipv6_optlen(exthdr); + *nexthdr = &exthdr->nexthdr; + exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); } return offset; @@ 
-523,6 +618,9 @@ struct rt6_info *rt = (struct rt6_info*)skb->dst; struct ipv6hdr *tmp_hdr; struct frag_hdr *fh; +#ifdef CONFIG_IPV6_STATISTICS + struct inet6_dev *idev = rt->rt6i_idev; +#endif unsigned int mtu, hlen, left, len; u32 frag_id = 0; int ptr, offset = 0, err=0; @@ -566,7 +664,11 @@ tmp_hdr = kmalloc(hlen, GFP_ATOMIC); if (!tmp_hdr) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_FRAGFAILS); +#else IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS); +#endif return -ENOMEM; } @@ -622,7 +724,11 @@ kfree(tmp_hdr); if (err == 0) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_FRAGOKS); +#else IP6_INC_STATS(IPSTATS_MIB_FRAGOKS); +#endif return 0; } @@ -632,7 +738,11 @@ frag = skb; } +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_FRAGFAILS); +#else IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS); +#endif return err; } @@ -665,7 +775,11 @@ if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n")); +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_FRAGFAILS); +#else IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS); +#endif err = -ENOMEM; goto fail; } @@ -723,19 +837,31 @@ * Put this fragment into the sending queue. 
*/ +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_FRAGCREATES); +#else IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES); +#endif err = output(frag); if (err) goto fail; } kfree_skb(skb); +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_FRAGOKS); +#else IP6_INC_STATS(IPSTATS_MIB_FRAGOKS); +#endif return err; fail: kfree_skb(skb); +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_FRAGFAILS); +#else IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS); +#endif return err; } @@ -814,6 +940,9 @@ struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; +#ifdef CONFIG_IPV6_STATISTICS + struct inet6_dev *idev = rt->rt6i_idev; +#endif unsigned int maxfraglen, fragheaderlen; int exthdrlen; int hh_len; @@ -848,6 +977,7 @@ inet->cork.fl = *fl; np->cork.hop_limit = hlimit; inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst); + inet->cork.flags |= ip6_dst_allfrag(&rt->u.dst) ? IPCORK_ALLFRAG : 0; inet->cork.length = 0; sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; @@ -899,7 +1029,7 @@ while (length > 0) { /* Check if the remaining data fits into current packet. */ - copy = mtu - skb->len; + copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; if (copy < length) copy = maxfraglen - skb->len; @@ -924,7 +1054,7 @@ * we know we need more fragment(s). */ datalen = length + fraggap; - if (datalen > mtu - fragheaderlen) + if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? 
mtu : maxfraglen) - fragheaderlen) datalen = maxfraglen - fragheaderlen; fraglen = datalen + fragheaderlen; @@ -1080,7 +1210,11 @@ return 0; error: inet->cork.length -= length; +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); +#else IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); +#endif return err; } @@ -1095,6 +1229,9 @@ struct ipv6_txoptions *opt = np->cork.opt; struct rt6_info *rt = np->cork.rt; struct flowi *fl = &inet->cork.fl; +#ifdef CONFIG_IPV6_STATISTICS + struct inet6_dev *idev = rt->rt6i_idev; +#endif unsigned char proto = fl->proto; int err = 0; @@ -1140,7 +1277,11 @@ ipv6_addr_copy(&hdr->daddr, final_dst); skb->dst = dst_clone(&rt->u.dst); - IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); +#else + IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); +#endif err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output); if (err) { if (err > 0) @@ -1158,6 +1299,7 @@ if (np->cork.rt) { dst_release(&np->cork.rt->u.dst); np->cork.rt = NULL; + inet->cork.flags &= ~IPCORK_ALLFRAG; } memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); return err; @@ -1170,9 +1312,19 @@ struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; +#ifdef CONFIG_IPV6_STATISTICS + struct dst_entry *dst; + struct inet6_dev *idev = NULL; +#endif while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { +#ifdef CONFIG_IPV6_STATISTICS + dst = skb->dst; + idev = ((struct rt6_info *)dst)->rt6i_idev; + IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); +#else IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); +#endif kfree_skb(skb); } @@ -1185,6 +1337,7 @@ if (np->cork.rt) { dst_release(&np->cork.rt->u.dst); np->cork.rt = NULL; + inet->cork.flags &= ~IPCORK_ALLFRAG; } memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); } diff -urN linux-2.6.11/net/ipv6/ip6_tunnel.c x1/net/ipv6/ip6_tunnel.c --- linux-2.6.11/net/ipv6/ip6_tunnel.c 2005-03-02 08:37:48.000000000 +0100 
+++ x1/net/ipv6/ip6_tunnel.c 2005-02-18 10:36:26.000000000 +0100 @@ -736,7 +736,7 @@ dsfield = INET_ECN_encapsulate(0, dsfield); ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); - ipv6h->hop_limit = t->parms.hop_limit; + ipv6h->hop_limit = t->parms.hop_limit; /*XXX use physical link's mtu */ ipv6h->nexthdr = proto; ipv6_addr_copy(&ipv6h->saddr, &fl.fl6_src); ipv6_addr_copy(&ipv6h->daddr, &fl.fl6_dst); diff -urN linux-2.6.11/net/ipv6/ip6mr.c x1/net/ipv6/ip6mr.c --- linux-2.6.11/net/ipv6/ip6mr.c 1970-01-01 01:00:00.000000000 +0100 +++ x1/net/ipv6/ip6mr.c 2005-02-09 16:31:39.000000000 +0100 @@ -0,0 +1,1682 @@ +/* + * Linux IPv6 multicast routing support for BSD pim6sd + * + * (c) 2004 Mickael Hoerdt, + * LSIIT Laboratory, Strasbourg, France + * (c) 2004 Jean-Philippe Andriot, + * 6WIND, Paris, France + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Version: $Id: ip6mr.c,v 1.1 2004/12/27 17:54:01 hoerdt Exp $ + * + * Fixes: + * Michael Chastain : Incorrect size of copying. + * Alan Cox : Added the cache manager code + * Alan Cox : Fixed the clone/copy bug and device race. + * Mike McLagan : Routing by source + * Malcolm Beattie : Buffer handling fixes. + * Alexey Kuznetsov : Double buffer free and other fixes. + * SVR Anand : Fixed several multicast bugs and problems. + * Alexey Kuznetsov : Status, optimisations and more. + * Brad Parker : Better behaviour on mrouted upcall + * overflow. + * Carlos Picoto : PIMv1 Support + * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header + * Relax this requirement to work with older peers. 
+ * Mickael Hoerdt and : IPv6 support based on linux/net/ipv4/ipmr.c [Linux 2.x] + * Jean-Philippe Andriot on netinet/ip6_mroute.c [*BSD] + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include +#include +#include +#include + +struct sock *mroute6_socket; + + +/* Big lock, protecting vif table, mrt cache and mroute socket state. + Note that the changes are semaphored via rtnl_lock. + */ + +static rwlock_t mrt_lock = RW_LOCK_UNLOCKED; + +/* + * Multicast router control variables + */ + +static struct mif_device vif6_table[MAXMIFS]; /* Devices */ +static int maxvif; + +#define MIF_EXISTS(idx) (vif6_table[idx].dev != NULL) + +static int mroute_do_assert; /* Set in PIM assert */ +static int mroute_do_pim; + +static struct mfc6_cache *mfc6_cache_array[MFC_LINES]; /* Forwarding cache */ + +static struct mfc6_cache *mfc_unres_queue; /* Queue of unresolved entries */ +static atomic_t cache_resolve_queue_len; /* Size of unresolved */ + +/* Special spinlock for queue of unresolved entries */ +static spinlock_t mfc_unres_lock = SPIN_LOCK_UNLOCKED; + +/* We return to original Alan's scheme. Hash table of resolved + entries is changed only in process context and protected + with weak lock mrt_lock. Queue of unresolved entries is protected + with strong spinlock mfc_unres_lock. + + In this case data path is free of exclusive locks at all. 
+ */ + +static kmem_cache_t *mrt_cachep; + +static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache, int local); +static int ip6mr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert); +static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm); + +static struct inet6_protocol pim6_protocol; + +static struct timer_list ipmr_expire_timer; + + +#ifdef CONFIG_PROC_FS + +struct ipmr_mfc_iter { + struct mfc6_cache **cache; + int ct; +}; + + +static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos) +{ + struct mfc6_cache *mfc; + + it->cache = mfc6_cache_array; + read_lock(&mrt_lock); + for (it->ct = 0; it->ct < MFC_LINES; it->ct++) + for(mfc = mfc6_cache_array[it->ct]; mfc; mfc = mfc->next) + if (pos-- == 0) + return mfc; + read_unlock(&mrt_lock); + + it->cache = &mfc_unres_queue; + spin_lock_bh(&mfc_unres_lock); + for(mfc = mfc_unres_queue; mfc; mfc = mfc->next) + if (pos-- == 0) + return mfc; + spin_unlock_bh(&mfc_unres_lock); + + it->cache = NULL; + return NULL; +} + + + + +/* + * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif + */ + +struct ipmr_vif_iter { + int ct; +}; + +static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter, + loff_t pos) +{ + for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) { + if(!MIF_EXISTS(iter->ct)) + continue; + if (pos-- == 0) + return &vif6_table[iter->ct]; + } + return NULL; +} + +static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) +{ + read_lock(&mrt_lock); + return *pos ? 
ip6mr_vif_seq_idx(seq->private, *pos - 1) + : SEQ_START_TOKEN; +} + +static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct ipmr_vif_iter *iter = seq->private; + + ++*pos; + if (v == SEQ_START_TOKEN) + return ip6mr_vif_seq_idx(iter, 0); + + while (++iter->ct < maxvif) { + if(!MIF_EXISTS(iter->ct)) + continue; + return &vif6_table[iter->ct]; + } + return NULL; +} + +static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v) +{ + read_unlock(&mrt_lock); +} + +static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) +{ + if (v == SEQ_START_TOKEN) { + seq_puts(seq, + "Interface BytesIn PktsIn BytesOut PktsOut Flags\n"); + } else { + const struct mif_device *vif = v; + const char *name = vif->dev ? vif->dev->name : "none"; + + seq_printf(seq, + "%2Zd %-10s %8ld %7ld %8ld %7ld %05X\n", + vif - vif6_table, + name, vif->bytes_in, vif->pkt_in, + vif->bytes_out, vif->pkt_out, + vif->flags); + } + return 0; +} + +static struct seq_operations ip6mr_vif_seq_ops = { + .start = ip6mr_vif_seq_start, + .next = ip6mr_vif_seq_next, + .stop = ip6mr_vif_seq_stop, + .show = ip6mr_vif_seq_show, +}; + +static int ip6mr_vif_open(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + int rc = -ENOMEM; + struct ipmr_vif_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); + + if (!s) + goto out; + + rc = seq_open(file, &ip6mr_vif_seq_ops); + if (rc) + goto out_kfree; + + s->ct = 0; + seq = file->private_data; + seq->private = s; +out: + return rc; +out_kfree: + kfree(s); + goto out; + +} + +static struct file_operations ip6mr_vif_fops = { + .owner = THIS_MODULE, + .open = ip6mr_vif_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, /* also frees the iterator stored in seq->private by ip6mr_vif_open */ +}; + +static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) +{ + return *pos ? 
ipmr_mfc_seq_idx(seq->private, *pos - 1) + : SEQ_START_TOKEN; +} + +static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct mfc6_cache *mfc = v; + struct ipmr_mfc_iter *it = seq->private; + + ++*pos; + + if (v == SEQ_START_TOKEN) + return ipmr_mfc_seq_idx(seq->private, 0); + + if (mfc->next) + return mfc->next; + + if (it->cache == &mfc_unres_queue) + goto end_of_list; + + BUG_ON(it->cache != mfc6_cache_array); + + while (++it->ct < MFC_LINES) { + mfc = mfc6_cache_array[it->ct]; + if (mfc) + return mfc; + } + + /* exhausted cache_array, show unresolved */ + read_unlock(&mrt_lock); + it->cache = &mfc_unres_queue; + it->ct = 0; + + spin_lock_bh(&mfc_unres_lock); + mfc = mfc_unres_queue; + if (mfc) + return mfc; + + end_of_list: + spin_unlock_bh(&mfc_unres_lock); + it->cache = NULL; + + return NULL; +} + +static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) +{ + struct ipmr_mfc_iter *it = seq->private; + + if (it->cache == &mfc_unres_queue) + spin_unlock_bh(&mfc_unres_lock); + else if (it->cache == mfc6_cache_array) + read_unlock(&mrt_lock); +} + +static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) +{ + int n; + + if (v == SEQ_START_TOKEN) { + seq_puts(seq, + "Group Origin Iif Pkts Bytes Wrong Oifs\n"); + } else { + const struct mfc6_cache *mfc = v; + const struct ipmr_mfc_iter *it = seq->private; + int i; + + for(i=0;i<16;i++) { + seq_printf(seq,"%02x",mfc->mf6c_mcastgrp.s6_addr[i]); + } + seq_printf(seq," "); + for(i=0;i<16;i++) { + seq_printf(seq,"%02x",mfc->mf6c_origin.s6_addr[i]); + } + seq_printf(seq," "); + + seq_printf(seq, "%-3d %8ld %8ld %8ld", + mfc->mf6c_parent, + mfc->mfc_un.res.pkt, + mfc->mfc_un.res.bytes, + mfc->mfc_un.res.wrong_if); + + if (it->cache != &mfc_unres_queue) { + for(n = mfc->mfc_un.res.minvif; + n < mfc->mfc_un.res.maxvif; n++ ) { + if(MIF_EXISTS(n) + && mfc->mfc_un.res.ttls[n] < 255) + seq_printf(seq, + " %2d:%-3d", + n, mfc->mfc_un.res.ttls[n]); + } + } + seq_putc(seq, '\n'); + } + return 
0; +} + +static struct seq_operations ipmr_mfc_seq_ops = { + .start = ipmr_mfc_seq_start, + .next = ipmr_mfc_seq_next, + .stop = ipmr_mfc_seq_stop, + .show = ipmr_mfc_seq_show, +}; + +static int ipmr_mfc_open(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + int rc = -ENOMEM; + struct ipmr_mfc_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); + + if (!s) + goto out; + + rc = seq_open(file, &ipmr_mfc_seq_ops); + if (rc) + goto out_kfree; + + memset(s, 0, sizeof(*s)); + seq = file->private_data; + seq->private = s; +out: + return rc; +out_kfree: + kfree(s); + goto out; + +} + +static struct file_operations ip6mr_mfc_fops = { + .owner = THIS_MODULE, + .open = ipmr_mfc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, /* also frees the iterator stored in seq->private by ipmr_mfc_open */ +}; +#endif + +#ifdef CONFIG_IPV6_PIMSM_V2 +static int reg_vif_num = -1; + +static int pim6_rcv(struct sk_buff **pskb,unsigned int *nhoffp) +{ + struct pimreghdr *pim; + struct ipv6hdr *encap; + struct sk_buff *skb = *pskb; + struct net_device *reg_dev = NULL; + + if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) + goto drop; + + pim = (struct pimreghdr*)skb->h.raw; + if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) || + (pim->flags&PIM_NULL_REGISTER) || + (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && + (u16)csum_fold(skb_checksum(skb, 0, skb->len, 0)))) + goto drop; + + /* check if the inner packet is destined to mcast group */ + encap = (struct ipv6hdr*)(skb->h.raw + sizeof(struct pimreghdr)); + + if(!(ipv6_addr_type(&encap->daddr)&IPV6_ADDR_MULTICAST) || + encap->payload_len == 0 || + ntohs(encap->payload_len) + sizeof(*pim) > skb->len) + goto drop; + + read_lock(&mrt_lock); + if (reg_vif_num >= 0) + reg_dev = vif6_table[reg_vif_num].dev; + if (reg_dev) + dev_hold(reg_dev); + read_unlock(&mrt_lock); + + if (reg_dev == NULL) + goto drop; + + skb->mac.raw = skb->nh.raw; + skb_pull(skb, (u8*)encap - skb->data); + skb->nh.ipv6h = (struct ipv6hdr *)skb->data; + skb->dev = reg_dev; + skb->protocol = 
htons(ETH_P_IPV6); /* the decapsulated inner packet is IPv6, not IPv4 */ + skb->ip_summed = 0; + skb->pkt_type = PACKET_HOST; + dst_release(skb->dst); + ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len; + ((struct net_device_stats*)reg_dev->priv)->rx_packets++; + skb->dst = NULL; +#ifdef CONFIG_NETFILTER + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; +#endif + netif_rx(skb); + dev_put(reg_dev); + return 0; + drop: + kfree_skb(skb); + return 0; +} + +static struct inet6_protocol pim6_protocol = { + .handler = pim6_rcv, +}; +#endif + +/* Service routines creating virtual interfaces: PIMREG */ +#ifdef CONFIG_IPV6_PIMSM_V2 + + +static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) +{ + read_lock(&mrt_lock); + ((struct net_device_stats*)dev->priv)->tx_bytes += skb->len; + ((struct net_device_stats*)dev->priv)->tx_packets++; + ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT); + read_unlock(&mrt_lock); + kfree_skb(skb); + return 0; +} + +static struct net_device_stats *reg_vif_get_stats(struct net_device *dev) +{ + return (struct net_device_stats*)dev->priv; +} + +static void reg_vif_setup(struct net_device *dev) +{ + dev->type = ARPHRD_PIMREG; + dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; + dev->flags = IFF_NOARP; + dev->hard_start_xmit = reg_vif_xmit; + dev->get_stats = reg_vif_get_stats; + dev->destructor = free_netdev; +} + +static struct net_device *ip6mr_reg_vif(void) +{ + struct net_device *dev; + struct inet6_dev *in_dev; + + dev = alloc_netdev(sizeof(struct net_device_stats), "pim6reg", + reg_vif_setup); + + if (dev == NULL) + return NULL; + + if (register_netdevice(dev)) { + free_netdev(dev); + return NULL; + } + dev->iflink = 0; + + if ((in_dev = ipv6_find_idev(dev)) == NULL) { + goto failure; + } + +/* + * if ((in_dev = __in6_dev_get(dev)) == NULL) + goto failure; +*/ +#if 0 + in_dev->cnf.rp_filter = 0; +#endif + + if (dev_open(dev)) + goto failure; + + return dev; + +failure: + /* allow the register to be completed before unregistering. 
*/ + rtnl_unlock(); + rtnl_lock(); + + unregister_netdevice(dev); + return NULL; +} +#endif + +/* + * Delete a VIF entry + */ + +static int mif6_delete(int vifi) +{ + struct mif_device *v; + struct net_device *dev; + struct inet6_dev *in_dev; + + if (vifi < 0 || vifi >= maxvif) + return -EADDRNOTAVAIL; + + v = &vif6_table[vifi]; + + write_lock_bh(&mrt_lock); + dev = v->dev; + v->dev = NULL; + + if (!dev) { + write_unlock_bh(&mrt_lock); + return -EADDRNOTAVAIL; + } + +#ifdef CONFIG_IPV6_PIMSM_V2 + if (vifi == reg_vif_num) + reg_vif_num = -1; +#endif + + if (vifi+1 == maxvif) { + int tmp; + for (tmp=vifi-1; tmp>=0; tmp--) { + if (MIF_EXISTS(tmp)) + break; + } + maxvif = tmp+1; + } + + write_unlock_bh(&mrt_lock); + + dev_set_allmulti(dev, -1); + + if ((in_dev = __in6_dev_get(dev)) != NULL) { + in_dev->cnf.mc_forwarding--; + } + + if (v->flags&(MIFF_REGISTER)) + unregister_netdevice(dev); + + dev_put(dev); + return 0; +} + +/* Destroy an unresolved cache entry, killing queued skbs + and reporting error to netlink readers. + */ + +static void ip6mr_destroy_unres(struct mfc6_cache *c) +{ + struct sk_buff *skb; + + atomic_dec(&cache_resolve_queue_len); + + while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) { + if (skb->nh.ipv6h->version == 0) { + struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); + nlh->nlmsg_type = NLMSG_ERROR; + nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); + skb_trim(skb, nlh->nlmsg_len); + ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -ETIMEDOUT; + netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT); + } else + kfree_skb(skb); + } + + kmem_cache_free(mrt_cachep, c); +} + + +/* Single timer process for all the unresolved queue. 
*/ + +static void ipmr_expire_process(unsigned long dummy) +{ + unsigned long now; + unsigned long expires; + struct mfc6_cache *c, **cp; + + if (!spin_trylock(&mfc_unres_lock)) { + mod_timer(&ipmr_expire_timer, jiffies+HZ/10); + return; + } + + if (atomic_read(&cache_resolve_queue_len) == 0) + goto out; + + now = jiffies; + expires = 10*HZ; + cp = &mfc_unres_queue; + + while ((c=*cp) != NULL) { + if (time_after(c->mfc_un.unres.expires, now)) { + unsigned long interval = c->mfc_un.unres.expires - now; + if (interval < expires) + expires = interval; + cp = &c->next; + continue; + } + + *cp = c->next; + + ip6mr_destroy_unres(c); + } + + if (atomic_read(&cache_resolve_queue_len)) + mod_timer(&ipmr_expire_timer, jiffies + expires); + +out: + spin_unlock(&mfc_unres_lock); +} + +/* Fill oifs list. It is called under write locked mrt_lock. */ + +static void ip6mr_update_threshoulds(struct mfc6_cache *cache, unsigned char *ttls) +{ + int vifi; + + cache->mfc_un.res.minvif = MAXVIFS; + cache->mfc_un.res.maxvif = 0; + memset(cache->mfc_un.res.ttls, 255, MAXVIFS); + + for (vifi=0; vifi<maxvif; vifi++) { + if (MIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) { + cache->mfc_un.res.ttls[vifi] = ttls[vifi]; + if (cache->mfc_un.res.minvif > vifi) + cache->mfc_un.res.minvif = vifi; + if (cache->mfc_un.res.maxvif <= vifi) + cache->mfc_un.res.maxvif = vifi + 1; + } + } +} + +static int mif6_add(struct mif6ctl *vifc, int mrtsock) +{ + int vifi = vifc->mif6c_mifi; + struct mif_device *v = &vif6_table[vifi]; + struct net_device *dev; + struct inet6_dev *in_dev; + + /* Is vif busy ? 
*/ + if (MIF_EXISTS(vifi)) + return -EADDRINUSE; + + switch (vifc->mif6c_flags) { +#ifdef CONFIG_IPV6_PIMSM_V2 + case MIFF_REGISTER: + /* + * Special Purpose VIF in PIM + * All the packets will be sent to the daemon + */ + if (reg_vif_num >= 0) + return -EADDRINUSE; + dev = ip6mr_reg_vif(); + if (!dev) + return -ENOBUFS; + break; +#endif + case 0: + dev=dev_get_by_index(vifc->mif6c_pifi); + if (!dev) + return -EADDRNOTAVAIL; + __dev_put(dev); + break; + default: + return -EINVAL; + } + + if ((in_dev = __in6_dev_get(dev)) == NULL) + return -EADDRNOTAVAIL; + in_dev->cnf.mc_forwarding++; + dev_set_allmulti(dev, +1); + + /* + * Fill in the VIF structures + */ + v->rate_limit=vifc->vifc_rate_limit; + v->flags=vifc->mif6c_flags; + if(!mrtsock) + v->flags |= VIFF_STATIC; + v->threshold=vifc->vifc_threshold; + v->bytes_in = 0; + v->bytes_out = 0; + v->pkt_in = 0; + v->pkt_out = 0; + v->link = dev->ifindex; + if (v->flags&(MIFF_REGISTER)) + v->link = dev->iflink; + + /* And finish update writing critical data */ + write_lock_bh(&mrt_lock); + dev_hold(dev); + v->dev=dev; +#ifdef CONFIG_IPV6_PIMSM_V2 + if (v->flags&MIFF_REGISTER) + reg_vif_num = vifi; +#endif + if (vifi+1 > maxvif) + maxvif = vifi+1; + write_unlock_bh(&mrt_lock); + return 0; +} + +static struct mfc6_cache *ip6mr_cache_find(struct in6_addr origin,struct in6_addr mcastgrp) +{ + int line=MFC6_HASH(mcastgrp,origin); + struct mfc6_cache *c; + + for (c=mfc6_cache_array[line]; c; c = c->next) { + if (IN6_ARE_ADDR_EQUAL(&c->mf6c_origin,&origin) && + IN6_ARE_ADDR_EQUAL(&c->mf6c_mcastgrp,&mcastgrp)) + break; + } + return c; +} + +/* + * Allocate a multicast cache entry + */ +static struct mfc6_cache *ip6mr_cache_alloc(void) +{ + struct mfc6_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL); + if(c==NULL) + return NULL; + memset(c, 0, sizeof(*c)); + c->mfc_un.res.minvif = MAXVIFS; + return c; +} + +static struct mfc6_cache *ip6mr_cache_alloc_unres(void) +{ + struct mfc6_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC); 
+ if(c==NULL) + return NULL; + memset(c, 0, sizeof(*c)); + skb_queue_head_init(&c->mfc_un.unres.unresolved); + c->mfc_un.unres.expires = jiffies + 10*HZ; + return c; +} + +/* + * A cache entry has gone into a resolved state from queued + */ + +static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c) +{ + struct sk_buff *skb; + + /* + * Play the pending entries through our router + */ + + while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) { + if (skb->nh.ipv6h->version == 0) { + int err; + struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); + + if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) { + nlh->nlmsg_len = skb->tail - (u8*)nlh; + } else { + nlh->nlmsg_type = NLMSG_ERROR; + nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); + skb_trim(skb, nlh->nlmsg_len); + ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -EMSGSIZE; + } + err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT); + } else + ip6_mr_forward(skb, c, 0); + } +} + +/* + * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd + * expects the following bizarre scheme. + * + * Called under mrt_lock. + */ + +static int ip6mr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) +{ + struct sk_buff *skb; + struct mrt6msg *msg; + int ret; + +#ifdef CONFIG_IPV6_PIMSM_V2 + if (assert == MRT6MSG_WHOLEPKT) + skb = skb_realloc_headroom(pkt, sizeof(struct ipv6hdr)); + else +#endif + skb = alloc_skb(128, GFP_ATOMIC); + + if(!skb) + return -ENOBUFS; + + /* I suppose that internal messages + * do not require checksums */ + + skb->ip_summed = CHECKSUM_UNNECESSARY; + +#ifdef CONFIG_IPV6_PIMSM_V2 + if (assert == MRT6MSG_WHOLEPKT) { + /* Ugly, but we have no choice with this interface. + Duplicate old header, fix length etc. 
+ And all this only to mangle msg->im6_msgtype and + to set msg->im6_mbz to "mbz" :-) + */ + msg = (struct mrt6msg*)skb_push(skb, sizeof(struct ipv6hdr)); + skb->nh.raw = skb->h.raw = (u8*)msg; + memcpy(msg, pkt->nh.raw, sizeof(struct ipv6hdr)); + msg->im6_msgtype = MRT6MSG_WHOLEPKT; + msg->im6_mbz = 0; + msg->im6_mif = reg_vif_num; + } else +#endif + { + + /* + * Copy the IP header + */ + + skb->nh.ipv6h = (struct ipv6hdr *)skb_put(skb, sizeof(struct ipv6hdr)); + memcpy(skb->data,pkt->data,sizeof(struct ipv6hdr)); + + msg = (struct mrt6msg*)skb->nh.ipv6h; + skb->dst = dst_clone(pkt->dst); + + /* + * Add our header + */ + + msg->im6_msgtype = assert; + msg->im6_mbz = 0; + msg->im6_mif = vifi; + skb->h.raw = skb->nh.raw; + } + + if (mroute6_socket == NULL) { + kfree_skb(skb); + return -EINVAL; + } + + /* + * Deliver to user space multicast routing algorithms + */ + if ((ret=sock_queue_rcv_skb(mroute6_socket,skb))<0) { + if (net_ratelimit()) + printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n"); + kfree_skb(skb); + } + + return ret; +} + +/* + * Queue a packet for resolution. It gets locked cache entry! 
+ */ + +static int +ip6mr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) +{ + int err; + struct mfc6_cache *c; + + spin_lock_bh(&mfc_unres_lock); + for (c=mfc_unres_queue; c; c=c->next) { + if (IN6_ARE_ADDR_EQUAL(&c->mf6c_mcastgrp,&skb->nh.ipv6h->daddr) && + IN6_ARE_ADDR_EQUAL(&c->mf6c_origin,&skb->nh.ipv6h->saddr)) + break; + } + + if (c == NULL) { + /* + * Create a new entry if allowable + */ + + if (atomic_read(&cache_resolve_queue_len)>=10 || + (c=ip6mr_cache_alloc_unres())==NULL) { + spin_unlock_bh(&mfc_unres_lock); + + kfree_skb(skb); + return -ENOBUFS; + } + + /* + * Fill in the new cache entry + */ + c->mf6c_parent=-1; + c->mf6c_origin=skb->nh.ipv6h->saddr; + c->mf6c_mcastgrp=skb->nh.ipv6h->daddr; + + /* + * Reflect first query at pim6sd + */ + if ((err = ip6mr_cache_report(skb, vifi, MRT6MSG_NOCACHE))<0) { + /* If the report failed throw the cache entry + out - Brad Parker + */ + spin_unlock_bh(&mfc_unres_lock); + + kmem_cache_free(mrt_cachep, c); + kfree_skb(skb); + return err; + } + + atomic_inc(&cache_resolve_queue_len); + c->next = mfc_unres_queue; + mfc_unres_queue = c; + + mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires); + } + + /* + * See if we can append the packet + */ + if (c->mfc_un.unres.unresolved.qlen>3) { + kfree_skb(skb); + err = -ENOBUFS; + } else { + skb_queue_tail(&c->mfc_un.unres.unresolved,skb); + err = 0; + } + + spin_unlock_bh(&mfc_unres_lock); + return err; +} + +/* + * MFC6 cache manipulation by user space + */ + +static int ip6mr_mfc_delete(struct mf6cctl *mfc) +{ + int line; + struct mfc6_cache *c, **cp; + + line=MFC6_HASH(mfc->mf6cc_mcastgrp.sin6_addr, mfc->mf6cc_origin.sin6_addr); + + for (cp=&mfc6_cache_array[line]; (c=*cp) != NULL; cp = &c->next) { + if (IN6_ARE_ADDR_EQUAL(&c->mf6c_origin,&mfc->mf6cc_origin.sin6_addr) && + IN6_ARE_ADDR_EQUAL(&c->mf6c_mcastgrp,&mfc->mf6cc_mcastgrp.sin6_addr)) { + write_lock_bh(&mrt_lock); + *cp = c->next; + write_unlock_bh(&mrt_lock); + + kmem_cache_free(mrt_cachep, c); + return 0; 
+ } + } + return -ENOENT; +} + +static int ip6mr_device_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct mif_device *v; + int ct; + if (event != NETDEV_UNREGISTER) + return NOTIFY_DONE; + v=&vif6_table[0]; + for(ct=0;ct<maxvif;ct++,v++) { + if (v->dev==ptr) + mif6_delete(ct); + } + return NOTIFY_DONE; +} + +static struct notifier_block ip6_mr_notifier = { + .notifier_call = ip6mr_device_event +}; + +/* + * Setup for IP multicast routing + */ + +void __init ip6_mr_init(void) +{ + printk(KERN_INFO "6WIND/LSIIT IPv6 multicast forwarding 0.1 plus PIM-SM/SSM with *BSD API\n"); + + mrt_cachep = kmem_cache_create("ip6_mrt_cache", + sizeof(struct mfc6_cache), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!mrt_cachep) + panic("cannot allocate ip_mrt_cache"); + + init_timer(&ipmr_expire_timer); + ipmr_expire_timer.function=ipmr_expire_process; + register_netdevice_notifier(&ip6_mr_notifier); +#ifdef CONFIG_PROC_FS + proc_net_fops_create("ip6_mr_vif", 0, &ip6mr_vif_fops); + proc_net_fops_create("ip6_mr_cache", 0, &ip6mr_mfc_fops); +#endif +} + + +static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) +{ + int line; + struct mfc6_cache *uc, *c, **cp; + unsigned char ttls[MAXVIFS]; + int i; + + memset(ttls, 255, MAXVIFS); + for(i=0;i<MAXVIFS;i++) { + if (IF_ISSET(i, &mfc->mf6cc_ifset)) + ttls[i]=1; + + } + + line=MFC6_HASH(mfc->mf6cc_mcastgrp.sin6_addr, mfc->mf6cc_origin.sin6_addr); + + for (cp=&mfc6_cache_array[line]; (c=*cp) != NULL; cp = &c->next) { + if (IN6_ARE_ADDR_EQUAL(&c->mf6c_origin,&mfc->mf6cc_origin.sin6_addr) && + IN6_ARE_ADDR_EQUAL(&c->mf6c_mcastgrp,&mfc->mf6cc_mcastgrp.sin6_addr)) + break; + } + + if (c != NULL) { + write_lock_bh(&mrt_lock); + c->mf6c_parent = mfc->mf6cc_parent; + ip6mr_update_threshoulds(c, ttls); + if (!mrtsock) + c->mfc_flags |= MFC_STATIC; + write_unlock_bh(&mrt_lock); + return 0; + } + + if(!(ipv6_addr_type(&mfc->mf6cc_mcastgrp.sin6_addr)&IPV6_ADDR_MULTICAST)) + return -EINVAL; + + c=ip6mr_cache_alloc(); + if (c==NULL) + return -ENOMEM; + + 
c->mf6c_origin=mfc->mf6cc_origin.sin6_addr; + c->mf6c_mcastgrp=mfc->mf6cc_mcastgrp.sin6_addr; + c->mf6c_parent=mfc->mf6cc_parent; + ip6mr_update_threshoulds(c, ttls); + if (!mrtsock) + c->mfc_flags |= MFC_STATIC; + + write_lock_bh(&mrt_lock); + c->next = mfc6_cache_array[line]; + mfc6_cache_array[line] = c; + write_unlock_bh(&mrt_lock); + + /* + * Check to see if we resolved a queued list. If so we + * need to send on the frames and tidy up. + */ + spin_lock_bh(&mfc_unres_lock); + for (cp = &mfc_unres_queue; (uc=*cp) != NULL; + cp = &uc->next) { + if (IN6_ARE_ADDR_EQUAL(&uc->mf6c_origin,&c->mf6c_origin) && + IN6_ARE_ADDR_EQUAL(&uc->mf6c_mcastgrp,&c->mf6c_mcastgrp)) { + *cp = uc->next; + if (atomic_dec_and_test(&cache_resolve_queue_len)) + del_timer(&ipmr_expire_timer); + break; + } + } + spin_unlock_bh(&mfc_unres_lock); + + if (uc) { + ip6mr_cache_resolve(uc, c); + kmem_cache_free(mrt_cachep, uc); + } + return 0; +} + +/* + * Close the multicast socket, and clear the vif tables etc + */ + +static void mroute_clean_tables(struct sock *sk) +{ + int i; + + /* + * Shut down all active vif entries + */ + for(i=0; i<maxvif; i++) { + if (!(vif6_table[i].flags&VIFF_STATIC)) + mif6_delete(i); + } + + /* + * Wipe the cache + */ + for (i=0;i<MFC_LINES;i++) { + struct mfc6_cache *c, **cp; + + cp = &mfc6_cache_array[i]; + while ((c = *cp) != NULL) { + if (c->mfc_flags&MFC_STATIC) { + cp = &c->next; + continue; + } + write_lock_bh(&mrt_lock); + *cp = c->next; + write_unlock_bh(&mrt_lock); + + kmem_cache_free(mrt_cachep, c); + } + } + + if (atomic_read(&cache_resolve_queue_len) != 0) { + struct mfc6_cache *c; + + spin_lock_bh(&mfc_unres_lock); + while (mfc_unres_queue != NULL) { + c = mfc_unres_queue; + mfc_unres_queue = c->next; + spin_unlock_bh(&mfc_unres_lock); + + ip6mr_destroy_unres(c); + + spin_lock_bh(&mfc_unres_lock); + } + spin_unlock_bh(&mfc_unres_lock); + } +} + +static void mrtsock_destruct(struct sock *sk) +{ + rtnl_lock(); + if (sk == mroute6_socket) { + ipv6_devconf.mc_forwarding--; + + write_lock_bh(&mrt_lock); + mroute6_socket=NULL; + write_unlock_bh(&mrt_lock); + + mroute_clean_tables(sk); + } + rtnl_unlock(); +} + +/* + * Socket options and virtual interface manipulation. 
The whole
+ *	virtual interface system is a complete heap, but unfortunately
+ *	that's how BSD mrouted happens to think. Maybe one day with a proper
+ *	MOSPF/PIM router set up we can clean this up.
+ *
+ *	Every option except MRT6_INIT requires the caller to be the
+ *	registered mroute6_socket or to hold CAP_NET_ADMIN (-EACCES
+ *	otherwise).  Results of the helpers called below are passed
+ *	through to the caller unchanged.
+ */
+
+int ip6_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int optlen)
+{
+	int ret;
+	struct mif6ctl vif;
+	struct mf6cctl mfc;
+	mifi_t mifi;
+
+	if(optname!=MRT6_INIT)
+	{
+		if(sk!=mroute6_socket && !capable(CAP_NET_ADMIN))
+			return -EACCES;
+	}
+
+	switch(optname)
+	{
+		case MRT6_INIT:
+			if (sk->sk_type != SOCK_RAW ||
+			    inet_sk(sk)->num != IPPROTO_ICMPV6)
+				return -EOPNOTSUPP;
+			if(optlen!=sizeof(int))
+				return -ENOPROTOOPT;
+
+			rtnl_lock();
+			if (mroute6_socket) {
+				rtnl_unlock();
+				return -EADDRINUSE;
+			}
+
+			ret = ip6_ra_control(sk, 1, mrtsock_destruct);
+			if (ret == 0) {
+				write_lock_bh(&mrt_lock);
+				mroute6_socket=sk;
+				write_unlock_bh(&mrt_lock);
+
+				ipv6_devconf.mc_forwarding++;
+			}
+			rtnl_unlock();
+			return ret;
+		case MRT6_DONE:
+			if (sk!=mroute6_socket)
+				return -EACCES;
+			return ip6_ra_control(sk, -1, NULL);
+		case MRT6_ADD_MIF:
+			if(optlen!=sizeof(vif))
+				return -EINVAL;
+			if (copy_from_user(&vif,optval,sizeof(vif)))
+				return -EFAULT;
+			if(vif.mif6c_mifi >= MAXVIFS)
+				return -ENFILE;
+			rtnl_lock();
+			ret = mif6_add(&vif, sk==mroute6_socket);
+			rtnl_unlock();
+			return ret;
+		case MRT6_DEL_MIF:
+			if(optlen!=sizeof(mifi_t))
+				return -EINVAL;
+			if (copy_from_user(&mifi,optval,sizeof(mifi_t)))
+				return -EFAULT;
+			rtnl_lock();
+			ret = mif6_delete(mifi);
+			rtnl_unlock();
+			return ret;
+
+		/*
+		 *	Manipulate the forwarding caches. These live
+		 *	in a sort of kernel/user symbiosis.
+		 */
+		case MRT6_ADD_MFC:
+		case MRT6_DEL_MFC:
+			if(optlen!=sizeof(mfc))
+				return -EINVAL;
+			if (copy_from_user(&mfc,optval, sizeof(mfc)))
+				return -EFAULT;
+			rtnl_lock();
+			if (optname==MRT6_DEL_MFC)
+				ret = ip6mr_mfc_delete(&mfc);
+			else
+				ret = ip6mr_mfc_add(&mfc, sk==mroute6_socket);
+			rtnl_unlock();
+			return ret;
+		/*
+		 *	Control PIM assert (to activate pim will activate assert)
+		 */
+		case MRT6_ASSERT:
+		{
+			int v;
+			if(get_user(v,(int __user *)optval))
+				return -EFAULT;
+			mroute_do_assert=(v)?1:0;
+			return 0;
+		}
+#ifdef CONFIG_IPV6_PIMSM_V2
+		case MRT6_PIM:
+		{
+			int v, ret;
+			if(get_user(v,(int __user *)optval))
+				return -EFAULT;
+			v = (v)?1:0;
+			rtnl_lock();
+			ret = 0;
+			if (v != mroute_do_pim) {
+				mroute_do_pim = v;
+				mroute_do_assert = v;
+				if (mroute_do_pim)
+					ret = inet6_add_protocol(&pim6_protocol,
+								 IPPROTO_PIM);
+				else
+					ret = inet6_del_protocol(&pim6_protocol,
+								 IPPROTO_PIM);
+				if (ret < 0)
+					ret = -EAGAIN;
+			}
+			rtnl_unlock();
+			return ret;
+		}
+#endif
+		/*
+		 *	Spurious command, or MRT6_VERSION which you cannot
+		 *	set.
+		 */
+		default:
+			return -ENOPROTOOPT;
+	}
+}
+
+/*
+ *	Getsock opt support for the multicast routing system.
+ *
+ *	Only MRT6_VERSION, MRT6_ASSERT and (with CONFIG_IPV6_PIMSM_V2)
+ *	MRT6_PIM can be read; anything else yields -ENOPROTOOPT.  At most
+ *	sizeof(int) bytes of the value are copied to optval, and the length
+ *	actually used is written back through optlen.  A negative length
+ *	supplied by the caller is rejected with -EINVAL.
+ */
+
+int ip6_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __user *optlen)
+{
+	int olr;
+	int val;
+
+	if(optname!=MRT6_VERSION &&
+#ifdef CONFIG_IPV6_PIMSM_V2
+	   optname!=MRT6_PIM &&
+#endif
+	   optname!=MRT6_ASSERT)
+		return -ENOPROTOOPT;
+
+	if (get_user(olr, optlen))
+		return -EFAULT;
+
+	/*
+	 * Clamp as a signed value.  With an unsigned clamp a negative
+	 * length was converted to a huge value, cut down to sizeof(int),
+	 * and the check below could never fire.
+	 */
+	olr = min_t(int, olr, sizeof(int));
+	if (olr < 0)
+		return -EINVAL;
+
+	if(put_user(olr,optlen))
+		return -EFAULT;
+	if(optname==MRT6_VERSION)
+		val=0x0305;
+#ifdef CONFIG_IPV6_PIMSM_V2
+	else if(optname==MRT6_PIM)
+		val=mroute_do_pim;
+#endif
+	else
+		val=mroute_do_assert;
+	if(copy_to_user(optval,&val,olr))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ *	The IP multicast ioctl support routines.
+ */ + +int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) +{ + struct sioc_sg_req6 sr; + struct sioc_mif_req6 vr; + struct mif_device *vif; + struct mfc6_cache *c; + + switch(cmd) + { + case SIOCGETMIFCNT_IN6: + if (copy_from_user(&vr,arg,sizeof(vr))) + return -EFAULT; + if(vr.mifi>=maxvif) + return -EINVAL; + read_lock(&mrt_lock); + vif=&vif6_table[vr.mifi]; + if(MIF_EXISTS(vr.mifi)) { + vr.icount=vif->pkt_in; + vr.ocount=vif->pkt_out; + vr.ibytes=vif->bytes_in; + vr.obytes=vif->bytes_out; + read_unlock(&mrt_lock); + + if (copy_to_user(arg,&vr,sizeof(vr))) + return -EFAULT; + return 0; + } + read_unlock(&mrt_lock); + return -EADDRNOTAVAIL; + case SIOCGETSGCNT_IN6: + if (copy_from_user(&sr,arg,sizeof(sr))) + return -EFAULT; + + read_lock(&mrt_lock); + c = ip6mr_cache_find(sr.src.sin6_addr, sr.grp.sin6_addr); + if (c) { + sr.pktcnt = c->mfc_un.res.pkt; + sr.bytecnt = c->mfc_un.res.bytes; + sr.wrong_if = c->mfc_un.res.wrong_if; + read_unlock(&mrt_lock); + + if (copy_to_user(arg,&sr,sizeof(sr))) + return -EFAULT; + return 0; + } + read_unlock(&mrt_lock); + return -EADDRNOTAVAIL; + default: + return -ENOIOCTLCMD; + } +} + + +static inline int ip6mr_forward_finish(struct sk_buff *skb) +{ +#ifdef notyet + struct ip_options * opt = &(IP6CB(skb)->opt); + + IP_INC_STATS_BH(OutForwDatagrams); + + if (unlikely(opt->optlen)) + ip_forward_options(skb); +#endif + + return dst_output(skb); +} + +/* + * Processing handlers for ip6mr_forward + */ + +static void ip6mr_queue_xmit(struct sk_buff *skb, struct mfc6_cache *c, int vifi) +{ + struct ipv6hdr *ipv6h = skb->nh.ipv6h; + struct mif_device *vif = &vif6_table[vifi]; + struct net_device *dev; +#if 0 + struct rtable *rt; + int encap = 0; +#endif + struct in6_addr *snd_addr=&ipv6h->daddr; + int full_len = skb->len; + + if (vif->dev == NULL) + goto out_free; + +#ifdef CONFIG_IPV6_PIMSM_V2 + if (vif->flags & MIFF_REGISTER) { + vif->pkt_out++; + vif->bytes_out+=skb->len; + ((struct net_device_stats*)vif->dev->priv)->tx_bytes 
+= skb->len; + ((struct net_device_stats*)vif->dev->priv)->tx_packets++; + ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT); + kfree_skb(skb); + return; + } +#endif + /* + * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally + * not only before forwarding, but after forwarding on all output + * interfaces. It is clear, if mrouter runs a multicasting + * program, it should receive packets not depending to what interface + * program is joined. + * If we will not make it, the program will have to join on all + * interfaces. On the other hand, multihoming host (or router, but + * not mrouter) cannot join to more than one interface - it will + * result in receiving multiple packets. + */ + dev = vif->dev; + skb->dev=dev; + vif->pkt_out++; + vif->bytes_out+=skb->len; + + ipv6h = skb->nh.ipv6h; + + ipv6h->hop_limit--; + + if(dev->hard_header) { + unsigned char ha[MAX_ADDR_LEN]; + ndisc_mc_map(snd_addr,ha,dev,1); + if(dev->hard_header(skb,dev, ETH_P_IPV6,ha,NULL,full_len) < 0) + goto out_free; + } + + NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, skb->dev, dev, + dev_queue_xmit); +/* NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, skb->dev, dev, + ip6mr_forward_finish); +*/ + + + /* NF_HOOK(PF_INET6, NF_IP6_FORWARD, skb, skb->dev, dev, + ip6mr_forward_finish); + */ + return; + /* XXX */ + +out_free: + kfree_skb(skb); + return; +} + +static int ip6mr_find_vif(struct net_device *dev) +{ + int ct; + for (ct=maxvif-1; ct>=0; ct--) { + if (vif6_table[ct].dev == dev) + break; + } + return ct; +} + +static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache, int local) +{ + int psend = -1; + int vif, ct; + + vif = cache->mf6c_parent; + cache->mfc_un.res.pkt++; + cache->mfc_un.res.bytes += skb->len; + + /* + * Wrong interface: drop packet and (maybe) send PIM assert. + */ + if (vif6_table[vif].dev != skb->dev) { + int true_vifi; + + if (((struct rtable*)skb->dst)->fl.iif == 0) { + /* It is our own packet, looped back. + Very complicated situation... 
+ + The best workaround until routing daemons will be + fixed is not to redistribute packet, if it was + send through wrong interface. It means, that + multicast applications WILL NOT work for + (S,G), which have default multicast route pointing + to wrong oif. In any case, it is not a good + idea to use multicasting applications on router. + */ + goto dont_forward; + } + + cache->mfc_un.res.wrong_if++; + true_vifi = ip6mr_find_vif(skb->dev); + + if (true_vifi >= 0 && mroute_do_assert && + /* pimsm uses asserts, when switching from RPT to SPT, + so that we cannot check that packet arrived on an oif. + It is bad, but otherwise we would need to move pretty + large chunk of pimd to kernel. Ough... --ANK + */ + (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) && + time_after(jiffies, + cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { + cache->mfc_un.res.last_assert = jiffies; + ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF); + } + goto dont_forward; + } + + vif6_table[vif].pkt_in++; + vif6_table[vif].bytes_in+=skb->len; + + /* + * Forward the frame + */ + for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) { + if (skb->nh.ipv6h->hop_limit > cache->mfc_un.res.ttls[ct]) { + struct ipv6hdr *ipv6h = skb->nh.ipv6h; + if (psend != -1) { + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2) { + ip6mr_queue_xmit(skb2, cache, psend); + ipv6h->hop_limit++; + } + } + psend=ct; + } + } + if (psend != -1) { + struct ipv6hdr *ipv6h = skb->nh.ipv6h; + if (local) { + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2) { + ip6mr_queue_xmit(skb2, cache, psend); + ipv6h->hop_limit++; + } + } else { + ip6mr_queue_xmit(skb, cache, psend); + ipv6h->hop_limit++; + return 0; + } + } + +dont_forward: + if (!local) + kfree_skb(skb); + return 0; +} + + +/* + * Multicast packets for forwarding arrive here + */ + +int ip6_mr_input(struct sk_buff *skb) +{ + struct mfc6_cache *cache; + int local = ((struct 
rt6_info*)skb->dst)->rt6i_flags&RTCF_LOCAL; +#if 0 + IP6CB(skb)->flags = 0; +#endif + + read_lock(&mrt_lock); + cache = ip6mr_cache_find(skb->nh.ipv6h->saddr, skb->nh.ipv6h->daddr); + + /* + * No usable cache entry + */ + if (cache==NULL) { + int vif; + + vif = ip6mr_find_vif(skb->dev); + if (vif >= 0) { + int err = ip6mr_cache_unresolved(vif, skb); + read_unlock(&mrt_lock); + + return err; + } + read_unlock(&mrt_lock); + kfree_skb(skb); + return -ENODEV; + } + + ip6_mr_forward(skb, cache, local); + + read_unlock(&mrt_lock); + + return 0; +#if 0 +dont_forward: + kfree_skb(skb); + return 0; +#endif +} + + +static int +ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm) +{ + int ct; + struct rtnexthop *nhp; + struct net_device *dev = vif6_table[c->mf6c_parent].dev; + u8 *b = skb->tail; + struct rtattr *mp_head; + + if (dev) + RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex); + + mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0)); + + for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { + if (c->mfc_un.res.ttls[ct] < 255) { + if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) + goto rtattr_failure; + nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); + nhp->rtnh_flags = 0; + nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; + nhp->rtnh_ifindex = vif6_table[ct].dev->ifindex; + nhp->rtnh_len = sizeof(*nhp); + } + } + mp_head->rta_type = RTA_MULTIPATH; + mp_head->rta_len = skb->tail - (u8*)mp_head; + rtm->rtm_type = RTN_MULTICAST; + return 1; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -EMSGSIZE; +} diff -urN linux-2.6.11/net/ipv6/ipv6_sockglue.c x1/net/ipv6/ipv6_sockglue.c --- linux-2.6.11/net/ipv6/ipv6_sockglue.c 2005-03-02 08:37:48.000000000 +0100 +++ x1/net/ipv6/ipv6_sockglue.c 2005-02-09 16:31:39.000000000 +0100 @@ -23,6 +23,8 @@ * Changes: * David L Stevens : * - added multicast source filtering API for MLDv2 + * Hoerdt Mickael : + * - added multicat routing support for IPv6 */ #include @@ -55,6 
+57,10 @@ #include +#ifdef CONFIG_IPV6_MROUTE +#include +#endif + DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics); static struct packet_type ipv6_packet_type = { @@ -69,9 +75,14 @@ { struct ip6_ra_chain *ra, *new_ra, **rap; +#ifndef CONFIG_IPV6_MROUTE /* RA packet may be delivered ONLY to IPPROTO_RAW socket */ if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num != IPPROTO_RAW) return -EINVAL; +#else + if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num == IPPROTO_RAW) + return -EINVAL; +#endif new_ra = (sel>=0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; @@ -136,6 +147,11 @@ valbool = (val!=0); +#ifdef CONFIG_IPV6_MROUTE + if(optname >= MRT6_BASE && optname <= (MRT6_BASE + 10)) + return ip6_mroute_setsockopt(sk,optname,optval,optlen); +#endif + lock_sock(sk); switch (optname) { @@ -535,6 +551,11 @@ return udp_prot.getsockopt(sk, level, optname, optval, optlen); if(level!=SOL_IPV6) return -ENOPROTOOPT; + +#ifdef CONFIG_IPV6_MROUTE + if(optname >= MRT6_BASE && optname <= (MRT6_BASE + 10)) + return ip6_mroute_getsockopt(sk,optname,optval,optlen); +#endif if (get_user(len, optlen)) return -EFAULT; switch (optname) { diff -urN linux-2.6.11/net/ipv6/ipv6_syms.c x1/net/ipv6/ipv6_syms.c --- linux-2.6.11/net/ipv6/ipv6_syms.c 2005-03-02 08:38:13.000000000 +0100 +++ x1/net/ipv6/ipv6_syms.c 2005-02-03 05:44:15.000000000 +0100 @@ -7,7 +7,7 @@ #include #include -EXPORT_SYMBOL(ipv6_addr_type); +EXPORT_SYMBOL(__ipv6_addr_type); EXPORT_SYMBOL(icmpv6_send); EXPORT_SYMBOL(icmpv6_statistics); EXPORT_SYMBOL(icmpv6_err_convert); @@ -38,4 +38,8 @@ #endif EXPORT_SYMBOL(rt6_lookup); EXPORT_SYMBOL(fl6_sock_lookup); +EXPORT_SYMBOL(ip6_append_data); +EXPORT_SYMBOL(ip6_flush_pending_frames); +EXPORT_SYMBOL(ip6_push_pending_frames); +EXPORT_SYMBOL(ip6_dst_lookup); EXPORT_SYMBOL(ipv6_push_nfrag_opts); diff -urN linux-2.6.11/net/ipv6/mcast.c x1/net/ipv6/mcast.c --- linux-2.6.11/net/ipv6/mcast.c 2005-03-02 08:38:25.000000000 +0100 +++ x1/net/ipv6/mcast.c 2005-02-09 16:31:39.000000000 +0100 @@ 
-62,6 +62,11 @@ #include +#ifdef CONFIG_IPV6_MROUTE +#include +int ip6_dev_loopback_xmit(struct sk_buff *newskb); +#endif + /* Set to 3 to get tracing... */ #define MCAST_DEBUG 2 @@ -1320,7 +1325,11 @@ struct inet6_dev *idev = in6_dev_get(skb->dev); int err; +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); +#else IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); +#endif payload_len = skb->tail - (unsigned char *)skb->nh.ipv6h - sizeof(struct ipv6hdr); mldlen = skb->tail - skb->h.raw; @@ -1330,11 +1339,33 @@ IPPROTO_ICMPV6, csum_partial(skb->h.raw, mldlen, 0)); err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev, dev_queue_xmit); + +#ifdef CONFIG_IPV6_MROUTE + /* + * if we are acting as a multicast router, loopback a copy to the + * process level multicast routing daemon + */ + if (mroute6_socket != NULL) { + struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); + if (newskb) + NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL, + newskb->dev, ip6_dev_loopback_xmit); + } +#endif + if (!err) { ICMP6_INC_STATS(idev,ICMP6_MIB_OUTMSGS); +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS); +#else IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS); +#endif } else +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); +#else IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); +#endif if (likely(idev != NULL)) in6_dev_put(idev); @@ -1604,7 +1635,11 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) { struct sock *sk = igmp6_socket->sk; +#ifdef CONFIG_IPV6_STATISTICS + struct inet6_dev *idev = __in6_dev_get(dev); +#else struct inet6_dev *idev; +#endif struct sk_buff *skb; struct icmp6hdr *hdr; struct in6_addr *snd_addr; @@ -1616,7 +1651,11 @@ IPV6_TLV_ROUTERALERT, 2, 0, 0, IPV6_TLV_PADN, 0 }; +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); +#else IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); +#endif snd_addr = addr; if (type == ICMPV6_MGM_REDUCTION) { snd_addr = 
&all_routers; @@ -1630,7 +1669,11 @@ skb = sock_alloc_send_skb(sk, LL_RESERVED_SPACE(dev) + full_len, 1, &err); if (skb == NULL) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); +#else IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); +#endif return; } @@ -1669,22 +1712,47 @@ err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev, dev_queue_xmit); + +#ifdef CONFIG_IPV6_MROUTE + /* + * if we are acting as a multicast router, loopback a copy to the + * process level multicast routing daemon + */ + if (mroute6_socket != NULL) { + struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); + if (newskb) + NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL, + newskb->dev, ip6_dev_loopback_xmit); + } +#endif if (!err) { if (type == ICMPV6_MGM_REDUCTION) ICMP6_INC_STATS(idev, ICMP6_MIB_OUTGROUPMEMBREDUCTIONS); else ICMP6_INC_STATS(idev, ICMP6_MIB_OUTGROUPMEMBRESPONSES); ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS); +#else IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS); +#endif } else +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); +#else IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); +#endif if (likely(idev != NULL)) in6_dev_put(idev); return; out: +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); +#else IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); +#endif kfree_skb(skb); } diff -urN linux-2.6.11/net/ipv6/ndisc.c x1/net/ipv6/ndisc.c --- linux-2.6.11/net/ipv6/ndisc.c 2005-03-02 08:38:10.000000000 +0100 +++ x1/net/ipv6/ndisc.c 2005-02-16 10:16:52.000000000 +0100 @@ -437,6 +437,7 @@ return; } + ND_PRINTK2("%s:dst=%p\n", __FUNCTION__, dst); if (inc_opt) { if (dev->addr_len) len += NDISC_OPT_SPACE(dev->addr_len); @@ -444,7 +445,7 @@ inc_opt = 0; } - skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev), + skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev) + dst->header_len + 64, 1, &err); if (skb == 
NULL) { @@ -485,7 +486,11 @@ skb->dst = dst; idev = in6_dev_get(dst->dev); +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); +#else IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); +#endif err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output); if (!err) { ICMP6_INC_STATS(idev, ICMP6_MIB_OUTNEIGHBORADVERTISEMENTS); @@ -534,7 +539,7 @@ if (send_llinfo) len += NDISC_OPT_SPACE(dev->addr_len); - skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev), + skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev) + dst->header_len + 64, 1, &err); if (skb == NULL) { ND_PRINTK0(KERN_ERR @@ -570,7 +575,11 @@ /* send it! */ skb->dst = dst; idev = in6_dev_get(dst->dev); +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); +#else IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); +#endif err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output); if (!err) { ICMP6_INC_STATS(idev, ICMP6_MIB_OUTNEIGHBORSOLICITS); @@ -610,7 +619,7 @@ if (dev->addr_len) len += NDISC_OPT_SPACE(dev->addr_len); - skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev), + skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev) + dst->header_len + 64, 1, &err); if (skb == NULL) { ND_PRINTK0(KERN_ERR @@ -644,7 +653,11 @@ /* send it! */ skb->dst = dst; idev = in6_dev_get(dst->dev); +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); +#else IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); +#endif err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output); if (!err) { ICMP6_INC_STATS(idev, ICMP6_MIB_OUTROUTERSOLICITS); @@ -809,7 +822,6 @@ * sender should delay its response * by a random time between 0 and * MAX_ANYCAST_DELAY_TIME seconds. 
- * (RFC2461) -- yoshfuji */ struct sk_buff *n = skb_clone(skb, GFP_ATOMIC); if (n) @@ -825,7 +837,7 @@ ipv6_addr_all_nodes(&maddr); ndisc_send_na(dev, NULL, &maddr, &msg->target, - idev->cnf.forwarding, 0, (ifp != NULL), 1); + idev->cnf.forwarding, 0, (ifp != NULL) && inc, inc); goto out; } @@ -846,7 +858,7 @@ NEIGH_UPDATE_F_OVERRIDE); if (neigh || !dev->hard_header) { ndisc_send_na(dev, neigh, saddr, &msg->target, - idev->cnf.forwarding, + idev->cnf.forwarding, 1, (ifp != NULL && inc), inc); if (neigh) neigh_release(neigh); @@ -1021,6 +1033,7 @@ struct rt6_info *rt; int lifetime; struct ndisc_options ndopts; + int pref = 0; int optlen; __u8 * opt = (__u8 *)(ra_msg + 1); @@ -1082,7 +1095,18 @@ lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime); +#ifdef CONFIG_IPV6_ROUTER_PREF + pref = IPV6_SIGNEDPREF(ra_msg->icmph.icmp6_router_pref); + if (pref < -1) { + if (net_ratelimit()) + ND_PRINTK2("ICMP6 RA: invalid RA preference; zero lifetime\n"); + lifetime = 0; + } +#endif + rt = rt6_get_dflt_router(&skb->nh.ipv6h->saddr, skb->dev); + if (rt) + neigh = rt->rt6i_nexthop; if (rt) neigh = rt->rt6i_nexthop; @@ -1097,7 +1121,7 @@ ND_PRINTK3(KERN_DEBUG "ICMPv6 RA: adding default router.\n"); - rt = rt6_add_dflt_router(&skb->nh.ipv6h->saddr, skb->dev); + rt = rt6_add_dflt_router(&skb->nh.ipv6h->saddr, skb->dev, pref); if (rt == NULL) { ND_PRINTK0(KERN_ERR "ICMPv6 RA: %s() failed to add default route.\n", @@ -1121,8 +1145,11 @@ if (rt) rt->rt6i_expires = jiffies + (HZ * lifetime); - if (ra_msg->icmph.icmp6_hop_limit) + if (ra_msg->icmph.icmp6_hop_limit) { in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; + if (rt) + rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit; + } /* * Update Reachable Time and Retrans Timer @@ -1329,6 +1356,7 @@ int rd_len; int err; int hlen; + u8 ha_buf[MAX_ADDR_LEN], *ha = NULL; dev = skb->dev; @@ -1341,10 +1369,14 @@ ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr); - rt = rt6_lookup(&skb->nh.ipv6h->saddr, 
NULL, dev->ifindex, 1); - if (rt == NULL) + /* + * we use ip6_route_output() here so that we do not try to + * send redirect to off-link. + * cf) ndisc_dst_alloc() assumes that destination is on-link. + */ + dst = ip6_route_output(NULL, &fl); + if (dst == NULL) return; - dst = &rt->u.dst; err = xfrm_lookup(&dst, &fl, NULL, 0); if (err) { @@ -1366,16 +1398,14 @@ } if (dev->addr_len) { + read_lock_bh(&neigh->lock); if (neigh->nud_state&NUD_VALID) { - len += NDISC_OPT_SPACE(dev->addr_len); - } else { - /* If nexthop is not valid, do not redirect! - We will make it later, when will be sure, - that it is alive. - */ - dst_release(dst); - return; - } + memcpy(ha_buf, neigh->ha, dev->addr_len); + read_unlock_bh(&neigh->lock); + ha = ha_buf; + len += NDISC_OPT_SPACE(dev->addr_len); + } else + read_unlock_bh(&neigh->lock); } rd_len = min_t(unsigned int, @@ -1383,7 +1413,7 @@ rd_len &= ~0x7; len += rd_len; - buff = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev), + buff = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev) + dst->header_len + 64, 1, &err); if (buff == NULL) { ND_PRINTK0(KERN_ERR @@ -1420,8 +1450,8 @@ * include target_address option */ - if (dev->addr_len) - opt = ndisc_fill_addr_option(opt, ND_OPT_TARGET_LL_ADDR, neigh->ha, + if (ha) + opt = ndisc_fill_addr_option(opt, ND_OPT_TARGET_LL_ADDR, ha, dev->addr_len, dev->type); /* @@ -1441,7 +1471,11 @@ buff->dst = dst; idev = in6_dev_get(dst->dev); +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); +#else IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); +#endif err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, buff, NULL, dst->dev, dst_output); if (!err) { ICMP6_INC_STATS(idev, ICMP6_MIB_OUTREDIRECTS); @@ -1533,10 +1567,46 @@ }; #ifdef CONFIG_SYSCTL +static void __ndisc_ifinfo_notifier(struct inet6_dev *idev) +{ + idev->tstamp = jiffies; + inet6_ifinfo_notify(RTM_NEWLINK, idev); +} + +static void ndisc_ifinfo_notifier(void *data) +{ + struct net_device *dev = data; + if 
(dev) { + struct inet6_dev *idev = in6_dev_get(dev); + if (idev) { + __ndisc_ifinfo_notifier(idev); + in6_dev_put(idev); + } + } +} + int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos) { struct net_device *dev = ctl->extra1; struct inet6_dev *idev; +#if 1 + static char warncomm[TASK_COMM_LEN]; + static int warned; + const char *dev_name = dev ? dev->name : "default"; + + if (strcmp(warncomm, current->comm) && warned < 5) { + strcpy(warncomm, current->comm); + printk(KERN_WARNING + "process `%s' is using old sysctl " + "net.ipv6.neigh.%s.%s; " + "Use net.ipv6.neigh.%s.%s_ms " + "instead.\n", + warncomm, + dev_name, ctl->procname, + dev_name, ctl->procname); + warned++; + } +#endif if (write && dev && (idev = in6_dev_get(dev)) != NULL) { idev->tstamp = jiffies; @@ -1578,7 +1648,8 @@ #ifdef CONFIG_SYSCTL neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, NET_IPV6_NEIGH, - "ipv6", &ndisc_ifinfo_sysctl_change); + "ipv6", &ndisc_ifinfo_sysctl_change, + ndisc_ifinfo_notifier); #endif register_netdevice_notifier(&ndisc_netdev_notifier); diff -urN linux-2.6.11/net/ipv6/netfilter/Kconfig x1/net/ipv6/netfilter/Kconfig --- linux-2.6.11/net/ipv6/netfilter/Kconfig 2005-03-02 08:38:25.000000000 +0100 +++ x1/net/ipv6/netfilter/Kconfig 2005-02-03 05:44:15.000000000 +0100 @@ -5,6 +5,16 @@ menu "IPv6: Netfilter Configuration" depends on INET && IPV6 && NETFILTER +config IP6_NF_FTP + tristate "FTP protocol support" + depends on IP6_NF_CONNTRACK + help + Tracking FTP connections is problematic: special helpers are + required for tracking them. + + If you want to compile it as a module, say M here and read + . If unsure, say `Y'. + #tristate 'Connection tracking (required for masq/NAT)' CONFIG_IP6_NF_CONNTRACK #if [ "$CONFIG_IP6_NF_CONNTRACK" != "n" ]; then # dep_tristate ' FTP protocol support' CONFIG_IP6_NF_FTP $CONFIG_IP6_NF_CONNTRACK @@ -167,6 +177,32 @@ To compile it as a module, choose M here. 
If unsure, say N. +config IP6_NF_CONNTRACK + tristate "Connection tracking (EXPERIMENTAL)" + depends on EXPERIMENTAL + ---help--- + Connection tracking keeps a record of what packets have passed + through your machine, in order to figure out how they are related + into connections. + + It can also be used to enhance packet filtering + (see `Connection state match support' + below). + + If you want to compile it as a module, say M here and read + . If unsure, say `N'. + +config IP6_NF_MATCH_STATE + tristate "Connection state match support" + depends on IP6_NF_CONNTRACK && IP6_NF_IPTABLES + help + Connection state matching allows you to match packets based on their + relationship to a tracked connection (ie. previous packets). This + is a powerful tool for packet classification. + + If you want to compile it as a module, say M here and read + . If unsure, say `N'. + # dep_tristate ' Multiple port match support' CONFIG_IP6_NF_MATCH_MULTIPORT $CONFIG_IP6_NF_IPTABLES # dep_tristate ' TOS match support' CONFIG_IP6_NF_MATCH_TOS $CONFIG_IP6_NF_IPTABLES # if [ "$CONFIG_IP6_NF_CONNTRACK" != "n" ]; then @@ -196,8 +232,17 @@ To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_TARGET_REJECT + tristate "REJECT target support" + depends on IP6_NF_FILTER + help + The REJECT target allows a filtering rule to specify that an ICMP + error should be issued in response to an incoming packet, rather + than silently being dropped. + + To compile it as a module, choose M here. If unsure, say N. 
+ # if [ "$CONFIG_IP6_NF_FILTER" != "n" ]; then -# dep_tristate ' REJECT target support' CONFIG_IP6_NF_TARGET_REJECT $CONFIG_IP6_NF_FILTER # if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then # dep_tristate ' MIRROR target support (EXPERIMENTAL)' CONFIG_IP6_NF_TARGET_MIRROR $CONFIG_IP6_NF_FILTER # fi diff -urN linux-2.6.11/net/ipv6/netfilter/Makefile x1/net/ipv6/netfilter/Makefile --- linux-2.6.11/net/ipv6/netfilter/Makefile 2005-03-02 08:38:10.000000000 +0100 +++ x1/net/ipv6/netfilter/Makefile 2004-09-30 15:26:36.000000000 +0200 @@ -2,6 +2,18 @@ # Makefile for the netfilter modules on top of IPv6. # +# objects for the conntrack +ip6_nf_conntrack-objs := ip6_conntrack_core.o ip6_conntrack_proto_generic.o ip6_conntrack_proto_tcp.o ip6_conntrack_proto_udp.o ip6_conntrack_proto_icmpv6.o ip6_conntrack_reasm.o + +# objects for the standalone - connection tracking +ip6_conntrack-objs := ip6_conntrack_standalone.o $(ip6_nf_conntrack-objs) + +# connection tracking +obj-$(CONFIG_IP6_NF_CONNTRACK) += ip6_conntrack.o + +# connection tracking helpers +obj-$(CONFIG_IP6_NF_FTP) += ip6_conntrack_ftp.o + # Link order matters here. 
obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o obj-$(CONFIG_IP6_NF_MATCH_LIMIT) += ip6t_limit.o @@ -24,3 +36,5 @@ obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o +obj-$(CONFIG_IP6_NF_MATCH_STATE) += ip6t_state.o +obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o diff -urN linux-2.6.11/net/ipv6/netfilter/ip6_conntrack_core.c x1/net/ipv6/netfilter/ip6_conntrack_core.c --- linux-2.6.11/net/ipv6/netfilter/ip6_conntrack_core.c 1970-01-01 01:00:00.000000000 +0100 +++ x1/net/ipv6/netfilter/ip6_conntrack_core.c 2005-03-01 04:42:08.000000000 +0100 @@ -0,0 +1,1593 @@ +/* + * IPv6 Connection Tracking + * Linux INET6 implementation + * + * Copyright (C)2003 USAGI/WIDE Project + * + * Authors: + * Yasuyuki Kozakai + * + * Based on: net/ipv4/netfilter/ip_conntrack_core.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* (c) 1999 Paul `Rusty' Russell. Licenced under the GNU General + * Public Licence. 
+ * + * 23 Apr 2001: Harald Welte + * - new API and handling of conntrack/nat helpers + * - now capable of multiple expectations for one master + * 16 Jul 2002: Harald Welte + * - add usage/reference counts to ip_conntrack_expect + * - export ip_conntrack[_expect]_{find_get,put} functions + * */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* This rwlock protects the main hash table, protocol/helper/expected + registrations, conntrack timers*/ +#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip6_conntrack_lock) +#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip6_conntrack_lock) + +#include +#include +#include +#include +#include + +#define IP6_CONNTRACK_VERSION "0.1" + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +DECLARE_RWLOCK(ip6_conntrack_lock); +DECLARE_RWLOCK(ip6_conntrack_expect_tuple_lock); + +void (*ip6_conntrack_destroyed)(struct ip6_conntrack *conntrack) = NULL; +LIST_HEAD(ip6_conntrack_expect_list); +LIST_HEAD(ip6_protocol_list); +static LIST_HEAD(helpers); +unsigned int ip6_conntrack_htable_size = 0; +static int ip6_conntrack_max = 0; +static atomic_t ip6_conntrack_count = ATOMIC_INIT(0); +struct list_head *ip6_conntrack_hash; +static kmem_cache_t *ip6_conntrack_cachep; + +extern struct ip6_conntrack_protocol ip6_conntrack_generic_protocol; + +/* + * Based on ipv6_skip_exthdr() in net/ipv6/exthdr.c + * + * This function parses (probably truncated) exthdr set "hdr" + * of length "len". "nexthdrp" initially points to some place, + * where type of the first header can be found. + * + * It skips all well-known exthdrs, and returns pointer to the start + * of unparsable area i.e. the first header with unknown type. + * if success, *nexthdr is updated by type/protocol of this header. 
+ * + * NOTES: - it may return pointer pointing beyond end of packet, + * if the last recognized header is truncated in the middle. + * - if packet is truncated, so that all parsed headers are skipped, + * it returns -1. + * - First fragment header is skipped, not-first ones + * are considered as unparsable. + * - ESP is unparsable for now and considered like + * normal payload protocol. + * - Note also special handling of AUTH header. Thanks to IPsec wizards. + */ + +static int ip6_ct_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp, + int len) +{ + u8 nexthdr = *nexthdrp; + + while (ipv6_ext_hdr(nexthdr)) { + struct ipv6_opt_hdr hdr; + int hdrlen; + + if (len < (int)sizeof(struct ipv6_opt_hdr)) + return -1; + if (nexthdr == NEXTHDR_NONE) + break; + if (skb_copy_bits(skb, start, &hdr, sizeof(hdr))) + BUG(); + if (nexthdr == NEXTHDR_FRAGMENT) { + struct frag_hdr fhdr; + + if (len < (int)sizeof(struct frag_hdr)) + return -1; + if (skb_copy_bits(skb, start, &fhdr, sizeof(fhdr))) + BUG(); + if (ntohs(fhdr.frag_off) & ~0x7) + return -1; + hdrlen = 8; + } else if (nexthdr == NEXTHDR_AUTH) + hdrlen = (hdr.hdrlen+2)<<2; + else + hdrlen = ipv6_optlen(&hdr); + + nexthdr = hdr.nexthdr; + len -= hdrlen; + start += hdrlen; + } + + *nexthdrp = nexthdr; + return start; +} + +int ip6_ct_tuple_src_equal(const struct ip6_conntrack_tuple *t1, + const struct ip6_conntrack_tuple *t2) +{ + if (ipv6_addr_cmp(&t1->src.ip, &t2->src.ip)) + return 0; + + if (t1->src.u.all != t2->src.u.all) + return 0; + + if (t1->dst.protonum != t2->dst.protonum) + return 0; + + return 1; + +} + +int ip6_ct_tuple_dst_equal(const struct ip6_conntrack_tuple *t1, + const struct ip6_conntrack_tuple *t2) +{ + if (ipv6_addr_cmp(&t1->dst.ip, &t2->dst.ip)) + return 0; + + if (t1->dst.u.all != t2->dst.u.all) + return 0; + + if (t1->dst.protonum != t2->dst.protonum) + return 0; + + return 1; +} + +int ip6_ct_tuple_equal(const struct ip6_conntrack_tuple *t1, + const struct ip6_conntrack_tuple *t2) +{ + return 
ip6_ct_tuple_src_equal(t1, t2) && ip6_ct_tuple_dst_equal(t1, t2);
+}
+/*
+ * Masked tuple comparison.
+ *
+ * Returns 1 when t and tuple agree on every bit that is set in mask, and
+ * 0 at the first masked difference.  Both addresses are checked as eight
+ * 16-bit words each, then src.u.all, dst.u.all and dst.protonum.
+ */
+int ip6_ct_tuple_mask_cmp(const struct ip6_conntrack_tuple *t,
+			  const struct ip6_conntrack_tuple *tuple,
+			  const struct ip6_conntrack_tuple *mask)
+{
+	int count = 0;
+
+	for (count = 0; count < 8; count++){
+		if ((ntohs(t->src.ip.s6_addr16[count]) ^
+		     ntohs(tuple->src.ip.s6_addr16[count])) &
+		    ntohs(mask->src.ip.s6_addr16[count]))
+			return 0;
+
+		if ((ntohs(t->dst.ip.s6_addr16[count]) ^
+		     ntohs(tuple->dst.ip.s6_addr16[count])) &
+		    ntohs(mask->dst.ip.s6_addr16[count]))
+			return 0;
+	}
+
+	if ((t->src.u.all ^ tuple->src.u.all) & mask->src.u.all)
+		return 0;
+
+	if ((t->dst.u.all ^ tuple->dst.u.all) & mask->dst.u.all)
+		return 0;
+
+	if ((t->dst.protonum ^ tuple->dst.protonum) & mask->dst.protonum)
+		return 0;
+
+	return 1;
+}
+/* LIST_FIND predicate: true when handler curr serves protocol number
+   protocol. */
+static inline int proto_cmpfn(const struct ip6_conntrack_protocol *curr,
+			      u_int8_t protocol)
+{
+	return protocol == curr->proto;
+}
+/*
+ * Look up the handler for protocol in ip6_protocol_list.  Falls back to
+ * &ip6_conntrack_generic_protocol, so the result is never NULL.
+ * MUST_BE_READ_LOCKED is presumably a debug-only check that the caller
+ * holds ip6_conntrack_lock - confirm against its definition.
+ */
+struct ip6_conntrack_protocol *__ip6_ct_find_proto(u_int8_t protocol)
+{
+	struct ip6_conntrack_protocol *p;
+
+	MUST_BE_READ_LOCKED(&ip6_conntrack_lock);
+	p = LIST_FIND(&ip6_protocol_list, proto_cmpfn,
+		      struct ip6_conntrack_protocol *, protocol);
+	if (!p)
+		p = &ip6_conntrack_generic_protocol;
+
+	return p;
+}
+/* Same lookup as __ip6_ct_find_proto(), but takes and releases the read
+   side of ip6_conntrack_lock around it. */
+struct ip6_conntrack_protocol *ip6_ct_find_proto(u_int8_t protocol)
+{
+	struct ip6_conntrack_protocol *p;
+
+	READ_LOCK(&ip6_conntrack_lock);
+	p = __ip6_ct_find_proto(protocol);
+	READ_UNLOCK(&ip6_conntrack_lock);
+	return p;
+}
+/* Drop one reference on ct by handing its embedded ct_general to
+   nf_conntrack_put().  ct must not be NULL. */
+inline void
+ip6_conntrack_put(struct ip6_conntrack *ct)
+{
+	IP6_NF_ASSERT(ct);
+	nf_conntrack_put(&ct->ct_general);
+}
+/*
+ * Hash seed and its "already initialised" flag, followed by the bucket
+ * hash itself.  hash_conntrack() mixes the four source address words, the
+ * four destination address words, src.u.all / dst.u.all and the protocol
+ * number with the seed and returns an index below
+ * ip6_conntrack_htable_size.
+ */
+static int ip6_conntrack_hash_rnd_initted;
+static unsigned int ip6_conntrack_hash_rnd;
+static u_int32_t
+hash_conntrack(const struct ip6_conntrack_tuple *tuple)
+{
+	u32 a, b, c;
+
+	a = tuple->src.ip.s6_addr32[0];
+	b = tuple->src.ip.s6_addr32[1];
+	c = tuple->src.ip.s6_addr32[2];
+
+	a += JHASH_GOLDEN_RATIO;
+	b += JHASH_GOLDEN_RATIO;
+	c += ip6_conntrack_hash_rnd;	/* seed; filled in by init_conntrack() */
+	__jhash_mix(a, b, c);
+
+	a += tuple->src.ip.s6_addr32[3];
+	b += tuple->dst.ip.s6_addr32[0];
+	c += tuple->dst.ip.s6_addr32[1];
+	__jhash_mix(a, b, c);
+
+	a += tuple->dst.ip.s6_addr32[2];
+	b += tuple->dst.ip.s6_addr32[3];
+	/* presumably both u.all values are 16 bits wide - confirm */
+	c += tuple->src.u.all | (tuple->dst.u.all << 16);
+	__jhash_mix(a, b, c);
+
+	a += tuple->dst.protonum;
+	__jhash_mix(a, b, c);
+
+	return c % ip6_conntrack_htable_size;
+}
+/*
+ * Fill tuple from a packet: addresses come from ipv6h, the protocol
+ * number from protonum, and everything else from the handler's
+ * pkt_to_tuple(), whose return value is passed straight back.
+ */
+int
+ip6_get_tuple(const struct ipv6hdr *ipv6h,
+	      const struct sk_buff *skb,
+	      unsigned int dataoff,
+	      u_int8_t protonum,
+	      struct ip6_conntrack_tuple *tuple,
+	      const struct ip6_conntrack_protocol *protocol)
+{
+	/* Should I check that this packet isn't fragmented
+	   like IPv4 conntrack? - kozakai */
+
+	ipv6_addr_copy(&tuple->src.ip, &ipv6h->saddr);
+	ipv6_addr_copy(&tuple->dst.ip, &ipv6h->daddr);
+
+	tuple->dst.protonum = protonum;
+
+	return protocol->pkt_to_tuple(skb, dataoff, tuple);
+}
+/*
+ * Build the reverse-direction tuple of orig in inverse: addresses are
+ * swapped, the protocol number is copied, and the handler's
+ * invert_tuple() does the rest; its return value is passed straight back.
+ */
+static int
+invert_tuple(struct ip6_conntrack_tuple *inverse,
+	     const struct ip6_conntrack_tuple *orig,
+	     const struct ip6_conntrack_protocol *protocol)
+{
+	ipv6_addr_copy(&inverse->src.ip, &orig->dst.ip);
+	ipv6_addr_copy(&inverse->dst.ip, &orig->src.ip);
+	inverse->dst.protonum = orig->dst.protonum;
+
+	return protocol->invert_tuple(inverse, orig);
+}
+
+
+/* ip6_conntrack_expect helper functions */
+
+/* Compare tuple parts depending on mask. 
*/ +static inline int expect_cmp(const struct ip6_conntrack_expect *i, + const struct ip6_conntrack_tuple *tuple) +{ + MUST_BE_READ_LOCKED(&ip6_conntrack_expect_tuple_lock); + return ip6_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask); +} + +static void +destroy_expect(struct ip6_conntrack_expect *exp) +{ + DEBUGP("destroy_expect(%p) use=%d\n", exp, atomic_read(&exp->use)); + IP6_NF_ASSERT(atomic_read(&exp->use)); + IP6_NF_ASSERT(!timer_pending(&exp->timeout)); + + kfree(exp); +} + + +inline void ip6_conntrack_expect_put(struct ip6_conntrack_expect *exp) +{ + IP6_NF_ASSERT(exp); + + if (atomic_dec_and_test(&exp->use)) { + /* usage count dropped to zero */ + destroy_expect(exp); + } +} + +static inline struct ip6_conntrack_expect * +__ip6_ct_expect_find(const struct ip6_conntrack_tuple *tuple) +{ + MUST_BE_READ_LOCKED(&ip6_conntrack_lock); + MUST_BE_READ_LOCKED(&ip6_conntrack_expect_tuple_lock); + return LIST_FIND(&ip6_conntrack_expect_list, expect_cmp, + struct ip6_conntrack_expect *, tuple); +} + +/* Find a expectation corresponding to a tuple. */ +struct ip6_conntrack_expect * +ip6_conntrack_expect_find_get(const struct ip6_conntrack_tuple *tuple) +{ + struct ip6_conntrack_expect *exp; + + READ_LOCK(&ip6_conntrack_lock); + READ_LOCK(&ip6_conntrack_expect_tuple_lock); + exp = __ip6_ct_expect_find(tuple); + if (exp) + atomic_inc(&exp->use); + READ_UNLOCK(&ip6_conntrack_expect_tuple_lock); + READ_UNLOCK(&ip6_conntrack_lock); + + return exp; +} + +/* remove one specific expectation from all lists and drop refcount, + * does _NOT_ delete the timer. */ +static void __unexpect_related(struct ip6_conntrack_expect *expect) +{ + DEBUGP("unexpect_related(%p)\n", expect); + MUST_BE_WRITE_LOCKED(&ip6_conntrack_lock); + + /* we're not allowed to unexpect a confirmed expectation! 
*/ + IP6_NF_ASSERT(!expect->sibling); + + /* delete from global and local lists */ + list_del(&expect->list); + list_del(&expect->expected_list); + + /* decrement expect-count of master conntrack */ + if (expect->expectant) + expect->expectant->expecting--; + + ip6_conntrack_expect_put(expect); +} + +/* remove one specific expecatation from all lists, drop refcount + * and expire timer. + * This function can _NOT_ be called for confirmed expects! */ +static void unexpect_related(struct ip6_conntrack_expect *expect) +{ + IP6_NF_ASSERT(expect->expectant); + IP6_NF_ASSERT(expect->expectant->helper); + /* if we are supposed to have a timer, but we can't delete + * it: race condition. __unexpect_related will + * be calledd by timeout function */ + if (expect->expectant->helper->timeout + && !del_timer(&expect->timeout)) + return; + + __unexpect_related(expect); +} + +/* delete all unconfirmed expectations for this conntrack */ +static void remove_expectations(struct ip6_conntrack *ct, int drop_refcount) +{ + struct list_head *exp_entry, *next; + struct ip6_conntrack_expect *exp; + + DEBUGP("remove_expectations(%p)\n", ct); + + list_for_each_safe(exp_entry, next, &ct->sibling_list) { + exp = list_entry(exp_entry, struct ip6_conntrack_expect, + expected_list); + + /* we skip established expectations, as we want to delete + * the un-established ones only */ + if (exp->sibling) { + DEBUGP("remove_expectations: skipping established %p of %p\n", exp->sibling, ct); + if (drop_refcount) { + /* Indicate that this expectations parent is dead */ + ip6_conntrack_put(exp->expectant); + exp->expectant = NULL; + } + continue; + } + + IP6_NF_ASSERT(list_inlist(&ip6_conntrack_expect_list, exp)); + IP6_NF_ASSERT(exp->expectant == ct); + + /* delete expectation from global and private lists */ + unexpect_related(exp); + } +} + +static void +clean_from_lists(struct ip6_conntrack *ct) +{ + unsigned int ho, hr; + + DEBUGP("clean_from_lists(%p)\n", ct); + 
MUST_BE_WRITE_LOCKED(&ip6_conntrack_lock); + + ho = hash_conntrack(&ct->tuplehash[IP6_CT_DIR_ORIGINAL].tuple); + hr = hash_conntrack(&ct->tuplehash[IP6_CT_DIR_REPLY].tuple); + + LIST_DELETE(&ip6_conntrack_hash[ho], + &ct->tuplehash[IP6_CT_DIR_ORIGINAL]); + LIST_DELETE(&ip6_conntrack_hash[hr], + &ct->tuplehash[IP6_CT_DIR_REPLY]); + + /* Destroy all un-established, pending expectations */ + remove_expectations(ct, 1); +} + +static void +destroy_conntrack(struct nf_conntrack *nfct) +{ + struct ip6_conntrack *ct = (struct ip6_conntrack *)nfct, *master = NULL; + struct ip6_conntrack_protocol *proto; + + DEBUGP("destroy_conntrack(%p)\n", ct); + IP6_NF_ASSERT(atomic_read(&nfct->use) == 0); + IP6_NF_ASSERT(!timer_pending(&ct->timeout)); + + /* To make sure we don't get any weird locking issues here: + * destroy_conntrack() MUST NOT be called with a write lock + * to ip6_conntrack_lock!!! -HW */ + proto = ip6_ct_find_proto(ct->tuplehash[IP6_CT_DIR_REPLY].tuple.dst.protonum); + if (proto && proto->destroy) + proto->destroy(ct); + + if (ip6_conntrack_destroyed) + ip6_conntrack_destroyed(ct); + + WRITE_LOCK(&ip6_conntrack_lock); + /* Delete us from our own list to prevent corruption later */ + list_del(&ct->sibling_list); + + /* Delete our master expectation */ + if (ct->master) { + if (ct->master->expectant) { + /* can't call __unexpect_related here, + * since it would screw up expect_list */ + list_del(&ct->master->expected_list); + master = ct->master->expectant; + } + kfree(ct->master); + } + WRITE_UNLOCK(&ip6_conntrack_lock); + + if (master) + ip6_conntrack_put(master); + + DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct); + kmem_cache_free(ip6_conntrack_cachep, ct); + atomic_dec(&ip6_conntrack_count); +} + +static void death_by_timeout(unsigned long ul_conntrack) +{ + struct ip6_conntrack *ct = (void *)ul_conntrack; + + WRITE_LOCK(&ip6_conntrack_lock); + clean_from_lists(ct); + WRITE_UNLOCK(&ip6_conntrack_lock); + ip6_conntrack_put(ct); +} + +static inline int 
+conntrack_tuple_cmp(const struct ip6_conntrack_tuple_hash *i, + const struct ip6_conntrack_tuple *tuple, + const struct ip6_conntrack *ignored_conntrack) +{ + MUST_BE_READ_LOCKED(&ip6_conntrack_lock); + return i->ctrack != ignored_conntrack + && ip6_ct_tuple_equal(tuple, &i->tuple); +} + +static struct ip6_conntrack_tuple_hash * +__ip6_conntrack_find(const struct ip6_conntrack_tuple *tuple, + const struct ip6_conntrack *ignored_conntrack) +{ + struct ip6_conntrack_tuple_hash *h; + unsigned int hash = hash_conntrack(tuple); + + MUST_BE_READ_LOCKED(&ip6_conntrack_lock); + h = LIST_FIND(&ip6_conntrack_hash[hash], + conntrack_tuple_cmp, + struct ip6_conntrack_tuple_hash *, + tuple, ignored_conntrack); + return h; +} + +/* Find a connection corresponding to a tuple. */ +struct ip6_conntrack_tuple_hash * +ip6_conntrack_find_get(const struct ip6_conntrack_tuple *tuple, + const struct ip6_conntrack *ignored_conntrack) +{ + struct ip6_conntrack_tuple_hash *h; + + READ_LOCK(&ip6_conntrack_lock); + h = __ip6_conntrack_find(tuple, ignored_conntrack); + if (h) + atomic_inc(&h->ctrack->ct_general.use); + READ_UNLOCK(&ip6_conntrack_lock); + + return h; +} + +/* Confirm a connection given skb; places it in hash table */ +int __ip6_conntrack_confirm(struct sk_buff *skb) +{ + unsigned int hash, repl_hash; + struct ip6_conntrack *ct; + enum ip6_conntrack_info ctinfo; + + ct = ip6_conntrack_get(skb, &ctinfo); + + /* ip6t_REJECT uses ip6_conntrack_attach to attach related + ICMP/TCP RST packets in other direction. Actual packet + which created connection will be IP6_CT_NEW or for an + expected connection, IP6_CT_RELATED. */ + if (CTINFO2DIR(ctinfo) != IP6_CT_DIR_ORIGINAL) + return NF_ACCEPT; + + hash = hash_conntrack(&ct->tuplehash[IP6_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(&ct->tuplehash[IP6_CT_DIR_REPLY].tuple); + + /* We're not in hash table, and we refuse to set up related + connections for unconfirmed conns. 
But packet copies and + REJECT will give spurious warnings here. */ + /* IP6_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */ + + /* No external references means noone else could have + confirmed us. */ + IP6_NF_ASSERT(!is_confirmed(ct)); + DEBUGP("Confirming conntrack %p\n", ct); + + WRITE_LOCK(&ip6_conntrack_lock); + /* See if there's one in the list already, including reverse: + NAT could have grabbed it without realizing, since we're + not in the hash. If there is, we lost race. */ + if (!LIST_FIND(&ip6_conntrack_hash[hash], + conntrack_tuple_cmp, + struct ip6_conntrack_tuple_hash *, + &ct->tuplehash[IP6_CT_DIR_ORIGINAL].tuple, NULL) + && !LIST_FIND(&ip6_conntrack_hash[repl_hash], + conntrack_tuple_cmp, + struct ip6_conntrack_tuple_hash *, + &ct->tuplehash[IP6_CT_DIR_REPLY].tuple, NULL)) { + list_prepend(&ip6_conntrack_hash[hash], + &ct->tuplehash[IP6_CT_DIR_ORIGINAL]); + list_prepend(&ip6_conntrack_hash[repl_hash], + &ct->tuplehash[IP6_CT_DIR_REPLY]); + /* Timer relative to confirmation time, not original + setting time, otherwise we'd get timer wrap in + wierd delay cases. */ + ct->timeout.expires += jiffies; + add_timer(&ct->timeout); + atomic_inc(&ct->ct_general.use); + set_bit(IP6S_CONFIRMED_BIT, &ct->status); + WRITE_UNLOCK(&ip6_conntrack_lock); + return NF_ACCEPT; + } + + WRITE_UNLOCK(&ip6_conntrack_lock); + return NF_DROP; +} + +/* Is this needed ? this code is for NAT. - kozakai */ +/* Returns true if a connection correspondings to the tuple (required + for NAT). 
*/
+int
+ip6_conntrack_tuple_taken(const struct ip6_conntrack_tuple *tuple,
+			  const struct ip6_conntrack *ignored_conntrack)
+{
+	struct ip6_conntrack_tuple_hash *h;
+
+	/* Pure existence test: no reference is taken on the match. */
+	READ_LOCK(&ip6_conntrack_lock);
+	h = __ip6_conntrack_find(tuple, ignored_conntrack);
+	READ_UNLOCK(&ip6_conntrack_lock);
+
+	return h != NULL;
+}
+
+/*
+ * Returns conntrack if it dealt with ICMP, and filled in skb fields.
+ *
+ * skb      - packet; must not have a conntrack attached yet
+ * icmp6off - offset of the ICMPv6 header within skb
+ * ctinfo   - out: IP6_CT_RELATED, plus IP6_CT_IS_REPLY when the matching
+ *            hash entry is the reply direction
+ * hooknum  - netfilter hook number (unused here)
+ *
+ * Returns NULL when the packet is not an ICMPv6 error (type >= 128), is
+ * malformed, fails its checksum, or quotes a packet that matches no
+ * tracked connection.  On success skb->nfct and skb->nfctinfo refer to
+ * the matched connection, on which a reference is held.
+ */
+struct ip6_conntrack *
+icmp6_error_track(struct sk_buff *skb,
+		  unsigned int icmp6off,
+		  enum ip6_conntrack_info *ctinfo,
+		  unsigned int hooknum)
+{
+	struct ip6_conntrack_tuple intuple, origtuple;
+	struct ip6_conntrack_tuple_hash *h;
+	struct ipv6hdr *ip6h;
+	struct icmp6hdr hdr;
+	struct ipv6hdr inip6h;
+	unsigned int inip6off;
+	struct ip6_conntrack_protocol *inproto;
+	u_int8_t inprotonum;
+	/* Signed, as in ip6_conntrack_in(), so that the "< 0" test below
+	   can actually see a negative result from ip6_ct_skip_exthdr(). */
+	int inprotoff;
+
+	IP6_NF_ASSERT(skb->nfct == NULL);
+
+	ip6h = skb->nh.ipv6h;
+	if (skb_copy_bits(skb, icmp6off, &hdr, sizeof(hdr)) != 0) {
+		DEBUGP("icmp_error_track: Can't copy ICMPv6 hdr.\n");
+		return NULL;
+	}
+
+	/* Types 128 and above are not treated as errors here. */
+	if (hdr.icmp6_type >= 128)
+		return NULL;
+
+	/*
+	 * Should I ignore invalid ICMPv6 error here ?
+	 * ex) ICMPv6 error in ICMPv6 error, Fragmented packet, and so on.
+	 * - kozakai
+	 */
+
+	/* Why not check checksum in IPv4 conntrack ? - kozakai */
+	/* Ignore it if the checksum's bogus. */
+
+	if (csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, skb->len - icmp6off,
+			    IPPROTO_ICMPV6,
+			    skb_checksum(skb, icmp6off,
+					 skb->len - icmp6off, 0))) {
+		DEBUGP("ICMPv6 checksum failed\n");
+		return NULL;
+	}
+
+	/* The quoted (inner) IPv6 header follows the ICMPv6 header. */
+	inip6off = icmp6off + sizeof(hdr);
+	if (skb_copy_bits(skb, inip6off, &inip6h, sizeof(inip6h)) != 0) {
+		DEBUGP("Can't copy inner IPv6 hdr.\n");
+		return NULL;
+	}
+
+	inprotonum = inip6h.nexthdr;
+	inprotoff = ip6_ct_skip_exthdr(skb, inip6off + sizeof(inip6h),
+				       &inprotonum,
+				       skb->len - inip6off - sizeof(inip6h));
+
+	if (inprotoff < 0 || inprotoff > skb->len
+	    || inprotonum == NEXTHDR_FRAGMENT) {
+		DEBUGP("icmp6_error: Can't find protocol header in ICMPv6 payload.\n");
+		return NULL;
+	}
+
+	inproto = ip6_ct_find_proto(inprotonum);
+
+	/* Are they talking about one of our connections? */
+	if (!ip6_get_tuple(&inip6h, skb, inprotoff, inprotonum,
+			   &origtuple, inproto)) {
+		DEBUGP("icmp6_error: ! get_tuple p=%u\n", inprotonum);
+		return NULL;
+	}
+
+	/* Ordinarily, we'd expect the inverted tupleproto, but it's
+	   been preserved inside the ICMP. */
+	if (!invert_tuple(&intuple, &origtuple, inproto)) {
+		DEBUGP("icmp6_error_track: Can't invert tuple\n");
+		return NULL;
+	}
+
+	*ctinfo = IP6_CT_RELATED;
+
+	h = ip6_conntrack_find_get(&intuple, NULL);
+	if (!h) {
+		DEBUGP("icmp6_error_track: no match\n");
+		return NULL;
+	} else {
+		if (DIRECTION(h) == IP6_CT_DIR_REPLY)
+			*ctinfo += IP6_CT_IS_REPLY;
+	}
+
+	/* Update skb to refer to this connection.  nfctinfo has to be
+	   stored alongside nfct, as resolve_normal_ct() and
+	   ip6_conntrack_attach() do. */
+	skb->nfct = &h->ctrack->ct_general;
+	skb->nfctinfo = *ctinfo;
+	return h->ctrack;
+}
+
+/* There's a small race here where we may free a just-assured
+   connection.  Too bad: we're in trouble anyway. 
*/ +static inline int unreplied(const struct ip6_conntrack_tuple_hash *i) +{ + return !(test_bit(IP6S_ASSURED_BIT, &i->ctrack->status)); +} + +static int early_drop(struct list_head *chain) +{ + /* Traverse backwards: gives us oldest, which is roughly LRU */ + struct ip6_conntrack_tuple_hash *h; + int dropped = 0; + + READ_LOCK(&ip6_conntrack_lock); + h = LIST_FIND_B(chain, unreplied, struct ip6_conntrack_tuple_hash *); + if (h) + atomic_inc(&h->ctrack->ct_general.use); + READ_UNLOCK(&ip6_conntrack_lock); + + if (!h) + return dropped; + + if (del_timer(&h->ctrack->timeout)) { + death_by_timeout((unsigned long)h->ctrack); + dropped = 1; + } + ip6_conntrack_put(h->ctrack); + return dropped; +} + +static inline int helper_cmp(const struct ip6_conntrack_helper *i, + const struct ip6_conntrack_tuple *rtuple) +{ + return ip6_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); +} + +struct ip6_conntrack_helper * +ip6_ct_find_helper(const struct ip6_conntrack_tuple *tuple){ + + MUST_BE_READ_LOCKED(&ip6_conntrack_lock); + return LIST_FIND(&helpers, helper_cmp, + struct ip6_conntrack_helper *, + tuple); +} + +/* Allocate a new conntrack: we return -ENOMEM if classification + failed due to stress. Otherwise it really is unclassifiable. */ +static struct ip6_conntrack_tuple_hash * +init_conntrack(const struct ip6_conntrack_tuple *tuple, + struct ip6_conntrack_protocol *protocol, + struct sk_buff *skb, + unsigned int protoff) +{ + struct ip6_conntrack *conntrack; + struct ip6_conntrack_tuple repl_tuple; + size_t hash; + struct ip6_conntrack_expect *expected; + static unsigned int drop_next = 0; + + if (!ip6_conntrack_hash_rnd_initted) { + get_random_bytes(&ip6_conntrack_hash_rnd, 4); + ip6_conntrack_hash_rnd_initted = 1; + } + + hash = hash_conntrack(tuple); + + if (ip6_conntrack_max && + atomic_read(&ip6_conntrack_count) >= ip6_conntrack_max) { + /* Try dropping from random chain, or else from the + chain about to put into (in case they're trying to + bomb one hash chain). 
*/ + unsigned int next = (drop_next++)%ip6_conntrack_htable_size; + + if (!early_drop(&ip6_conntrack_hash[next]) + && !early_drop(&ip6_conntrack_hash[hash])) { + if (net_ratelimit()) + printk(KERN_WARNING + "ip6_conntrack: table full, dropping" + " packet.\n"); + return ERR_PTR(-ENOMEM); + } + } + + if (!invert_tuple(&repl_tuple, tuple, protocol)) { + DEBUGP("Can't invert tuple.\n"); + return NULL; + } + + conntrack = kmem_cache_alloc(ip6_conntrack_cachep, GFP_ATOMIC); + if (!conntrack) { + DEBUGP("Can't allocate conntrack.\n"); + return ERR_PTR(-ENOMEM); + } + + memset(conntrack, 0, sizeof(*conntrack)); + atomic_set(&conntrack->ct_general.use, 1); + conntrack->ct_general.destroy = destroy_conntrack; + conntrack->tuplehash[IP6_CT_DIR_ORIGINAL].tuple = *tuple; + conntrack->tuplehash[IP6_CT_DIR_ORIGINAL].ctrack = conntrack; + conntrack->tuplehash[IP6_CT_DIR_REPLY].tuple = repl_tuple; + conntrack->tuplehash[IP6_CT_DIR_REPLY].ctrack = conntrack; + + if (!protocol->new(conntrack, skb, protoff)) { + kmem_cache_free(ip6_conntrack_cachep, conntrack); + return NULL; + } + /* Don't set timer yet: wait for confirmation */ + init_timer(&conntrack->timeout); + conntrack->timeout.data = (unsigned long)conntrack; + conntrack->timeout.function = death_by_timeout; + + INIT_LIST_HEAD(&conntrack->sibling_list); + + WRITE_LOCK(&ip6_conntrack_lock); + /* Need finding and deleting of expected ONLY if we win race */ + READ_LOCK(&ip6_conntrack_expect_tuple_lock); + expected = LIST_FIND(&ip6_conntrack_expect_list, expect_cmp, + struct ip6_conntrack_expect *, tuple); + READ_UNLOCK(&ip6_conntrack_expect_tuple_lock); + + /* If master is not in hash table yet (ie. packet hasn't left + this machine yet), how can other end know about expected? + Hence these are not the droids you are looking for (if + master ct never got confirmed, we'd hold a reference to it + and weird things would happen to future packets). 
*/ + if (expected && !is_confirmed(expected->expectant)) + expected = NULL; + + /* Look up the conntrack helper for master connections only */ + if (!expected) + conntrack->helper = ip6_ct_find_helper(&repl_tuple); + + /* If the expectation is dying, then this is a loser. */ + if (expected + && expected->expectant->helper->timeout + && ! del_timer(&expected->timeout)) + expected = NULL; + + if (expected) { + DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n", + conntrack, expected); + /* Welcome, Mr. Bond. We've been expecting you... */ + IP6_NF_ASSERT(master_ct6(conntrack)); + __set_bit(IP6S_EXPECTED_BIT, &conntrack->status); + conntrack->master = expected; + expected->sibling = conntrack; + LIST_DELETE(&ip6_conntrack_expect_list, expected); + expected->expectant->expecting--; + nf_conntrack_get(&master_ct6(conntrack)->ct_general); + } + atomic_inc(&ip6_conntrack_count); + WRITE_UNLOCK(&ip6_conntrack_lock); + + if (expected && expected->expectfn) + expected->expectfn(conntrack); + return &conntrack->tuplehash[IP6_CT_DIR_ORIGINAL]; +} + +/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ +static inline struct ip6_conntrack * +resolve_normal_ct(struct sk_buff *skb, + unsigned int protoff, + u_int16_t protonum, + struct ip6_conntrack_protocol *proto, + int *set_reply, + unsigned int hooknum, + enum ip6_conntrack_info *ctinfo) +{ + struct ip6_conntrack_tuple tuple; + struct ip6_conntrack_tuple_hash *h; + + if (!ip6_get_tuple(skb->nh.ipv6h, skb, protoff, protonum, &tuple, proto)) + return NULL; + + /* look for tuple match */ + h = ip6_conntrack_find_get(&tuple, NULL); + if (!h) { + h = init_conntrack(&tuple, proto, skb, protoff); + if (!h) + return NULL; + if (IS_ERR(h)) + return (void *)h; + } + + /* It exists; we have (non-exclusive) reference. 
*/ + if (DIRECTION(h) == IP6_CT_DIR_REPLY) { + *ctinfo = IP6_CT_ESTABLISHED + IP6_CT_IS_REPLY; + /* Please set reply bit if this packet OK */ + *set_reply = 1; + } else { + /* Once we've had two way comms, always ESTABLISHED. */ + if (test_bit(IP6S_SEEN_REPLY_BIT, &h->ctrack->status)) { + DEBUGP("ip6_conntrack_in: normal packet for %p\n", + h->ctrack); + *ctinfo = IP6_CT_ESTABLISHED; + } else if (test_bit(IP6S_EXPECTED_BIT, &h->ctrack->status)) { + DEBUGP("ip6_conntrack_in: related packet for %p\n", + h->ctrack); + *ctinfo = IP6_CT_RELATED; + } else { + DEBUGP("ip6_conntrack_in: new packet for %p\n", + h->ctrack); + *ctinfo = IP6_CT_NEW; + } + *set_reply = 0; + } + skb->nfct = &h->ctrack->ct_general; + skb->nfctinfo = *ctinfo; + return h->ctrack; +} + +/* Netfilter hook itself. */ +unsigned int ip6_conntrack_in(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct ip6_conntrack *ct; + enum ip6_conntrack_info ctinfo; + struct ip6_conntrack_protocol *proto; + int set_reply; + int ret; + u_int8_t protonum; + int len; + int daddr_type; + int protoff, extoff; + + /* FIXME: Do this right please. --RR */ + (*pskb)->nfcache |= NFC_UNKNOWN; + + /* Ignore multicast - kozakai */ + daddr_type = ipv6_addr_type(&(*pskb)->nh.ipv6h->daddr); + if (daddr_type & IPV6_ADDR_MULTICAST) + return NF_ACCEPT; + + /* Previously seen (loopback)? Ignore. Do this before + fragment check. */ + if ((*pskb)->nfct) + return NF_ACCEPT; + + extoff = (u8*)((*pskb)->nh.ipv6h+1) - (*pskb)->data; + len = (*pskb)->len - extoff; + + /* Verify that a protocol is present and get the protocol handler + we need */ + protonum = (*pskb)->nh.ipv6h->nexthdr; + protoff = ip6_ct_skip_exthdr(*pskb, extoff, &protonum, len); + + /* + * Notice! (protoff == (*pskb)->len) mean that this packet doesn't + * have no data except of IPv6 & ext headers. but tracked anyway. 
+ * - kozakai + */ + if (protoff < 0 || protoff > (*pskb)->len + || protonum == NEXTHDR_FRAGMENT) { + DEBUGP("ip6_conntrack_core: can't find proto in pkt\n"); + return NF_ACCEPT; + } + + /* It may be an icmp error... */ + if (protonum == IPPROTO_ICMPV6 + && icmp6_error_track(*pskb, protoff, &ctinfo, hooknum)) + return NF_ACCEPT; + + proto = ip6_ct_find_proto(protonum); + + if (!(ct = resolve_normal_ct(*pskb, protoff, protonum, proto, + &set_reply, hooknum,&ctinfo))) + /* Not valid part of a connection */ + return NF_ACCEPT; + + if (IS_ERR(ct)) + /* Too stressed to deal. */ + return NF_DROP; + + IP6_NF_ASSERT((*pskb)->nfct); + + ret = proto->packet(ct, *pskb, protoff, ctinfo); + if (ret == -1) { + /* Invalid */ + nf_conntrack_put((*pskb)->nfct); + (*pskb)->nfct = NULL; + return NF_ACCEPT; + } + + if (ret != NF_DROP && ct->helper) { + ret = ct->helper->help(*pskb, protoff, ct, ctinfo); + if (ret == -1) { + /* Invalid */ + nf_conntrack_put((*pskb)->nfct); + (*pskb)->nfct = NULL; + return NF_ACCEPT; + } + } + if (set_reply) + set_bit(IP6S_SEEN_REPLY_BIT, &ct->status); + + return ret; +} + +int ip6_invert_tuplepr(struct ip6_conntrack_tuple *inverse, + const struct ip6_conntrack_tuple *orig) +{ + return invert_tuple(inverse, orig, ip6_ct_find_proto(orig->dst.protonum)); +} + +static inline int resent_expect(const struct ip6_conntrack_expect *i, + const struct ip6_conntrack_tuple *tuple, + const struct ip6_conntrack_tuple *mask) +{ + DEBUGP("resent_expect\n"); + DEBUGP(" tuple: "); DUMP_TUPLE(&i->tuple); + DEBUGP("test tuple: "); DUMP_TUPLE(tuple); + return (ip6_ct_tuple_equal(&i->tuple, tuple) + && ip6_ct_tuple_equal(&i->mask, mask)); +} + +static struct in6_addr * +or_addr6_bits(struct in6_addr *result, const struct in6_addr *one, + const struct in6_addr *two) +{ + + int count = 0; + + for (count = 0; count < 8; count++) + result->s6_addr16[count] = ntohs(one->s6_addr16[count]) + & ntohs(two->s6_addr16[count]); + + return result; +} + +/* Would two expected things 
clash? */ +static inline int expect_clash(const struct ip6_conntrack_expect *i, + const struct ip6_conntrack_tuple *tuple, + const struct ip6_conntrack_tuple *mask) +{ + /* Part covered by intersection of masks must be unequal, + otherwise they clash */ + struct ip6_conntrack_tuple intersect_mask; + + intersect_mask.src.u.all = i->mask.src.u.all & mask->src.u.all; + intersect_mask.dst.u.all = i->mask.dst.u.all & mask->dst.u.all; + intersect_mask.dst.protonum = i->mask.dst.protonum + & mask->dst.protonum; + + or_addr6_bits(&intersect_mask.src.ip, &i->mask.src.ip, + &mask->src.ip); + or_addr6_bits(&intersect_mask.dst.ip, &i->mask.dst.ip, + &mask->dst.ip); + + return ip6_ct_tuple_mask_cmp(&i->tuple, tuple, &intersect_mask); +} + +inline void ip6_conntrack_unexpect_related(struct ip6_conntrack_expect *expect) +{ + WRITE_LOCK(&ip6_conntrack_lock); + unexpect_related(expect); + WRITE_UNLOCK(&ip6_conntrack_lock); +} + +static void expectation_timed_out(unsigned long ul_expect) +{ + struct ip6_conntrack_expect *expect = (void *) ul_expect; + + DEBUGP("expectation %p timed out\n", expect); + WRITE_LOCK(&ip6_conntrack_lock); + __unexpect_related(expect); + WRITE_UNLOCK(&ip6_conntrack_lock); +} + +/* Add a related connection. 
*/ +int ip6_conntrack_expect_related(struct ip6_conntrack *related_to, + struct ip6_conntrack_expect *expect) +{ + struct ip6_conntrack_expect *old, *new; + int ret = 0; + + WRITE_LOCK(&ip6_conntrack_lock); + /* Because of the write lock, no reader can walk the lists, + * so there is no need to use the tuple lock too */ + + DEBUGP("ip6_conntrack_expect_related %p\n", related_to); + DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple); + DEBUGP("mask: "); DUMP_TUPLE(&expect->mask); + + old = LIST_FIND(&ip6_conntrack_expect_list, resent_expect, + struct ip6_conntrack_expect *, &expect->tuple, + &expect->mask); + if (old) { + /* Helper private data may contain offsets but no pointers + pointing into the payload - otherwise we should have to copy + the data filled out by the helper over the old one */ + DEBUGP("expect_related: resent packet\n"); + if (related_to->helper->timeout) { + if (!del_timer(&old->timeout)) { + /* expectation is dying. Fall through */ + old = NULL; + } else { + old->timeout.expires = jiffies + + related_to->helper->timeout * HZ; + add_timer(&old->timeout); + } + } + + if (old) { + WRITE_UNLOCK(&ip6_conntrack_lock); + return -EEXIST; + } + } else if (related_to->helper->max_expected && + related_to->expecting >= related_to->helper->max_expected) { + struct list_head *cur_item; + /* old == NULL */ + if (!(related_to->helper->flags & + IP6_CT_HELPER_F_REUSE_EXPECT)) { + WRITE_UNLOCK(&ip6_conntrack_lock); + if (net_ratelimit()) + printk(KERN_WARNING + "ip6_conntrack: max number of expected " + "connections %i of %s for " + "%x:%x:%x:%x:%x:%x:%x:%x->%x:%x:%x:%x:%x:%x:%x:%x\n", + related_to->helper->max_expected, + related_to->helper->name, + NIP6(related_to->tuplehash[IP6_CT_DIR_ORIGINAL].tuple.src.ip), + NIP6(related_to->tuplehash[IP6_CT_DIR_ORIGINAL].tuple.dst.ip)); + return -EPERM; + } + DEBUGP("ip6_conntrack: max number of expected " + "connections %i of %s reached for " + "%x:%x:%x:%x:%x:%x:%x:%x->%x:%x:%x:%x:%x:%x:%x:%x, reusing\n", + 
related_to->helper->max_expected, + related_to->helper->name, + NIP6(related_to->tuplehash[IP6_CT_DIR_ORIGINAL].tuple.src.ip), + NIP6(related_to->tuplehash[IP6_CT_DIR_ORIGINAL].tuple.dst.ip)); + + /* choose the the oldest expectation to evict */ + list_for_each(cur_item, &related_to->sibling_list) { + struct ip6_conntrack_expect *cur; + + cur = list_entry(cur_item, + struct ip6_conntrack_expect, + expected_list); + if (cur->sibling == NULL) { + old = cur; + break; + } + } + + /* (!old) cannot happen, since related_to->expecting is the + * number of unconfirmed expects */ + IP6_NF_ASSERT(old); + + /* newnat14 does not reuse the real allocated memory + * structures but rather unexpects the old and + * allocates a new. unexpect_related will decrement + * related_to->expecting. + */ + unexpect_related(old); + ret = -EPERM; + } else if (LIST_FIND(&ip6_conntrack_expect_list, expect_clash, + struct ip6_conntrack_expect *, &expect->tuple, + &expect->mask)) { + WRITE_UNLOCK(&ip6_conntrack_lock); + DEBUGP("expect_related: busy!\n"); + return -EBUSY; + } + + new = (struct ip6_conntrack_expect *) + kmalloc(sizeof(struct ip6_conntrack_expect), GFP_ATOMIC); + if (!new) { + WRITE_UNLOCK(&ip6_conntrack_lock); + DEBUGP("expect_relaed: OOM allocating expect\n"); + return -ENOMEM; + } + + DEBUGP("new expectation %p of conntrack %p\n", new, related_to); + memcpy(new, expect, sizeof(*expect)); + new->expectant = related_to; + new->sibling = NULL; + atomic_set(&new->use, 1); + + /* add to expected list for this connection */ + list_add(&new->expected_list, &related_to->sibling_list); + /* add to global list of expectations */ + list_prepend(&ip6_conntrack_expect_list, &new->list); + /* add and start timer if required */ + if (related_to->helper->timeout) { + init_timer(&new->timeout); + new->timeout.data = (unsigned long)new; + new->timeout.function = expectation_timed_out; + new->timeout.expires = jiffies + + related_to->helper->timeout * HZ; + add_timer(&new->timeout); + } + 
related_to->expecting++; + + WRITE_UNLOCK(&ip6_conntrack_lock); + + return ret; +} + + +/* Is this code needed ? this is for NAT. - kozakai */ +/* Alter reply tuple (maybe alter helper). If it's already taken, + return 0 and don't do alteration. */ +int ip6_conntrack_alter_reply(struct ip6_conntrack *conntrack, + const struct ip6_conntrack_tuple *newreply) +{ + WRITE_LOCK(&ip6_conntrack_lock); + if (__ip6_conntrack_find(newreply, conntrack)) { + WRITE_UNLOCK(&ip6_conntrack_lock); + return 0; + } + /* Should be unconfirmed, so not in hash table yet */ + IP6_NF_ASSERT(!is_confirmed(conntrack)); + + DEBUGP("Altering reply tuple of %p to ", conntrack); + DUMP_TUPLE(newreply); + + conntrack->tuplehash[IP6_CT_DIR_REPLY].tuple = *newreply; + if (!conntrack->master) + conntrack->helper = ip6_ct_find_helper(newreply); + WRITE_UNLOCK(&ip6_conntrack_lock); + + return 1; +} + +int ip6_conntrack_helper_register(struct ip6_conntrack_helper *me) +{ + WRITE_LOCK(&ip6_conntrack_lock); + list_prepend(&helpers, me); + WRITE_UNLOCK(&ip6_conntrack_lock); + + return 0; +} + +static inline int unhelp(struct ip6_conntrack_tuple_hash *i, + const struct ip6_conntrack_helper *me) +{ + if (i->ctrack->helper == me) { + /* Get rid of any expected. */ + remove_expectations(i->ctrack, 0); + /* And *then* set helper to NULL */ + i->ctrack->helper = NULL; + } + return 0; +} + +void ip6_conntrack_helper_unregister(struct ip6_conntrack_helper *me) +{ + unsigned int i; + + /* Need write lock here, to delete helper. */ + WRITE_LOCK(&ip6_conntrack_lock); + LIST_DELETE(&helpers, me); + + /* Get rid of expecteds, set helpers to NULL. */ + for (i = 0; i < ip6_conntrack_htable_size; i++) + LIST_FIND_W(&ip6_conntrack_hash[i], unhelp, + struct ip6_conntrack_tuple_hash *, me); + WRITE_UNLOCK(&ip6_conntrack_lock); + + /* Someone could be still looking at the helper in a bh. */ + synchronize_net(); +} + +/* Refresh conntrack for this many jiffies. 
*/
+void ip6_ct_refresh(struct ip6_conntrack *ct, unsigned long extra_jiffies)
+{
+	IP6_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
+
+	WRITE_LOCK(&ip6_conntrack_lock);
+	/* If not in hash table, timer will not be active yet; expires then
+	   holds a relative value that __ip6_conntrack_confirm() adds
+	   jiffies to. */
+	if (!is_confirmed(ct))
+		ct->timeout.expires = extra_jiffies;
+	else {
+		/* Need del_timer for race avoidance (may already be dying). */
+		if (del_timer(&ct->timeout)) {
+			ct->timeout.expires = jiffies + extra_jiffies;
+			add_timer(&ct->timeout);
+		}
+	}
+	WRITE_UNLOCK(&ip6_conntrack_lock);
+}
+
+/* Used by ip6t_REJECT.  Attaches skb's conntrack to nskb as a RELATED
+   packet in the opposite direction and takes a reference for nskb. */
+static void ip6_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
+{
+	struct ip6_conntrack *ct;
+	enum ip6_conntrack_info ctinfo;
+
+	ct = ip6_conntrack_get(skb, &ctinfo);
+
+	/* This ICMP is in reverse direction to the packet which
+	   caused it */
+	if (CTINFO2DIR(ctinfo) == IP6_CT_DIR_ORIGINAL)
+		ctinfo = IP6_CT_RELATED + IP6_CT_IS_REPLY;
+	else
+		ctinfo = IP6_CT_RELATED;
+
+	/* Attach new skbuff, and increment count */
+	nskb->nfct = &ct->ct_general;
+	nskb->nfctinfo = ctinfo;
+	nf_conntrack_get(nskb->nfct);
+}
+/* LIST_FIND adapter: apply the caller's kill() predicate to the
+   conntrack that owns hash entry i. */
+static inline int
+do_kill(const struct ip6_conntrack_tuple_hash *i,
+	int (*kill)(const struct ip6_conntrack *i, void *data),
+	void *data)
+{
+	return kill(i->ctrack, data);
+}
+
+/* Bring out ya dead!  Scans the hash buckets in order and returns the
+   first entry for which kill() is true, with a reference taken on its
+   conntrack, or NULL when nothing matches. */
+static struct ip6_conntrack_tuple_hash *
+get_next_corpse(int (*kill)(const struct ip6_conntrack *i, void *data),
+		void *data)
+{
+	struct ip6_conntrack_tuple_hash *h = NULL;
+	unsigned int i;
+
+	READ_LOCK(&ip6_conntrack_lock);
+	for (i = 0; !h && i < ip6_conntrack_htable_size; i++) {
+		h = LIST_FIND(&ip6_conntrack_hash[i], do_kill,
+			      struct ip6_conntrack_tuple_hash *, kill, data);
+	}
+	if (h)
+		atomic_inc(&h->ctrack->ct_general.use);
+	READ_UNLOCK(&ip6_conntrack_lock);
+
+	return h;
+}
+/* Kill every hashed conntrack for which kill(ct, data) returns
+   non-zero.  Each pass rescans the table from the first bucket. */
+void
+ip6_ct_selective_cleanup(int (*kill)(const struct ip6_conntrack *i, void *data),
+			 void *data)
+{
+	struct ip6_conntrack_tuple_hash *h;
+
+	/* This is order n^2, by the way. */
+	while ((h = get_next_corpse(kill, data)) != NULL) {
+		/* Time to push up daisies... */
+		if (del_timer(&h->ctrack->timeout))
+			death_by_timeout((unsigned long)h->ctrack);
+		/* ... else the timer will get him soon. */
+
+		ip6_conntrack_put(h->ctrack);	/* reference from get_next_corpse() */
+	}
+}
+
+/* Fast function for those who don't want to parse /proc (and I don't
+   blame them).  getsockopt handler registered through so_getorigdst:
+   copies a sockaddr_in6 with the ORIGINAL-direction destination of the
+   socket's connection to user.  Returns 0, -ENOPROTOOPT for non-TCP
+   sockets, -EINVAL when *len is too small, -ENOENT when no conntrack
+   matches, or -EFAULT when the copy to user space fails. */
+/* Reversing the socket's dst/src point of view gives us the reply
+   mapping. */
+static int
+getorigdst(struct sock *sk, int optval, void __user *user, int *len)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ip6_conntrack_tuple_hash *h;
+	struct ip6_conntrack_tuple tuple;
+
+	memset(&tuple, 0, sizeof(tuple));
+	ipv6_addr_copy(&tuple.src.ip, &np->rcv_saddr);
+	ipv6_addr_copy(&tuple.dst.ip, &np->daddr);
+	tuple.src.u.tcp.port = inet->sport;
+	tuple.dst.u.tcp.port = inet->dport;
+	tuple.dst.protonum = IPPROTO_TCP;
+
+	/* We only do TCP at the moment: is there a better way? */
+	/* NOTE(review): IPv6 TCP sockets may report a different
+	   sk_prot->name (e.g. "TCPv6"), in which case this test rejects
+	   every socket - confirm against the tcpv6 proto definition. */
+	if (strcmp(sk->sk_prot->name, "TCP")) {
+		DEBUGP("IPV6_NF_ORIGINAL_DST: Not a TCP socket\n");
+		return -ENOPROTOOPT;
+	}
+
+	if ((unsigned int) *len < sizeof(struct sockaddr_in6)) {
+		/* NOTE(review): %u is given a sizeof() value here. */
+		DEBUGP("IPV6_NF_ORIGINAL_DST: len %u not %u\n",
+		       *len, sizeof(struct sockaddr_in6));
+		return -EINVAL;
+	}
+
+	h = ip6_conntrack_find_get(&tuple, NULL);
+	if (h) {
+		struct sockaddr_in6 sin;
+		memset(&sin, 0, sizeof(sin));
+
+		sin.sin6_family = AF_INET6;
+		sin.sin6_port = h->ctrack->tuplehash[IP6_CT_DIR_ORIGINAL]
+			.tuple.dst.u.tcp.port;
+		ipv6_addr_copy(&sin.sin6_addr,
+			       &h->ctrack->tuplehash[IP6_CT_DIR_ORIGINAL]
+			       .tuple.dst.ip);
+		/* FIXME: sin6_scope_id */
+
+		DEBUGP("IPV6_NF_ORIGINAL_DST: %x:%x:%x:%x:%x:%x:%x:%x %u\n",
+		       NIP6(sin.sin6_addr), ntohs(sin.sin6_port));
+		ip6_conntrack_put(h->ctrack);	/* reference from find_get */
+		if (copy_to_user(user, &sin, sizeof(sin)) != 0)
+			return -EFAULT;
+		else
+			return 0;
+	}
+	DEBUGP("IPV6_NF_ORIGINAL_DST: Can't find %x:%x:%x:%x:%x:%x:%x:%x/%u-%x:%x:%x:%x:%x:%x:%x:%x/%u.\n",
+	       NIP6(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
+	       NIP6(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
+	return -ENOENT;
+}
+/* Socket-option registration: routes the single PF_INET6 option
+   IPV6_NF_ORIGINAL_DST to getorigdst(). */
+static struct nf_sockopt_ops so_getorigdst = {
+	.pf		= PF_INET6,
+	.get_optmin	= IPV6_NF_ORIGINAL_DST,
+	.get_optmax	= IPV6_NF_ORIGINAL_DST+1,
+	.get		= &getorigdst,
+};
+/* sysctl id and /proc name of the ip6_conntrack_max tunable. */
+#define NET_IP6_CONNTRACK_MAX 2089
+#define NET_IP6_CONNTRACK_MAX_NAME "ip6_conntrack_max"
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_header *ip6_conntrack_sysctl_header;
+/* Leaf table: exposes ip6_conntrack_max as a read/write (0644) integer. */
+static ctl_table ip6_conntrack_table[] = {
+	{
+		.ctl_name	= NET_IP6_CONNTRACK_MAX,
+		.procname	= NET_IP6_CONNTRACK_MAX_NAME,
+		.data		= &ip6_conntrack_max,
+		.maxlen		= sizeof(ip6_conntrack_max),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{ .ctl_name = 0 }
+};
+/* "ipv6" directory holding the leaf table above. */
+static ctl_table ip6_conntrack_dir_table[] = {
+	{
+		.ctl_name	= NET_IPV6,
+		/* NOTE(review): the bare NULL is a positional initializer
+		   for whichever member follows .procname - confirm that
+		   this is intended. */
+		.procname	= "ipv6", NULL,
+		.mode		= 0555,
+		.child		= ip6_conntrack_table
+	},
+	{ .ctl_name = 0 }
+};
+/* "net" root directory holding the "ipv6" directory above. */
+static ctl_table ip6_conntrack_root_table[] = {
+	{
+		.ctl_name	= CTL_NET,
+		.procname	= "net",
+		.mode		= 0555,
+		.child		= ip6_conntrack_dir_table
+	},
+	{ .ctl_name = 0 }
+};
+#endif /*CONFIG_SYSCTL*/
+/* kill() predicate that selects every conntrack. */
+static int kill_all(const struct ip6_conntrack *i, void *data)
+{
+	return 1;
+}
+
+/* Mishearing the voices in his head, our hero wonders how he's
+   supposed to kill the mall. */
+void ip6_conntrack_cleanup(void)
+{
+#ifdef CONFIG_SYSCTL
+	unregister_sysctl_table(ip6_conntrack_sysctl_header);
+#endif
+	ip6_ct_attach = NULL;
+	/* This makes sure all current packets have passed through
+	   netfilter framework.  Roll on, two-stage module
+	   delete... 
*/ + synchronize_net(); + + i_see_dead_people: + ip6_ct_selective_cleanup(kill_all, NULL); + if (atomic_read(&ip6_conntrack_count) != 0) { + schedule(); + goto i_see_dead_people; + } + + kmem_cache_destroy(ip6_conntrack_cachep); + vfree(ip6_conntrack_hash); + nf_unregister_sockopt(&so_getorigdst); +} + +static int hashsize = 0; +MODULE_PARM(hashsize, "i"); + +int __init ip6_conntrack_init(void) +{ + unsigned int i; + int ret; + + /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB + * machine has 256 buckets. >= 1GB machines have 8192 buckets. */ + if (hashsize) { + ip6_conntrack_htable_size = hashsize; + } else { + ip6_conntrack_htable_size + = (((num_physpages << PAGE_SHIFT) / 16384) + / sizeof(struct list_head)); + if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE)) + ip6_conntrack_htable_size = 8192; + if (ip6_conntrack_htable_size < 16) + ip6_conntrack_htable_size = 16; + } + ip6_conntrack_max = 8 * ip6_conntrack_htable_size; + + printk("ip6_conntrack version %s (%u buckets, %d max)" + " - %Zd bytes per conntrack\n", IP6_CONNTRACK_VERSION, + ip6_conntrack_htable_size, ip6_conntrack_max, + sizeof(struct ip6_conntrack)); + + ret = nf_register_sockopt(&so_getorigdst); + if (ret != 0) { + printk(KERN_ERR "Unable to register netfilter socket option\n"); + return ret; + } + + ip6_conntrack_hash = vmalloc(sizeof(struct list_head) + * ip6_conntrack_htable_size); + if (!ip6_conntrack_hash) { + printk(KERN_ERR "Unable to create ip6_conntrack_hash\n"); + goto err_unreg_sockopt; + } + + ip6_conntrack_cachep = kmem_cache_create("ip6_conntrack", + sizeof(struct ip6_conntrack), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!ip6_conntrack_cachep) { + printk(KERN_ERR "Unable to create ip6_conntrack slab cache\n"); + goto err_free_hash; + } + /* Don't NEED lock here, but good form anyway. */ + WRITE_LOCK(&ip6_conntrack_lock); + /* Sew in builtin protocols. 
*/ + list_append(&ip6_protocol_list, &ip6_conntrack_protocol_tcp); + list_append(&ip6_protocol_list, &ip6_conntrack_protocol_udp); + list_append(&ip6_protocol_list, &ip6_conntrack_protocol_icmpv6); + WRITE_UNLOCK(&ip6_conntrack_lock); + + for (i = 0; i < ip6_conntrack_htable_size; i++) + INIT_LIST_HEAD(&ip6_conntrack_hash[i]); + +/* This is fucking braindead. There is NO WAY of doing this without + the CONFIG_SYSCTL unless you don't want to detect errors. + Grrr... --RR */ +#ifdef CONFIG_SYSCTL + ip6_conntrack_sysctl_header + = register_sysctl_table(ip6_conntrack_root_table, 0); + if (ip6_conntrack_sysctl_header == NULL) { + goto err_free_ct_cachep; + } +#endif /*CONFIG_SYSCTL*/ + + /* For use by ip6t_REJECT */ + ip6_ct_attach = ip6_conntrack_attach; + return ret; + +#ifdef CONFIG_SYSCTL +err_free_ct_cachep: + kmem_cache_destroy(ip6_conntrack_cachep); +#endif /*CONFIG_SYSCTL*/ +err_free_hash: + vfree(ip6_conntrack_hash); +err_unreg_sockopt: + nf_unregister_sockopt(&so_getorigdst); + + return -ENOMEM; +} diff -urN linux-2.6.11/net/ipv6/netfilter/ip6_conntrack_ftp.c x1/net/ipv6/netfilter/ip6_conntrack_ftp.c --- linux-2.6.11/net/ipv6/netfilter/ip6_conntrack_ftp.c 1970-01-01 01:00:00.000000000 +0100 +++ x1/net/ipv6/netfilter/ip6_conntrack_ftp.c 2003-10-10 07:28:11.000000000 +0200 @@ -0,0 +1,554 @@ +/* + * FTP extension for IPv6 connection tracking. + * Linux INET6 implementation + * + * Copyright (C)2003 USAGI/WIDE Project + * + * Authors: + * Yasuyuki Kozakai + * + * Based on: net/ipv4/netfilter/ip_conntrack_ftp.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* FTP extension for IP6 connection tracking. 
*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* This is slow, but it's simple. --RR */ +static char ftp_buffer[65536]; + +DECLARE_LOCK(ip6_ftp_lock); +struct module *ip6_conntrack_ftp = THIS_MODULE; + +#define MAX_PORTS 8 +static int ports[MAX_PORTS]; +static int ports_c = 0; +#ifdef MODULE_PARM +MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_PORTS) "i"); +#endif + +static int loose = 0; +MODULE_PARM(loose, "i"); + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +struct cmd_info { + struct in6_addr ip; + u_int16_t port; +}; + +static int try_eprt(const char *, size_t, struct cmd_info *, char); +static int try_espv_response(const char *, size_t, struct cmd_info *, char); + +static struct ftp_search { + enum ip6_conntrack_dir dir; + const char *pattern; + size_t plen; + char skip; + char term; + enum ip6_ct_ftp_type ftptype; + int (*getnum)(const char *, size_t, struct cmd_info *, char); +} search[] = { + { + IP6_CT_DIR_ORIGINAL, + "EPRT", sizeof("EPRT") - 1, ' ', '\r', + IP6_CT_FTP_EPRT, + try_eprt, + }, + { + IP6_CT_DIR_REPLY, + "229 ", sizeof("229 ") - 1, '(', ')', + IP6_CT_FTP_EPSV, + try_espv_response, + }, +}; + +/* This code is based on inet_pton() in glibc-2.2.4 */ + +#define NS_IN6ADDRSZ 16 +#define NS_INADDRSZ 4 +#define NS_INT16SZ 2 + +/* + * return the length of string of address parse untill error, + * dlen or reaching terminal char - kozakai + */ +static int +get_ipv6_addr(const char *src, u_int8_t *dst, size_t dlen, u_int8_t term) +{ + static const char xdigits[] = "0123456789abcdef"; + u_int8_t tmp[NS_IN6ADDRSZ], *tp, *endp, *colonp; + const char *curtok; + int ch, saw_xdigit; + u_int32_t val; + size_t clen = 0; + + tp = memset(tmp, '\0', NS_IN6ADDRSZ); + endp = tp + NS_IN6ADDRSZ; + colonp = NULL; + + /* Leading :: requires some special handling. 
*/ + if (*src == ':'){ + if (*++src != ':') + return (0); + clen++; + } + + curtok = src; + saw_xdigit = 0; + val = 0; + while ((clen < dlen) && (*src != term)) { + const char *pch; + + ch = tolower (*src++); + clen++; + + pch = strchr(xdigits, ch); + if (pch != NULL) { + val <<= 4; + val |= (pch - xdigits); + if (val > 0xffff) + return (0); + + saw_xdigit = 1; + continue; + } + if (ch == ':') { + curtok = src; + if (!saw_xdigit) { + if (colonp) + return (0); + colonp = tp; + continue; + } else if (*src == term) { + return (0); + } + if (tp + NS_INT16SZ > endp) + return (0); + *tp++ = (u_int8_t) (val >> 8) & 0xff; + *tp++ = (u_int8_t) val & 0xff; + saw_xdigit = 0; + val = 0; + continue; + } + return (0); + } + if (saw_xdigit) { + if (tp + NS_INT16SZ > endp) + return (0); + + *tp++ = (u_int8_t) (val >> 8) & 0xff; + *tp++ = (u_int8_t) val & 0xff; + } + if (colonp != NULL) { + /* + * Since some memmove()'s erroneously fail to handle + * overlapping regions, we'll do the shift by hand. + */ + const int n = tp - colonp; + int i; + + if (tp == endp) + return (0); + + for (i = 1; i <= n; i++) { + endp[- i] = colonp[n - i]; + colonp[n - i] = 0; + } + tp = endp; + } + if (tp != endp || (*src != term)) + return (0); + + memcpy(dst, tmp, NS_IN6ADDRSZ); + return clen; +} + +/* Parse decimal digits up to 'term' within dlen bytes; store the port in network byte order and return the digit count, or 0 if none, a non-digit is hit, or 'term' is missing. */ +static int get_port(const char *data, u_int16_t *port, size_t dlen, char term) +{ + int i; + u_int16_t tmp_port = 0; + + for(i = 0; i < dlen; i++) { + /* Finished? */ + if(data[i] == term){ + *port = htons(tmp_port); + return i; + } + + if(data[i] < '0' || data[i] > '9') + return 0; + + tmp_port = tmp_port*10 + (data[i] - '0'); + } + return 0; +} + +/* Returns 0, or length of numbers: |2|1080::8:800:200C:417A|5282| */ +static int try_eprt(const char *data, size_t dlen, struct cmd_info *cmd, + char term) +{ + char delim; + int len; + int addr_len; + + /* First character is delimiter, then "2" for IPv6, then + delimiter again.
*/ + + if (dlen <= 3) + return 0; + + delim = data[0]; + + if (isdigit(delim) || delim < 33 || delim > 126 + || data[1] != '2' || data[2] != delim){ + return 0; + } + DEBUGP("Got %c2%c\n", delim, delim); + + len = 3; + + /* Now we have IP address. */ + addr_len = get_ipv6_addr(&data[len], cmd->ip.s6_addr, + dlen - len, delim); + + if (addr_len == 0) + return 0; + + len += addr_len + 1; + + DEBUGP("Got IPv6 address!\n"); + /* Bound the port scan to the bytes left after the address field (none left means no port). */ + addr_len = ((size_t)len < dlen) ? get_port(&data[len], &cmd->port, dlen - len, delim) : 0; + + if(addr_len == 0) + return 0; + + len += addr_len + 1; + + return len; +} + +/* Returns 0, or length of numbers: |||6446| */ +static int try_espv_response(const char *data, size_t dlen, + struct cmd_info *cmd, char term) +{ + char delim; + size_t len; + + /* Three delimiters. */ + if (dlen <= 3) + return 0; + + delim = data[0]; + + if (isdigit(delim) || delim < 33 || delim > 126 + || data[1] != delim || data[2] != delim) + return 0; + /* data[0..2] are the delimiters, so only dlen - 3 bytes remain for the port. */ + len = get_port(&data[3], &cmd->port, dlen - 3, delim); + + if(len == 0) + return 0; + + return 3 + len + 1; +} + +/* Return 1 for match, 0 for accept, -1 for partial. */ +static int find_pattern(const char *data, size_t dlen, + const char *pattern, size_t plen, + char skip, char term, + unsigned int *numoff, + unsigned int *numlen, + struct cmd_info *cmd, + int (*getnum)(const char *, size_t, struct cmd_info *, + char)) +{ + size_t i; + + DEBUGP("find_pattern `%s': dlen = %u\n", pattern, dlen); + if (dlen == 0) + return 0; + + if (dlen <= plen) { + /* Short packet: try for partial?
*/ + if (strnicmp(data, pattern, dlen) == 0) + return -1; + else return 0; + } + + if (strnicmp(data, pattern, plen) != 0) { +#if 0 + size_t i; + + DEBUGP("ftp: string mismatch\n"); + for (i = 0; i < plen; i++) { + DEBUGP("ftp:char %u `%c'(%u) vs `%c'(%u)\n", + i, data[i], data[i], + pattern[i], pattern[i]); + } +#endif + return 0; + } + + DEBUGP("Pattern matches!\n"); + /* Now we've found the constant string, try to skip + to the 'skip' character */ + for (i = plen; data[i] != skip; i++) + if (i == dlen - 1) return -1; + + /* Skip over the last character */ + i++; + + DEBUGP("Skipped up to `%c'!\n", skip); + + *numoff = i; + *numlen = getnum(data + i, dlen - i, cmd, term); + if (!*numlen) + return -1; + + DEBUGP("Match succeeded!\n"); + return 1; +} + +static int help(const struct sk_buff *skb, + unsigned int protoff, + struct ip6_conntrack *ct, + enum ip6_conntrack_info ctinfo) +{ + unsigned int dataoff, datalen; + struct tcphdr tcph; + u_int32_t old_seq_aft_nl; + int old_seq_aft_nl_set, ret; + int dir = CTINFO2DIR(ctinfo); + unsigned int matchlen, matchoff; + struct ip6_ct_ftp_master *ct_ftp_info = &ct->help.ct_ftp_info; + struct ip6_conntrack_expect expect, *exp = &expect; + struct ip6_ct_ftp_expect *exp_ftp_info = &exp->help.exp_ftp_info; + + unsigned int i; + int found = 0; + + struct ipv6hdr *ipv6h = skb->nh.ipv6h; + struct ip6_conntrack_tuple *t = &exp->tuple, *mask = &exp->mask; + struct cmd_info cmd; + unsigned int csum; + + /* Until there's been traffic both ways, don't look in packets. */ + if (ctinfo != IP6_CT_ESTABLISHED + && ctinfo != IP6_CT_ESTABLISHED+IP6_CT_IS_REPLY) { + DEBUGP("ftp: Conntrackinfo = %u\n", ctinfo); + return NF_ACCEPT; + } + + if (skb_copy_bits(skb, protoff, &tcph, sizeof(tcph)) != 0) + return NF_ACCEPT; + + dataoff = protoff + tcph.doff * 4; + /* No data? 
*/ + if (dataoff >= skb->len) { + DEBUGP("ftp: dataoff(%u) >= skblen(%u)\n", dataoff, skb->len); + return NF_ACCEPT; + } + datalen = skb->len - dataoff; + + LOCK_BH(&ip6_ftp_lock); + + csum = skb_copy_and_csum_bits(skb, dataoff, ftp_buffer, + skb->len - dataoff, 0); + csum = skb_checksum(skb, protoff, tcph.doff * 4, csum); + + /* Checksum invalid? Ignore. */ + /* FIXME: Source route IP option packets --RR */ + if (csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len - protoff, + IPPROTO_TCP, csum)) { + DEBUGP("ftp_help: bad csum: %p %u\n" + "%x:%x:%x:%x:%x:%x:%x:%x -> %x:%x:%x:%x:%x:%x:%x:%x\n", + &tcph, skb->len - protoff, NIP6(ipv6h->saddr), + NIP6(ipv6h->daddr)); + ret = NF_ACCEPT; + goto out; + } + + old_seq_aft_nl_set = ct_ftp_info->seq_aft_nl_set[dir]; + old_seq_aft_nl = ct_ftp_info->seq_aft_nl[dir]; + + DEBUGP("conntrack_ftp: datalen %u\n", datalen); + if (ftp_buffer[datalen - 1] == '\n') { + DEBUGP("conntrack_ftp: datalen %u ends in \\n\n", datalen); + if (!old_seq_aft_nl_set + || after(ntohl(tcph.seq) + datalen, old_seq_aft_nl)) { + DEBUGP("conntrack_ftp: updating nl to %u\n", + ntohl(tcph.seq) + datalen); + ct_ftp_info->seq_aft_nl[dir] = + ntohl(tcph.seq) + datalen; + ct_ftp_info->seq_aft_nl_set[dir] = 1; + } + } + + if(!old_seq_aft_nl_set || + (ntohl(tcph.seq) != old_seq_aft_nl)) { + DEBUGP("ip6_conntrack_ftp_help: wrong seq pos %s(%u)\n", + old_seq_aft_nl_set ? "":"(UNSET) ", old_seq_aft_nl); + ret = NF_ACCEPT; + goto out; + } + + /* Initialize IP array to expected address (it's not mentioned + in EPSV responses) */ + ipv6_addr_copy(&cmd.ip, &ct->tuplehash[dir].tuple.src.ip); + + for (i = 0; i < ARRAY_SIZE(search); i++) { + if (search[i].dir != dir) continue; + + found = find_pattern(ftp_buffer, datalen, + search[i].pattern, + search[i].plen, + search[i].skip, + search[i].term, + &matchoff, &matchlen, + &cmd, + search[i].getnum); + if (found) break; + } + if (found == -1) { + /* We don't usually drop packets. 
After all, this is + connection tracking, not packet filtering. + However, it is neccessary for accurate tracking in + this case. */ + if (net_ratelimit()) + printk("conntrack_ftp: partial %s %u+%u\n", + search[i].pattern, + ntohl(tcph.seq), datalen); + ret = NF_DROP; + goto out; + } else if (found == 0) { /* No match */ + ret = NF_ACCEPT; + goto out; + } + + DEBUGP("conntrack_ftp: match `%.*s' (%u bytes at %u)\n", + (int)matchlen, ftp_buffer + matchoff, + matchlen, ntohl(tcph.seq) + matchoff); + + memset(&expect, 0, sizeof(expect)); + + /* Update the ftp info */ + if (!ipv6_addr_cmp(&cmd.ip, &ct->tuplehash[dir].tuple.src.ip)) { + exp->seq = ntohl(tcph.seq) + matchoff; + exp_ftp_info->len = matchlen; + exp_ftp_info->ftptype = search[i].ftptype; + exp_ftp_info->port = cmd.port; + } else { + /* + This situation is occurred with NAT. + */ + if (!loose) { + ret = NF_ACCEPT; + goto out; + } + } + + ipv6_addr_copy(&t->src.ip, &ct->tuplehash[!dir].tuple.src.ip); + ipv6_addr_copy(&t->dst.ip, &cmd.ip); + t->src.u.tcp.port = 0; + t->dst.u.tcp.port = cmd.port; + t->dst.protonum = IPPROTO_TCP; + + ipv6_addr_set(&mask->src.ip, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF); + mask->src.u.tcp.port = 0; + mask->dst.u.tcp.port = 0xFFFF; + mask->dst.protonum = 0xFFFF; + + exp->expectfn = NULL; + + /* Ignore failure; should only happen with NAT */ + ip6_conntrack_expect_related(ct, &expect); + ret = NF_ACCEPT; + out: + UNLOCK_BH(&ip6_ftp_lock); + return ret; +} + +static struct ip6_conntrack_helper ftp[MAX_PORTS]; +static char ftp_names[MAX_PORTS][10]; + +/* Not __exit: called from init() */ +static void fini(void) +{ + int i; + for (i = 0; i < ports_c; i++) { + DEBUGP("ip6_ct_ftp: unregistering helper for port %d\n", + ports[i]); + ip6_conntrack_helper_unregister(&ftp[i]); + } +} + +static int __init init(void) +{ + int i, ret; + char *tmpname; + + if (ports[0] == 0) + ports[0] = FTP_PORT; + + for (i = 0; (i < MAX_PORTS) && ports[i]; i++) { + memset(&ftp[i], 0, sizeof(struct 
ip6_conntrack_helper)); + ftp[i].tuple.src.u.tcp.port = htons(ports[i]); + ftp[i].tuple.dst.protonum = IPPROTO_TCP; + ftp[i].mask.src.u.tcp.port = 0xFFFF; + ftp[i].mask.dst.protonum = 0xFFFF; + ftp[i].max_expected = 1; + ftp[i].timeout = 0; + ftp[i].flags = IP6_CT_HELPER_F_REUSE_EXPECT; + ftp[i].me = ip6_conntrack_ftp; + ftp[i].help = help; + + tmpname = &ftp_names[i][0]; + if (ports[i] == FTP_PORT) + sprintf(tmpname, "ftp"); + else + sprintf(tmpname, "ftp-%d", ports[i]); + ftp[i].name = tmpname; + + DEBUGP("ip6_ct_ftp: registering helper for port %d\n", + ports[i]); + ret = ip6_conntrack_helper_register(&ftp[i]); + + if (ret) { + fini(); + return ret; + } + ports_c++; + } + return 0; +} + + +PROVIDES_CONNTRACK6(ftp); +EXPORT_SYMBOL(ip6_ftp_lock); +MODULE_LICENSE("GPL"); +module_init(init); +module_exit(fini); diff -urN linux-2.6.11/net/ipv6/netfilter/ip6_conntrack_proto_generic.c x1/net/ipv6/netfilter/ip6_conntrack_proto_generic.c --- linux-2.6.11/net/ipv6/netfilter/ip6_conntrack_proto_generic.c 1970-01-01 01:00:00.000000000 +0100 +++ x1/net/ipv6/netfilter/ip6_conntrack_proto_generic.c 2003-09-18 12:48:46.000000000 +0200 @@ -0,0 +1,82 @@ +/* + * IPv6 generic protocol extension for IPv6 connection tracking + * Linux INET6 implementation + * + * Copyright (C)2003 USAGI/WIDE Project + * + * Authors: + * Yasuyuki Kozakai + * + * Based on: net/ipv4/netfilter/ip_conntrack_proto_generic.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include + +#define GENERIC_TIMEOUT (600*HZ) + +static int generic_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct ip6_conntrack_tuple *tuple) +{ + tuple->src.u.all = 0; + tuple->dst.u.all = 0; + + return 1; +} + +static int generic_invert_tuple(struct ip6_conntrack_tuple *tuple, + const struct ip6_conntrack_tuple *orig) +{ + tuple->src.u.all = 0; + tuple->dst.u.all = 0; + + return 1; +} + +/* Print out the per-protocol part of the tuple. */ +static unsigned int generic_print_tuple(char *buffer, + const struct ip6_conntrack_tuple *tuple) +{ + return 0; +} + +/* Print out the private part of the conntrack. */ +static unsigned int generic_print_conntrack(char *buffer, + const struct ip6_conntrack *state) +{ + return 0; +} + +/* Returns verdict for packet, or -1 for invalid. */ +static int established(struct ip6_conntrack *conntrack, + const struct sk_buff *skb, + unsigned int dataoff, + enum ip6_conntrack_info conntrackinfo) +{ + ip6_ct_refresh(conntrack, GENERIC_TIMEOUT); + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. 
*/ +static int +new(struct ip6_conntrack *conntrack, + const struct sk_buff *skb, + unsigned int dataoff) +{ + return 1; +} + +struct ip6_conntrack_protocol ip6_conntrack_generic_protocol += { { NULL, NULL }, 0, "unknown", + generic_pkt_to_tuple, generic_invert_tuple, generic_print_tuple, + generic_print_conntrack, established, new, NULL, NULL, NULL }; + diff -urN linux-2.6.11/net/ipv6/netfilter/ip6_conntrack_proto_icmpv6.c x1/net/ipv6/netfilter/ip6_conntrack_proto_icmpv6.c --- linux-2.6.11/net/ipv6/netfilter/ip6_conntrack_proto_icmpv6.c 1970-01-01 01:00:00.000000000 +0100 +++ x1/net/ipv6/netfilter/ip6_conntrack_proto_icmpv6.c 2003-09-18 12:48:46.000000000 +0200 @@ -0,0 +1,135 @@ +/* + * ICMPv6 extension for IPv6 connection tracking + * Linux INET6 implementation + * + * Copyright (C)2003 USAGI/WIDE Project + * + * Authors: + * Yasuyuki Kozakai + * + * Based on: net/ipv4/netfilter/ip_conntrack_proto_icmp.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define ICMPV6_TIMEOUT (30*HZ) + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +static int icmpv6_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct ip6_conntrack_tuple *tuple) +{ + struct icmp6hdr hdr; + + if (skb_copy_bits(skb, dataoff, &hdr, sizeof(hdr)) != 0) + return 0; + tuple->dst.u.icmpv6.type = hdr.icmp6_type; + tuple->src.u.icmpv6.id = hdr.icmp6_identifier; + tuple->dst.u.icmpv6.code = hdr.icmp6_code; + + return 1; +} + +static int icmpv6_invert_tuple(struct ip6_conntrack_tuple *tuple, + const struct ip6_conntrack_tuple *orig) +{ + /* Add 1; spaces filled with 0. 
*/ + static u_int8_t invmap[] = { + [ICMPV6_ECHO_REQUEST] = ICMPV6_ECHO_REPLY + 1, + [ICMPV6_ECHO_REPLY] = ICMPV6_ECHO_REQUEST + 1, + [ICMPV6_NI_QUERY] = ICMPV6_NI_REPLY + 1, /* a query is answered by a reply */ + [ICMPV6_NI_REPLY] = ICMPV6_NI_QUERY + 1 /* and vice versa */ + }; + + if (orig->dst.u.icmpv6.type >= sizeof(invmap) + || !invmap[orig->dst.u.icmpv6.type]) + return 0; + + tuple->src.u.icmpv6.id = orig->src.u.icmpv6.id; + tuple->dst.u.icmpv6.type = invmap[orig->dst.u.icmpv6.type] - 1; + tuple->dst.u.icmpv6.code = orig->dst.u.icmpv6.code; + return 1; +} + +/* Print out the per-protocol part of the tuple. */ +static unsigned int icmpv6_print_tuple(char *buffer, + const struct ip6_conntrack_tuple *tuple) +{ + return sprintf(buffer, "type=%u code=%u id=%u ", + tuple->dst.u.icmpv6.type, + tuple->dst.u.icmpv6.code, + ntohs(tuple->src.u.icmpv6.id)); +} + +/* Print out the private part of the conntrack. */ +static unsigned int icmpv6_print_conntrack(char *buffer, + const struct ip6_conntrack *conntrack) +{ + return sprintf(buffer, "count=%u ", + atomic_read(&conntrack->proto.icmpv6.count)); +} + +/* Returns verdict for packet, or -1 for invalid. */ +static int icmpv6_packet(struct ip6_conntrack *ct, + const struct sk_buff *skb, + unsigned int dataoff, + enum ip6_conntrack_info ctinfo) +{ + /* Try to delete connection immediately after all replies: + won't actually vanish as we still have skb, and del_timer + means this will only run once even if count hits zero twice + (theoretically possible with SMP) */ + if (CTINFO2DIR(ctinfo) == IP6_CT_DIR_REPLY) { + if (atomic_dec_and_test(&ct->proto.icmpv6.count) + && del_timer(&ct->timeout)) + ct->timeout.function((unsigned long)ct); + } else { + atomic_inc(&ct->proto.icmpv6.count); + ip6_ct_refresh(ct, ICMPV6_TIMEOUT); + } + + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found.
*/ +static int icmpv6_new(struct ip6_conntrack *conntrack, + const struct sk_buff *skb, + unsigned int dataoff) +{ + static u_int8_t valid_new[] = { + [ICMPV6_ECHO_REQUEST] = 1, + [ICMPV6_NI_QUERY] = 1 + }; + + if (conntrack->tuplehash[0].tuple.dst.u.icmpv6.type >= sizeof(valid_new) + || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmpv6.type]) { + /* Can't create a new ICMPV6 `conn' with this. */ + DEBUGP("icmpv6: can't create new conn with type %u\n", + conntrack->tuplehash[0].tuple.dst.u.icmpv6.type); + DUMP_TUPLE(&conntrack->tuplehash[0].tuple); + return 0; + } + atomic_set(&conntrack->proto.icmpv6.count, 0); + return 1; +} + +struct ip6_conntrack_protocol ip6_conntrack_protocol_icmpv6 += { { NULL, NULL }, IPPROTO_ICMPV6, "icmpv6", + icmpv6_pkt_to_tuple, icmpv6_invert_tuple, icmpv6_print_tuple, + icmpv6_print_conntrack, icmpv6_packet, icmpv6_new, NULL, NULL, NULL }; diff -urN linux-2.6.11/net/ipv6/netfilter/ip6_conntrack_proto_tcp.c x1/net/ipv6/netfilter/ip6_conntrack_proto_tcp.c --- linux-2.6.11/net/ipv6/netfilter/ip6_conntrack_proto_tcp.c 1970-01-01 01:00:00.000000000 +0100 +++ x1/net/ipv6/netfilter/ip6_conntrack_proto_tcp.c 2003-09-18 12:48:46.000000000 +0200 @@ -0,0 +1,273 @@ +/* + * TCP extension for IPv6 Connection Tracking + * Linux INET6 implementation + * + * Copyright (C)2003 USAGI/WIDE Project + * + * Authors: + * Yasuyuki Kozakai + * + * Based on: net/ipv4/netfilter/ip_conntrack_proto_tcp.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) 
+#endif + +/* Protects conntrack->proto.tcp */ +static DECLARE_RWLOCK(tcp_lock); + +/* FIXME: Examine ipfilter's timeouts and conntrack transitions more + closely. They're more complex. --RR */ + +/* Actually, I believe that neither ipmasq (where this code is stolen + from) nor ipfilter do it exactly right. A new conntrack machine taking + into account packet loss (which creates uncertainty as to exactly + the conntrack of the connection) is required. RSN. --RR */ + +static const char *tcp_conntrack_names[] = { + "NONE", + "ESTABLISHED", + "SYN_SENT", + "SYN_RECV", + "FIN_WAIT", + "TIME_WAIT", + "CLOSE", + "CLOSE_WAIT", + "LAST_ACK", + "LISTEN" +}; + +#define SECS *HZ +#define MINS * 60 SECS +#define HOURS * 60 MINS +#define DAYS * 24 HOURS + + +static unsigned long tcp_timeouts[] += { 30 MINS, /* TCP_CONNTRACK_NONE, */ + 5 DAYS, /* TCP_CONNTRACK_ESTABLISHED, */ + 2 MINS, /* TCP_CONNTRACK_SYN_SENT, */ + 60 SECS, /* TCP_CONNTRACK_SYN_RECV, */ + 2 MINS, /* TCP_CONNTRACK_FIN_WAIT, */ + 2 MINS, /* TCP_CONNTRACK_TIME_WAIT, */ + 10 SECS, /* TCP_CONNTRACK_CLOSE, */ + 60 SECS, /* TCP_CONNTRACK_CLOSE_WAIT, */ + 30 SECS, /* TCP_CONNTRACK_LAST_ACK, */ + 2 MINS, /* TCP_CONNTRACK_LISTEN, */ +}; + +#define sNO TCP_CONNTRACK_NONE +#define sES TCP_CONNTRACK_ESTABLISHED +#define sSS TCP_CONNTRACK_SYN_SENT +#define sSR TCP_CONNTRACK_SYN_RECV +#define sFW TCP_CONNTRACK_FIN_WAIT +#define sTW TCP_CONNTRACK_TIME_WAIT +#define sCL TCP_CONNTRACK_CLOSE +#define sCW TCP_CONNTRACK_CLOSE_WAIT +#define sLA TCP_CONNTRACK_LAST_ACK +#define sLI TCP_CONNTRACK_LISTEN +#define sIV TCP_CONNTRACK_MAX + +static enum tcp_conntrack tcp_conntracks[2][5][TCP_CONNTRACK_MAX] = { + { +/* ORIGINAL */ +/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI */ +/*syn*/ {sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI }, +/*fin*/ {sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI }, +/*ack*/ {sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES }, +/*rst*/ {sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL }, 
+/*none*/{sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } + }, + { +/* REPLY */ +/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI */ +/*syn*/ {sSR, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }, +/*fin*/ {sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI }, +/*ack*/ {sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI }, +/*rst*/ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sLA, sLI }, +/*none*/{sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } + } +}; + +static int tcp_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct ip6_conntrack_tuple *tuple) +{ + struct tcphdr hdr; + + /* Actually only need first 8 bytes. */ + if (skb_copy_bits(skb, dataoff, &hdr, 8) != 0) + return 0; + + tuple->src.u.tcp.port = hdr.source; + tuple->dst.u.tcp.port = hdr.dest; + + return 1; +} + +static int tcp_invert_tuple(struct ip6_conntrack_tuple *tuple, + const struct ip6_conntrack_tuple *orig) +{ + tuple->src.u.tcp.port = orig->dst.u.tcp.port; + tuple->dst.u.tcp.port = orig->src.u.tcp.port; + return 1; +} + +/* Print out the per-protocol part of the tuple. */ +static unsigned int tcp_print_tuple(char *buffer, + const struct ip6_conntrack_tuple *tuple) +{ + return sprintf(buffer, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.tcp.port), + ntohs(tuple->dst.u.tcp.port)); +} + +/* Print out the private part of the conntrack. */ +static unsigned int tcp_print_conntrack(char *buffer, + const struct ip6_conntrack *conntrack) +{ + enum tcp_conntrack state; + + READ_LOCK(&tcp_lock); + state = conntrack->proto.tcp.state; + READ_UNLOCK(&tcp_lock); + + return sprintf(buffer, "%s ", tcp_conntrack_names[state]); +} + +static unsigned int get_conntrack_index(const struct tcphdr *tcph) +{ + if (tcph->rst) return 3; + else if (tcph->syn) return 0; + else if (tcph->fin) return 1; + else if (tcph->ack) return 2; + else return 4; +} + +/* Returns verdict for packet, or -1 for invalid. 
*/ +static int tcp_packet(struct ip6_conntrack *conntrack, + const struct sk_buff *skb, + unsigned int dataoff, + enum ip6_conntrack_info ctinfo) +{ + enum tcp_conntrack newconntrack, oldtcpstate; + struct tcphdr tcph; + + if (skb_copy_bits(skb, dataoff, &tcph, sizeof(tcph)) != 0) + return -1; + + WRITE_LOCK(&tcp_lock); + oldtcpstate = conntrack->proto.tcp.state; + newconntrack + = tcp_conntracks + [CTINFO2DIR(ctinfo)] + [get_conntrack_index(&tcph)][oldtcpstate]; + + /* Invalid */ + if (newconntrack == TCP_CONNTRACK_MAX) { + DEBUGP("ip6_conntrack_tcp: Invalid dir=%i index=%u conntrack=%u\n", + CTINFO2DIR(ctinfo), get_conntrack_index(&tcph), + conntrack->proto.tcp.state); + WRITE_UNLOCK(&tcp_lock); + return -1; + } + + conntrack->proto.tcp.state = newconntrack; + + /* Poor man's window tracking: record SYN/ACK for handshake check */ + if (oldtcpstate == TCP_CONNTRACK_SYN_SENT + && CTINFO2DIR(ctinfo) == IP6_CT_DIR_REPLY + && tcph.syn && tcph.ack) + conntrack->proto.tcp.handshake_ack + = htonl(ntohl(tcph.seq) + 1); + + /* If only reply is a RST, we can consider ourselves not to + have an established connection: this is a fairly common + problem case, so we can delete the conntrack + immediately. --RR */ + if (!test_bit(IP6S_SEEN_REPLY_BIT, &conntrack->status) && tcph.rst) { + WRITE_UNLOCK(&tcp_lock); + if (del_timer(&conntrack->timeout)) + conntrack->timeout.function((unsigned long)conntrack); + } else { + /* Set ASSURED if we see see valid ack in ESTABLISHED after SYN_RECV */ + if (oldtcpstate == TCP_CONNTRACK_SYN_RECV + && CTINFO2DIR(ctinfo) == IP6_CT_DIR_ORIGINAL + && tcph.ack && !tcph.syn + && tcph.ack_seq == conntrack->proto.tcp.handshake_ack) + set_bit(IP6S_ASSURED_BIT, &conntrack->status); + + WRITE_UNLOCK(&tcp_lock); + ip6_ct_refresh(conntrack, tcp_timeouts[newconntrack]); + } + + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. 
*/
+/*
+ * Picks the initial TCP state for a fresh conntrack from the first
+ * packet's flags (original direction, starting from TCP_CONNTRACK_NONE).
+ *
+ * Returns 1 when the state has been stored in conntrack and 0 when the
+ * packet cannot start a connection.  An unreadable TCP header is reported
+ * as 0 as well: this function only ever hands back 0 or 1, and a negative
+ * value would read as "success" to a caller that tests for non-zero.
+ */
+static int tcp_new(struct ip6_conntrack *conntrack, const struct sk_buff *skb,
+		   unsigned int dataoff)
+{
+	enum tcp_conntrack newconntrack;
+	struct tcphdr tcph;
+
+	if (skb_copy_bits(skb, dataoff, &tcph, sizeof(tcph)) != 0)
+		return 0;
+
+	/* Don't need lock here: this conntrack not in circulation yet */
+	newconntrack
+		= tcp_conntracks[0][get_conntrack_index(&tcph)]
+		[TCP_CONNTRACK_NONE];
+
+	/* Invalid: delete conntrack */
+	if (newconntrack == TCP_CONNTRACK_MAX) {
+		DEBUGP("ip6_conntrack_tcp: invalid new deleting.\n");
+		return 0;
+	}
+
+	conntrack->proto.tcp.state = newconntrack;
+	return 1;
+}
+
+/*
+ * Returns non-zero when exp->seq lies between the packet's sequence
+ * number and that number plus the bytes following dataoff, as decided
+ * by between(); returns 0 when the TCP header cannot be read.
+ */
+static int tcp_exp_matches_pkt(struct ip6_conntrack_expect *exp,
+			       const struct sk_buff *skb,
+			       unsigned int dataoff)
+{
+	struct tcphdr tcph;
+	unsigned int datalen;
+
+	if (skb_copy_bits(skb, dataoff, &tcph, sizeof(tcph)) != 0)
+		return 0;
+	datalen = skb->len - dataoff;
+
+	return between(exp->seq, ntohl(tcph.seq), ntohl(tcph.seq) + datalen);
+}
+
+struct ip6_conntrack_protocol ip6_conntrack_protocol_tcp
+= { { NULL, NULL }, IPPROTO_TCP, "tcp",
+    tcp_pkt_to_tuple, tcp_invert_tuple, tcp_print_tuple, tcp_print_conntrack,
+    tcp_packet, tcp_new, NULL, tcp_exp_matches_pkt, NULL };
diff -urN linux-2.6.11/net/ipv6/netfilter/ip6_conntrack_proto_udp.c x1/net/ipv6/netfilter/ip6_conntrack_proto_udp.c
--- linux-2.6.11/net/ipv6/netfilter/ip6_conntrack_proto_udp.c 1970-01-01 01:00:00.000000000 +0100
+++ x1/net/ipv6/netfilter/ip6_conntrack_proto_udp.c 2003-09-18 12:48:46.000000000 +0200
@@ -0,0 +1,95 @@
+/*
+ * UDP extension for IPv6 Connection Tracking
+ * Linux INET6 implementation
+ *
+ * Copyright (C)2003 USAGI/WIDE Project
+ *
+ * Authors:
+ *	Yasuyuki Kozakai
+ *
+ * Based on: net/ipv4/netfilter/ip_conntrack_proto_udp.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any
later version. + */ +#include +#include +#include +#include +#include +#include + +#define UDP_TIMEOUT (30*HZ) +#define UDP_STREAM_TIMEOUT (180*HZ) + +static int udp_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct ip6_conntrack_tuple *tuple) +{ + struct udphdr hdr; + + /* Actually only need first 8 bytes. */ + if (skb_copy_bits(skb, dataoff, &hdr, 8) != 0) + return 0; + + tuple->src.u.udp.port = hdr.source; + tuple->dst.u.udp.port = hdr.dest; + + return 1; +} + +static int udp_invert_tuple(struct ip6_conntrack_tuple *tuple, + const struct ip6_conntrack_tuple *orig) +{ + tuple->src.u.udp.port = orig->dst.u.udp.port; + tuple->dst.u.udp.port = orig->src.u.udp.port; + return 1; +} + +/* Print out the per-protocol part of the tuple. */ +static unsigned int udp_print_tuple(char *buffer, + const struct ip6_conntrack_tuple *tuple) +{ + return sprintf(buffer, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.udp.port), + ntohs(tuple->dst.u.udp.port)); +} + +/* Print out the private part of the conntrack. */ +static unsigned int udp_print_conntrack(char *buffer, + const struct ip6_conntrack *conntrack) +{ + return 0; +} + +/* Returns verdict for packet, and may modify conntracktype */ +static int udp_packet(struct ip6_conntrack *conntrack, + const struct sk_buff *skb, + unsigned int dataoff, + enum ip6_conntrack_info conntrackinfo) +{ + /* If we've seen traffic both ways, this is some kind of UDP + stream. Extend timeout. */ + if (test_bit(IP6S_SEEN_REPLY_BIT, &conntrack->status)) { + ip6_ct_refresh(conntrack, UDP_STREAM_TIMEOUT); + /* Also, more likely to be important, and not a probe */ + set_bit(IP6S_ASSURED_BIT, &conntrack->status); + } else + ip6_ct_refresh(conntrack, UDP_TIMEOUT); + + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. 
*/ +static int udp_new(struct ip6_conntrack *conntrack, const struct sk_buff *skb, + unsigned int dataoff) +{ + return 1; +} + +struct ip6_conntrack_protocol ip6_conntrack_protocol_udp += { { NULL, NULL }, IPPROTO_UDP, "udp", + udp_pkt_to_tuple, udp_invert_tuple, udp_print_tuple, udp_print_conntrack, + udp_packet, udp_new, NULL, NULL, NULL }; diff -urN linux-2.6.11/net/ipv6/netfilter/ip6_conntrack_reasm.c x1/net/ipv6/netfilter/ip6_conntrack_reasm.c --- linux-2.6.11/net/ipv6/netfilter/ip6_conntrack_reasm.c 1970-01-01 01:00:00.000000000 +0100 +++ x1/net/ipv6/netfilter/ip6_conntrack_reasm.c 2003-10-16 04:48:59.000000000 +0200 @@ -0,0 +1,990 @@ +/* + * IPv6 fragment reassembly for connection tracking + * Linux INET6 implementation + * + * Copyright (C)2003 USAGI/WIDE Project + * + * Authors: + * Yasuyuki Kozakai + * + * Based on: net/ipv6/reassembly.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) 
+#endif + +#define IP6CT_FRAGS_HIGH_THRESH 262144 /* == 256*1024 */ +#define IP6CT_FRAGS_LOW_THRESH 196608 /* == 192*1024 */ +#define IP6CT_FRAGS_TIMEOUT IPV6_FRAG_TIMEOUT + +static int sysctl_ip6_ct_frag_high_thresh = 256*1024; +static int sysctl_ip6_ct_frag_low_thresh = 192*1024; +static int sysctl_ip6_ct_frag_time = IPV6_FRAG_TIMEOUT; + +struct ip6ct_frag_skb_cb +{ + struct inet6_skb_parm h; + int offset; + struct sk_buff *orig; +}; + +#define IP6CT_FRAG6_CB(skb) ((struct ip6ct_frag_skb_cb*)((skb)->cb)) + + +/* + * Equivalent of ipv4 struct ipq + */ + +struct ip6ct_frag_queue +{ + struct ip6ct_frag_queue *next; + struct list_head lru_list; /* lru list member */ + + __u32 id; /* fragment id */ + struct in6_addr saddr; + struct in6_addr daddr; + + spinlock_t lock; + atomic_t refcnt; + struct timer_list timer; /* expire timer */ + struct sk_buff *fragments; + int len; + int meat; + struct timeval stamp; + unsigned int csum; + __u8 last_in; /* has first/last segment arrived? */ +#define COMPLETE 4 +#define FIRST_IN 2 +#define LAST_IN 1 + __u16 nhoffset; + struct ip6ct_frag_queue **pprev; +}; + +/* Hash table. 
*/ + +#define IP6CT_Q_HASHSZ 64 + +static struct ip6ct_frag_queue *ip6_ct_frag_hash[IP6CT_Q_HASHSZ]; +static rwlock_t ip6_ct_frag_lock = RW_LOCK_UNLOCKED; +static u32 ip6_ct_frag_hash_rnd; +static LIST_HEAD(ip6_ct_frag_lru_list); +int ip6_ct_frag_nqueues = 0; + +static __inline__ void __fq_unlink(struct ip6ct_frag_queue *fq) +{ + if(fq->next) + fq->next->pprev = fq->pprev; + *fq->pprev = fq->next; + list_del(&fq->lru_list); + ip6_ct_frag_nqueues--; +} + +static __inline__ void fq_unlink(struct ip6ct_frag_queue *fq) +{ + write_lock(&ip6_ct_frag_lock); + __fq_unlink(fq); + write_unlock(&ip6_ct_frag_lock); +} + +static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr, + struct in6_addr *daddr) +{ + u32 a, b, c; + + a = saddr->s6_addr32[0]; + b = saddr->s6_addr32[1]; + c = saddr->s6_addr32[2]; + + a += JHASH_GOLDEN_RATIO; + b += JHASH_GOLDEN_RATIO; + c += ip6_ct_frag_hash_rnd; + __jhash_mix(a, b, c); + + a += saddr->s6_addr32[3]; + b += daddr->s6_addr32[0]; + c += daddr->s6_addr32[1]; + __jhash_mix(a, b, c); + + a += daddr->s6_addr32[2]; + b += daddr->s6_addr32[3]; + c += id; + __jhash_mix(a, b, c); + + return c & (IP6CT_Q_HASHSZ - 1); +} + +static struct timer_list ip6_ct_frag_secret_timer; +int sysctl_ip6_ct_frag_secret_interval = 10 * 60 * HZ; + +static void ip6_ct_frag_secret_rebuild(unsigned long dummy) +{ + unsigned long now = jiffies; + int i; + + write_lock(&ip6_ct_frag_lock); + get_random_bytes(&ip6_ct_frag_hash_rnd, sizeof(u32)); + for (i = 0; i < IP6CT_Q_HASHSZ; i++) { + struct ip6ct_frag_queue *q; + + q = ip6_ct_frag_hash[i]; + while (q) { + struct ip6ct_frag_queue *next = q->next; + unsigned int hval = ip6qhashfn(q->id, + &q->saddr, + &q->daddr); + + if (hval != i) { + /* Unlink. */ + if (q->next) + q->next->pprev = q->pprev; + *q->pprev = q->next; + + /* Relink to new hash chain. 
*/ + if ((q->next = ip6_ct_frag_hash[hval]) != NULL) + q->next->pprev = &q->next; + ip6_ct_frag_hash[hval] = q; + q->pprev = &ip6_ct_frag_hash[hval]; + } + + q = next; + } + } + write_unlock(&ip6_ct_frag_lock); + + mod_timer(&ip6_ct_frag_secret_timer, now + sysctl_ip6_ct_frag_secret_interval); +} + +atomic_t ip6_ct_frag_mem = ATOMIC_INIT(0); + +/* Memory Tracking Functions. */ +static inline void frag_kfree_skb(struct sk_buff *skb) +{ + atomic_sub(skb->truesize, &ip6_ct_frag_mem); + if (IP6CT_FRAG6_CB(skb)->orig) + kfree_skb(IP6CT_FRAG6_CB(skb)->orig); + + kfree_skb(skb); +} + +static inline void frag_free_queue(struct ip6ct_frag_queue *fq) +{ + atomic_sub(sizeof(struct ip6ct_frag_queue), &ip6_ct_frag_mem); + kfree(fq); +} + +static inline struct ip6ct_frag_queue *frag_alloc_queue(void) +{ + struct ip6ct_frag_queue *fq = kmalloc(sizeof(struct ip6ct_frag_queue), GFP_ATOMIC); + + if(!fq) + return NULL; + atomic_add(sizeof(struct ip6ct_frag_queue), &ip6_ct_frag_mem); + return fq; +} + +/* Destruction primitives. */ + +/* Complete destruction of fq. */ +static void ip6_ct_frag_destroy(struct ip6ct_frag_queue *fq) +{ + struct sk_buff *fp; + + BUG_TRAP(fq->last_in&COMPLETE); + BUG_TRAP(del_timer(&fq->timer) == 0); + + /* Release all fragment data. */ + fp = fq->fragments; + while (fp) { + struct sk_buff *xp = fp->next; + + frag_kfree_skb(fp); + fp = xp; + } + + frag_free_queue(fq); +} + +static __inline__ void fq_put(struct ip6ct_frag_queue *fq) +{ + if (atomic_dec_and_test(&fq->refcnt)) + ip6_ct_frag_destroy(fq); +} + +/* Kill fq entry. It is not destroyed immediately, + * because caller (and someone more) holds reference count. 
+ */
+static __inline__ void fq_kill(struct ip6ct_frag_queue *fq)
+{
+	/* A timer we managed to stop gives up its reference. */
+	if (del_timer(&fq->timer))
+		atomic_dec(&fq->refcnt);
+
+	/* Already marked COMPLETE: nothing left to unlink. */
+	if (fq->last_in & COMPLETE)
+		return;
+
+	fq_unlink(fq);
+	atomic_dec(&fq->refcnt);
+	fq->last_in |= COMPLETE;
+}
+
+/*
+ * Kill queues from the head of the LRU list until the accounted memory
+ * is no longer above the low threshold or the list is empty.
+ */
+static void ip6_ct_frag_evictor(void)
+{
+	struct ip6ct_frag_queue *victim;
+
+	while (atomic_read(&ip6_ct_frag_mem) > sysctl_ip6_ct_frag_low_thresh) {
+		read_lock(&ip6_ct_frag_lock);
+		if (list_empty(&ip6_ct_frag_lru_list)) {
+			read_unlock(&ip6_ct_frag_lock);
+			return;
+		}
+		victim = list_entry(ip6_ct_frag_lru_list.next,
+				    struct ip6ct_frag_queue, lru_list);
+		/* Pin the queue before the list lock is dropped. */
+		atomic_inc(&victim->refcnt);
+		read_unlock(&ip6_ct_frag_lock);
+
+		spin_lock(&victim->lock);
+		if (!(victim->last_in & COMPLETE))
+			fq_kill(victim);
+		spin_unlock(&victim->lock);
+
+		fq_put(victim);
+	}
+}
+
+/* Timer handler: kill the queue unless it is already COMPLETE. */
+static void ip6_ct_frag_expire(unsigned long data)
+{
+	struct ip6ct_frag_queue *fq = (struct ip6ct_frag_queue *) data;
+
+	spin_lock(&fq->lock);
+	if (!(fq->last_in & COMPLETE))
+		fq_kill(fq);
+	spin_unlock(&fq->lock);
+
+	fq_put(fq);
+}
+
+/* Creation primitives.
*/ + + +static struct ip6ct_frag_queue *ip6_ct_frag_intern(unsigned int hash, + struct ip6ct_frag_queue *fq_in) +{ + struct ip6ct_frag_queue *fq; + + write_lock(&ip6_ct_frag_lock); +#ifdef CONFIG_SMP + for (fq = ip6_ct_frag_hash[hash]; fq; fq = fq->next) { + if (fq->id == fq_in->id && + !ipv6_addr_cmp(&fq_in->saddr, &fq->saddr) && + !ipv6_addr_cmp(&fq_in->daddr, &fq->daddr)) { + atomic_inc(&fq->refcnt); + write_unlock(&ip6_ct_frag_lock); + fq_in->last_in |= COMPLETE; + fq_put(fq_in); + return fq; + } + } +#endif + fq = fq_in; + + if (!mod_timer(&fq->timer, jiffies + sysctl_ip6_ct_frag_time)) + atomic_inc(&fq->refcnt); + + atomic_inc(&fq->refcnt); + if((fq->next = ip6_ct_frag_hash[hash]) != NULL) + fq->next->pprev = &fq->next; + ip6_ct_frag_hash[hash] = fq; + fq->pprev = &ip6_ct_frag_hash[hash]; + INIT_LIST_HEAD(&fq->lru_list); + list_add_tail(&fq->lru_list, &ip6_ct_frag_lru_list); + ip6_ct_frag_nqueues++; + write_unlock(&ip6_ct_frag_lock); + return fq; +} + + +static struct ip6ct_frag_queue * +ip6_ct_frag_create(unsigned int hash, u32 id, struct in6_addr *src, struct in6_addr *dst) +{ + struct ip6ct_frag_queue *fq; + + if ((fq = frag_alloc_queue()) == NULL) { + DEBUGP("Can't alloc new queue\n"); + goto oom; + } + + memset(fq, 0, sizeof(struct ip6ct_frag_queue)); + + fq->id = id; + ipv6_addr_copy(&fq->saddr, src); + ipv6_addr_copy(&fq->daddr, dst); + + init_timer(&fq->timer); + fq->timer.function = ip6_ct_frag_expire; + fq->timer.data = (long) fq; + fq->lock = SPIN_LOCK_UNLOCKED; + atomic_set(&fq->refcnt, 1); + + return ip6_ct_frag_intern(hash, fq); + +oom: + return NULL; +} + +static __inline__ struct ip6ct_frag_queue * +fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst) +{ + struct ip6ct_frag_queue *fq; + unsigned int hash = ip6qhashfn(id, src, dst); + + read_lock(&ip6_ct_frag_lock); + for(fq = ip6_ct_frag_hash[hash]; fq; fq = fq->next) { + if (fq->id == id && + !ipv6_addr_cmp(src, &fq->saddr) && + !ipv6_addr_cmp(dst, &fq->daddr)) { + 
atomic_inc(&fq->refcnt); + read_unlock(&ip6_ct_frag_lock); + return fq; + } + } + read_unlock(&ip6_ct_frag_lock); + + return ip6_ct_frag_create(hash, id, src, dst); +} + + +static int ip6_ct_frag_queue(struct ip6ct_frag_queue *fq, struct sk_buff *skb, + struct frag_hdr *fhdr, int nhoff) +{ + struct sk_buff *prev, *next; + int offset, end; + + if (fq->last_in & COMPLETE) { + DEBUGP("Allready completed\n"); + goto err; + } + + offset = ntohs(fhdr->frag_off) & ~0x7; + end = offset + (ntohs(skb->nh.ipv6h->payload_len) - + ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1))); + + if ((unsigned int)end > IPV6_MAXPLEN) { + DEBUGP("offset is too large.\n"); + return -1; + } + + if (skb->ip_summed == CHECKSUM_HW) + skb->csum = csum_sub(skb->csum, + csum_partial(skb->nh.raw, (u8*)(fhdr+1)-skb->nh.raw, 0)); + + /* Is this the final fragment? */ + if (!(fhdr->frag_off & htons(IP6_MF))) { + /* If we already have some bits beyond end + * or have different end, the segment is corrupted. + */ + if (end < fq->len || + ((fq->last_in & LAST_IN) && end != fq->len)) { + DEBUGP("already received last fragment\n"); + goto err; + } + fq->last_in |= LAST_IN; + fq->len = end; + } else { + /* Check if the fragment is rounded to 8 bytes. + * Required by the RFC. + */ + if (end & 0x7) { + /* RFC2460 says always send parameter problem in + * this case. -DaveM + */ + DEBUGP("the end of this message is not rounded to 8 bytes.\n"); + return -1; + } + if (end > fq->len) { + /* Some bits beyond end -> corruption. */ + if (fq->last_in & LAST_IN) { + DEBUGP("last packet already reached.\n"); + goto err; + } + fq->len = end; + } + } + + if (end == offset) + goto err; + + /* Point into the IP datagram 'data' part. 
*/ + if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) { + DEBUGP("queue: message is too short.\n"); + goto err; + } + if (end-offset < skb->len) { + if (pskb_trim(skb, end - offset)) { + DEBUGP("Can't trim\n"); + goto err; + } + if (skb->ip_summed != CHECKSUM_UNNECESSARY) + skb->ip_summed = CHECKSUM_NONE; + } + + /* Find out which fragments are in front and at the back of us + * in the chain of fragments so far. We must know where to put + * this fragment, right? + */ + prev = NULL; + for(next = fq->fragments; next != NULL; next = next->next) { + if (IP6CT_FRAG6_CB(next)->offset >= offset) + break; /* bingo! */ + prev = next; + } + + /* We found where to put this one. Check for overlap with + * preceding fragment, and, if needed, align things so that + * any overlaps are eliminated. + */ + if (prev) { + int i = (IP6CT_FRAG6_CB(prev)->offset + prev->len) - offset; + + if (i > 0) { + offset += i; + if (end <= offset) { + DEBUGP("overlap\n"); + goto err; + } + if (!pskb_pull(skb, i)) { + DEBUGP("Can't pull\n"); + goto err; + } + if (skb->ip_summed != CHECKSUM_UNNECESSARY) + skb->ip_summed = CHECKSUM_NONE; + } + } + + /* Look for overlap with succeeding segments. + * If we can merge fragments, do it. + */ + while (next && IP6CT_FRAG6_CB(next)->offset < end) { + int i = end - IP6CT_FRAG6_CB(next)->offset; /* overlap is 'i' bytes */ + + if (i < next->len) { + /* Eat head of the next overlapped fragment + * and leave the loop. The next ones cannot overlap. + */ + DEBUGP("Eat head of the overlapped parts.: %d", i); + if (!pskb_pull(next, i)) + goto err; + IP6CT_FRAG6_CB(next)->offset += i; /* next fragment */ + fq->meat -= i; + if (next->ip_summed != CHECKSUM_UNNECESSARY) + next->ip_summed = CHECKSUM_NONE; + break; + } else { + struct sk_buff *free_it = next; + + /* Old fragmnet is completely overridden with + * new one drop it. 
+			 */
+			next = next->next;
+
+			if (prev)
+				prev->next = next;
+			else
+				fq->fragments = next;
+
+			fq->meat -= free_it->len;
+			frag_kfree_skb(free_it);
+		}
+	}
+
+	IP6CT_FRAG6_CB(skb)->offset = offset;
+
+	/* Insert this fragment in the chain of fragments. */
+	skb->next = next;
+	if (prev)
+		prev->next = skb;
+	else
+		fq->fragments = skb;
+
+	skb->dev = NULL;
+	fq->stamp = skb->stamp;
+	fq->meat += skb->len;
+	atomic_add(skb->truesize, &ip6_ct_frag_mem);
+
+	/* The first fragment.
+	 * nhoffset is obtained from the first fragment, of course.
+	 */
+	if (offset == 0) {
+		fq->nhoffset = nhoff;
+		fq->last_in |= FIRST_IN;
+	}
+	write_lock(&ip6_ct_frag_lock);
+	list_move_tail(&fq->lru_list, &ip6_ct_frag_lru_list);
+	write_unlock(&ip6_ct_frag_lock);
+	return 0;
+
+err:
+	return -1;
+}
+
+/*
+ *	Build the reassembled packet out of the fragments queued on fq.
+ *	Returns the head skb of the reassembled frame, or NULL on failure
+ *	by any reason (oversized payload, no memory).
+ *
+ *	On success the original (un-cloned) skbs recorded in each
+ *	fragment's control block are chained through ->next, starting at
+ *	IP6CT_FRAG6_CB(head)->orig.
+ *
+ *	It is called with locked fq, and caller must check that
+ *	queue is eligible for reassembly i.e. it is not COMPLETE,
+ *	the last and the first frames arrived and all the bits are here.
+ */
+static struct sk_buff *
+ip6_ct_frag_reasm(struct ip6ct_frag_queue *fq, struct net_device *dev)
+{
+	struct sk_buff *fp, *op, *head = fq->fragments;
+	int    payload_len;
+
+	fq_kill(fq);
+
+	BUG_TRAP(head != NULL);
+	BUG_TRAP(IP6CT_FRAG6_CB(head)->offset == 0);
+
+	/* Unfragmented part is taken from the first segment. */
+	payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr);
+	if (payload_len > IPV6_MAXPLEN) {
+		DEBUGP("payload len is too large.\n");
+		goto out_oversize;
+	}
+
+	/* Head of list must not be cloned. */
+	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) {
+		DEBUGP("skb is cloned but can't expand head");
+		goto out_oom;
+	}
+
+	/* If the first fragment is fragmented itself, we split
+	 * it to two chunks: the first with data and paged part
+	 * and the second, holding only fragments. */
+	if (skb_shinfo(head)->frag_list) {
+		struct sk_buff *clone;
+		int i, plen = 0;
+
+		if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) {
+			DEBUGP("Can't alloc skb\n");
+			goto out_oom;
+		}
+		clone->next = head->next;
+		head->next = clone;
+		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
+		skb_shinfo(head)->frag_list = NULL;
+		/* plen: bytes held in head's own page fragments. */
+		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
+			plen += skb_shinfo(head)->frags[i].size;
+		clone->len = clone->data_len = head->data_len - plen;
+		head->data_len -= clone->len;
+		head->len -= clone->len;
+		clone->csum = 0;
+		clone->ip_summed = head->ip_summed;
+
+		/* orig == NULL marks this helper skb; see the walk below. */
+		IP6CT_FRAG6_CB(clone)->orig = NULL;
+		atomic_add(clone->truesize, &ip6_ct_frag_mem);
+	}
+
+	/* We have to remove fragment header from datagram and to relocate
+	 * header in order to calculate ICV correctly. */
+	head->nh.raw[fq->nhoffset] = head->h.raw[0];
+	memmove(head->head + sizeof(struct frag_hdr), head->head,
+		(head->data - head->head) - sizeof(struct frag_hdr));
+	head->mac.raw += sizeof(struct frag_hdr);
+	head->nh.raw += sizeof(struct frag_hdr);
+
+	skb_shinfo(head)->frag_list = head->next;
+	head->h.raw = head->data;
+	skb_push(head, head->data - head->nh.raw);
+	atomic_sub(head->truesize, &ip6_ct_frag_mem);
+
+	for (fp=head->next; fp; fp = fp->next) {
+		head->data_len += fp->len;
+		head->len += fp->len;
+		if (head->ip_summed != fp->ip_summed)
+			head->ip_summed = CHECKSUM_NONE;
+		else if (head->ip_summed == CHECKSUM_HW)
+			head->csum = csum_add(head->csum, fp->csum);
+		head->truesize += fp->truesize;
+		atomic_sub(fp->truesize, &ip6_ct_frag_mem);
+	}
+
+	head->next = NULL;
+	head->dev = dev;
+	head->stamp = fq->stamp;
+	/* Host-order length goes into a network-order header field. */
+	head->nh.ipv6h->payload_len = htons(payload_len);
+
+	/* Yes, and fold redundant checksum back. 8) */
+	if (head->ip_summed == CHECKSUM_HW)
+		head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
+
+	fq->fragments = NULL;
+
+	/* all original skbs are linked into the IP6CT_FRAG6_CB(head).orig */
+	fp = skb_shinfo(head)->frag_list;
+	/* frag_list is NULL when the queue held a single skb, which happens
+	 * when a later fragment completely covered (and so replaced) the
+	 * earlier ones; do not dereference it in that case. */
+	if (fp && IP6CT_FRAG6_CB(fp)->orig == NULL)
+		/* at above code, head skb is divided into two skbs. */
+		fp = fp->next;
+
+	op = IP6CT_FRAG6_CB(head)->orig;
+	for (; fp; fp = fp->next) {
+		struct sk_buff *orig = IP6CT_FRAG6_CB(fp)->orig;
+
+		op->next = orig;
+		op = orig;
+		IP6CT_FRAG6_CB(fp)->orig = NULL;
+	}
+
+	return head;
+
+out_oversize:
+	if (net_ratelimit())
+		printk(KERN_DEBUG "ip6_ct_frag_reasm: payload len = %d\n", payload_len);
+	goto out_fail;
+out_oom:
+	if (net_ratelimit())
+		printk(KERN_DEBUG "ip6_ct_frag_reasm: no memory for reassembly\n");
+out_fail:
+	return NULL;
+}
+
+/*
+ * find the header just before Fragment Header.
+ *
+ * if success return 0 and set ...
+ * (*prevhdrp): the value of "Next Header Field" in the header
+ *		just before Fragment Header.
+ * (*prevhoff): the offset of "Next Header Field" in the header
+ *		just before Fragment Header.
+ * (*fhoff)   : the offset of Fragment Header.
+ *
+ * All offsets are counted from skb->data.  Returns -1 when the chain
+ * ends without a Fragment Header, hits a non-extension header or
+ * NEXTHDR_NONE, or runs past the end of the packet.
+ *
+ * Based on ipv6_skip_hdr() in net/ipv6/exthdr.c
+ *
+ */
+static int
+find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
+{
+	u8 nexthdr = skb->nh.ipv6h->nexthdr;
+	/* Kept as int: it takes the value of 'start', which can exceed 255
+	 * once extension headers precede the Fragment Header. */
+	int prev_nhoff = (u8 *)&skb->nh.ipv6h->nexthdr - skb->data;
+	int start = (u8 *)(skb->nh.ipv6h+1) - skb->data;
+	int len = skb->len - start;
+	u8 prevhdr = NEXTHDR_IPV6;
+
+	while (nexthdr != NEXTHDR_FRAGMENT) {
+		struct ipv6_opt_hdr hdr;
+		int hdrlen;
+
+		if (!ipv6_ext_hdr(nexthdr)) {
+			return -1;
+		}
+		if (len < (int)sizeof(struct ipv6_opt_hdr)) {
+			DEBUGP("too short\n");
+			return -1;
+		}
+		if (nexthdr == NEXTHDR_NONE) {
+			DEBUGP("next header is none\n");
+			return -1;
+		}
+		/* Cannot fail: the length check above guarantees the bytes. */
+		if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
+			BUG();
+		if (nexthdr == NEXTHDR_AUTH)
+			hdrlen = (hdr.hdrlen+2)<<2;
+		else
+			hdrlen = ipv6_optlen(&hdr);
+
+		/* The header at 'start' begins with its own next-header
+		 * byte, so 'start' is also that field's offset. */
+		prevhdr = nexthdr;
+		prev_nhoff = start;
+
+		nexthdr = hdr.nexthdr;
+		len -= hdrlen;
+		start += hdrlen;
+	}
+
+	if (len < 0)
+		return -1;
+
+	*prevhdrp = prevhdr;
+	*prevhoff = prev_nhoff;
+	*fhoff = start;
+
+	return 0;
+}
+
+struct sk_buff *ip6_ct_gather_frags(struct sk_buff *skb)
+{
+	struct sk_buff *clone;
+	struct net_device *dev = skb->dev;
+	struct frag_hdr *fhdr;
+	struct ip6ct_frag_queue *fq;
+	struct ipv6hdr *hdr;
+	int fhoff, nhoff;
+	u8 prevhdr;
+	struct sk_buff *ret_skb = NULL;
+
+	/* Jumbo payload inhibits frag.
header */
+	if (skb->nh.ipv6h->payload_len == 0) {
+		DEBUGP("payload len = 0\n");
+		return skb;
+	}
+
+	if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0)
+		return skb;
+
+	clone = skb_clone(skb, GFP_ATOMIC);
+	if (clone == NULL) {
+		DEBUGP("Can't clone skb\n");
+		return skb;
+	}
+
+	IP6CT_FRAG6_CB(clone)->orig = skb;
+
+	if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) {
+		DEBUGP("message is too short.\n");
+		goto ret_orig;
+	}
+
+	clone->h.raw = clone->data + fhoff;
+	hdr = clone->nh.ipv6h;
+	fhdr = (struct frag_hdr *)clone->h.raw;
+
+	if (!(fhdr->frag_off & htons(0xFFF9))) {
+		DEBUGP("Invalid fragment offset\n");
+		/* It is not a fragmented frame */
+		goto ret_orig;
+	}
+
+	if (atomic_read(&ip6_ct_frag_mem) > sysctl_ip6_ct_frag_high_thresh)
+		ip6_ct_frag_evictor();
+
+	if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr)) == NULL) {
+		DEBUGP("Can't find and can't create new queue\n");
+		goto ret_orig;
+	}
+
+	spin_lock(&fq->lock);
+
+	if (ip6_ct_frag_queue(fq, clone, fhdr, nhoff) < 0) {
+		spin_unlock(&fq->lock);
+		DEBUGP("Can't insert skb to queue\n");
+		fq_put(fq);
+		goto ret_orig;
+	}
+
+	if (fq->last_in == (FIRST_IN|LAST_IN) &&
+	    fq->meat == fq->len) {
+		ret_skb = ip6_ct_frag_reasm(fq, dev);
+
+		if (ret_skb == NULL)
+			DEBUGP("Can't reassemble fragmented packets\n");
+	}
+	spin_unlock(&fq->lock);
+
+	fq_put(fq);
+	return ret_skb;
+
+ret_orig:
+	kfree_skb(clone);
+	return skb;
+}
+
+/*
+ * Re-inject every original fragment chained at IP6CT_FRAG6_CB(skb)->orig
+ * with verdict NF_ACCEPT, giving each one skb's nfct/nfcache, then free
+ * the reassembled skb itself.
+ *
+ * Returns 0 when the whole chain has been walked, -1 when a copy of
+ * info cannot be allocated.
+ */
+int ip6_ct_output_frags(struct sk_buff *skb, struct nf_info *info)
+{
+	struct sk_buff *s, *s2;
+	struct nf_info *copy_info;
+
+	for (s = IP6CT_FRAG6_CB(skb)->orig; s;) {
+		if (skb->nfct)
+			nf_conntrack_get(skb->nfct);
+		s->nfct = skb->nfct;
+		s->nfcache = skb->nfcache;
+
+		/*
+		 * nf_reinject() frees copy_info,
+		 * so I have to copy it every time. (T-T
+		 */
+		copy_info = kmalloc(sizeof(*copy_info), GFP_ATOMIC);
+		if (copy_info == NULL) {
+			DEBUGP("Can't kmalloc() for nf_info\n");
+			/* NOTE(review): s, the skbs after it and skb are
+			 * not released on this path; confirm the caller
+			 * cleans up without touching skbs that were
+			 * already re-injected. */
+			return -1;
+		}
+
+		copy_info->pf = info->pf;
+		copy_info->hook = info->hook;
+		copy_info->indev = info->indev;
+		copy_info->outdev = info->outdev;
+		copy_info->okfn = info->okfn;
+		copy_info->elem = info->elem;
+
+		/* Fetch the successor first: s must not be read again once
+		 * it has been freed or handed to nf_reinject(). */
+		s2 = s->next;
+
+		/*
+		 * nf_reinject() put the module "ip6_conntrack".
+		 */
+		if (!try_module_get(info->elem->owner)) {
+			DEBUGP("Can't get module.\n");
+			/* Drop this fragment and its unused info copy,
+			 * then carry on with the next one. */
+			kfree(copy_info);
+			kfree_skb(s);
+			s = s2;
+			continue;
+		}
+
+		if (copy_info->indev)
+			dev_hold(copy_info->indev);
+		if (copy_info->outdev)
+			dev_hold(copy_info->outdev);
+
+		nf_reinject(s, copy_info, NF_ACCEPT);
+		s = s2;
+	}
+
+	kfree_skb(skb);
+
+	return 0;
+}
+
+/*
+ * Free every original fragment chained at IP6CT_FRAG6_CB(skb)->orig and
+ * then the reassembled skb itself.  Always returns 0.
+ */
+int ip6_ct_kfree_frags(struct sk_buff *skb)
+{
+	struct sk_buff *s, *s2;
+
+	for (s = IP6CT_FRAG6_CB(skb)->orig; s; s = s2) {
+
+		s2 = s->next;
+		kfree_skb(s);
+	}
+
+	kfree_skb(skb);
+
+	return 0;
+}
+
+#ifdef CONFIG_SYSCTL
+
+#define IP6CT_HIGH_THRESH_NAME "ip6ct_frags_high_thresh"
+#define IP6CT_LOW_THRESH_NAME "ip6ct_frags_low_thresh"
+#define IP6CT_TIMEOUT_NAME "ip6ct_frags_timeout"
+
+static struct ctl_table_header *ip6_ct_frags_sysctl_header;
+
+static ctl_table ip6_ct_frags_table[] = {
+	{
+		.ctl_name	= IP6CT_FRAGS_HIGH_THRESH,
+		.procname	= IP6CT_HIGH_THRESH_NAME,
+		.data		= &sysctl_ip6_ct_frag_high_thresh,
+		.maxlen		= sizeof(sysctl_ip6_ct_frag_high_thresh),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.ctl_name	= IP6CT_FRAGS_LOW_THRESH,
+		.procname	= IP6CT_LOW_THRESH_NAME,
+		.data		= &sysctl_ip6_ct_frag_low_thresh,
+		/* Size of the variable .data actually points at. */
+		.maxlen		= sizeof(sysctl_ip6_ct_frag_low_thresh),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.ctl_name	= IP6CT_FRAGS_TIMEOUT,
+		.procname	= IP6CT_TIMEOUT_NAME,
+		.data		= &sysctl_ip6_ct_frag_time,
+		.maxlen		= sizeof(sysctl_ip6_ct_frag_time),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table ip6_ct_frags_dir_table[] = {
+	{
+		.ctl_name	=
NET_IPV6, + .procname = "ipv6", NULL, + .mode = 0555, + .child = ip6_ct_frags_table + }, + { .ctl_name = 0 } +}; + +static ctl_table ip6_ct_frags_root_table[] = { + { + .ctl_name = CTL_NET, + .procname = "net", + .mode = 0555, + .child = ip6_ct_frags_dir_table + }, + { .ctl_name = 0 } +}; + +#endif /*CONFIG_SYSCTL*/ + +int __init ip6_ct_frags_init(void) +{ +#ifdef CONFIG_SYSCTL + ip6_ct_frags_sysctl_header = register_sysctl_table(ip6_ct_frags_root_table, 0); + + if (ip6_ct_frags_sysctl_header == NULL) { + printk("ip6_ct_frags_init: Can't register sysctl tables.\n"); + return -ENOMEM; + } +#endif + + ip6_ct_frag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ + (jiffies ^ (jiffies >> 6))); + + init_timer(&ip6_ct_frag_secret_timer); + ip6_ct_frag_secret_timer.function = ip6_ct_frag_secret_rebuild; + ip6_ct_frag_secret_timer.expires = jiffies + sysctl_ip6_ct_frag_secret_interval; + add_timer(&ip6_ct_frag_secret_timer); + + return 0; +} + +void ip6_ct_frags_cleanup(void) +{ + del_timer(&ip6_ct_frag_secret_timer); +#ifdef CONFIG_SYSCTL + unregister_sysctl_table(ip6_ct_frags_sysctl_header); +#endif + sysctl_ip6_ct_frag_low_thresh = 0; + ip6_ct_frag_evictor(); +} diff -urN linux-2.6.11/net/ipv6/netfilter/ip6_conntrack_standalone.c x1/net/ipv6/netfilter/ip6_conntrack_standalone.c --- linux-2.6.11/net/ipv6/netfilter/ip6_conntrack_standalone.c 1970-01-01 01:00:00.000000000 +0100 +++ x1/net/ipv6/netfilter/ip6_conntrack_standalone.c 2003-09-20 11:00:21.000000000 +0200 @@ -0,0 +1,502 @@ +/* + * IPv6 Connection Tracking + * Linux INET6 implementation + * + * Copyright (C)2003 USAGI/WIDE Project + * + * Authors: + * Yasuyuki Kozakai + * + * Based on: net/ipv4/netfilter/ip_conntrack_standalone.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +/* This file contains all the functions required for the standalone + ip6_conntrack module. + + These are not required by the compatibility layer. +*/ + +/* (c) 1999 Paul `Rusty' Russell. Licenced under the GNU General + Public Licence. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip6_conntrack_lock) +#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip6_conntrack_lock) + +#include +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +MODULE_LICENSE("GPL"); + +static int kill_proto(const struct ip6_conntrack *i, void *data) +{ + return (i->tuplehash[IP6_CT_DIR_ORIGINAL].tuple.dst.protonum == + *((u_int8_t *) data)); +} + +static unsigned int +print_tuple(char *buffer, const struct ip6_conntrack_tuple *tuple, + struct ip6_conntrack_protocol *proto) +{ + int len; + + len = sprintf(buffer, "src=%x:%x:%x:%x:%x:%x:%x:%x dst=%x:%x:%x:%x:%x:%x:%x:%x ", + NIP6(tuple->src.ip), NIP6(tuple->dst.ip)); + + len += proto->print_tuple(buffer + len, tuple); + + return len; +} + +/* FIXME: Don't print source proto part. --RR */ +static unsigned int +print_expect(char *buffer, const struct ip6_conntrack_expect *expect) +{ + unsigned int len; + + if (expect->expectant->helper->timeout) + len = sprintf(buffer, "EXPECTING: %lu ", + timer_pending(&expect->timeout) + ? 
(expect->timeout.expires - jiffies)/HZ : 0); + else + len = sprintf(buffer, "EXPECTING: - "); + len += sprintf(buffer + len, "use=%u proto=%u ", + atomic_read(&expect->use), expect->tuple.dst.protonum); + len += print_tuple(buffer + len, &expect->tuple, + __ip6_ct_find_proto(expect->tuple.dst.protonum)); + len += sprintf(buffer + len, "\n"); + return len; +} + +static unsigned int +print_conntrack(char *buffer, struct ip6_conntrack *conntrack) +{ + unsigned int len; + struct ip6_conntrack_protocol *proto + = __ip6_ct_find_proto(conntrack->tuplehash[IP6_CT_DIR_ORIGINAL] + .tuple.dst.protonum); + + len = sprintf(buffer, "%-8s %u %lu ", + proto->name, + conntrack->tuplehash[IP6_CT_DIR_ORIGINAL] + .tuple.dst.protonum, + timer_pending(&conntrack->timeout) + ? (conntrack->timeout.expires - jiffies)/HZ : 0); + + len += proto->print_conntrack(buffer + len, conntrack); + len += print_tuple(buffer + len, + &conntrack->tuplehash[IP6_CT_DIR_ORIGINAL].tuple, + proto); + if (!(test_bit(IP6S_SEEN_REPLY_BIT, &conntrack->status))) + len += sprintf(buffer + len, "[UNREPLIED] "); + len += print_tuple(buffer + len, + &conntrack->tuplehash[IP6_CT_DIR_REPLY].tuple, + proto); + if (test_bit(IP6S_ASSURED_BIT, &conntrack->status)) + len += sprintf(buffer + len, "[ASSURED] "); + len += sprintf(buffer + len, "use=%u ", + atomic_read(&conntrack->ct_general.use)); + len += sprintf(buffer + len, "\n"); + + return len; +} + +/* Returns true when finished. 
*/ +static inline int +conntrack_iterate(const struct ip6_conntrack_tuple_hash *hash, + char *buffer, off_t offset, off_t *upto, + unsigned int *len, unsigned int maxlen) +{ + unsigned int newlen; + IP6_NF_ASSERT(hash->ctrack); + + MUST_BE_READ_LOCKED(&ip6_conntrack_lock); + + /* Only count originals */ + if (DIRECTION(hash)) + return 0; + + if ((*upto)++ < offset) + return 0; + + newlen = print_conntrack(buffer + *len, hash->ctrack); + if (*len + newlen > maxlen) + return 1; + else *len += newlen; + + return 0; +} + +static int +list_conntracks(char *buffer, char **start, off_t offset, int length) +{ + unsigned int i; + unsigned int len = 0; + off_t upto = 0; + struct list_head *e; + + READ_LOCK(&ip6_conntrack_lock); + /* Traverse hash; print originals then reply. */ + for (i = 0; i < ip6_conntrack_htable_size; i++) { + if (LIST_FIND(&ip6_conntrack_hash[i], conntrack_iterate, + struct ip6_conntrack_tuple_hash *, + buffer, offset, &upto, &len, length)) + goto finished; + } + + /* Now iterate through expecteds. 
*/ + for (e = ip6_conntrack_expect_list.next; + e != &ip6_conntrack_expect_list; e = e->next) { + unsigned int last_len; + struct ip6_conntrack_expect *expect + = (struct ip6_conntrack_expect *)e; + if (upto++ < offset) continue; + + last_len = len; + len += print_expect(buffer + len, expect); + if (len > length) { + len = last_len; + goto finished; + } + } + + finished: + READ_UNLOCK(&ip6_conntrack_lock); + + /* `start' hack - see fs/proc/generic.c line ~165 */ + *start = (char *)((unsigned int)upto - offset); + return len; +} + +static unsigned int ip6_confirm(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)); +static unsigned int ip6_conntrack_out(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)); +static unsigned int ip6_conntrack_reasm(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)); +static unsigned int ip6_conntrack_local(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)); + +/* Connection tracking may drop packets, but never alters them, so + make it the first hook. */ +static struct nf_hook_ops ip6_conntrack_in_ops = { + /* Don't forget to change .hook to "ip6_conntrack_input". - zak */ + .hook = ip6_conntrack_reasm, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_PRE_ROUTING, + .priority = NF_IP6_PRI_CONNTRACK, +}; + +static struct nf_hook_ops ip6_conntrack_local_out_ops = { + .hook = ip6_conntrack_local, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_LOCAL_OUT, + .priority = NF_IP6_PRI_CONNTRACK, +}; + +/* Refragmenter; last chance. 
*/ +static struct nf_hook_ops ip6_conntrack_out_ops = { + .hook = ip6_conntrack_out, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_POST_ROUTING, + .priority = NF_IP6_PRI_LAST, +}; + +static struct nf_hook_ops ip6_conntrack_local_in_ops = { + .hook = ip6_confirm, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_LOCAL_IN, + .priority = NF_IP6_PRI_LAST-1, +}; + +static unsigned int ip6_confirm(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + int ret; + + ret = ip6_conntrack_confirm(*pskb); + + return ret; +} + +static unsigned int ip6_conntrack_out(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + + if (ip6_conntrack_confirm(*pskb) != NF_ACCEPT) + return NF_DROP; + + return NF_ACCEPT; +} + +static unsigned int ip6_conntrack_reasm(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct sk_buff *skb = *pskb; + struct sk_buff **rsmd_pskb = &skb; + int fragd = 0; + int ret; + + skb->nfcache |= NFC_UNKNOWN; + + /* + * Previously seen (loopback)? Ignore. Do this before + * fragment check. 
+ */ + if (skb->nfct) { + DEBUGP("previously seen\n"); + return NF_ACCEPT; + } + + skb = ip6_ct_gather_frags(skb); + + /* queued */ + if (skb == NULL) + return NF_STOLEN; + + if (skb != (*pskb)) + fragd = 1; + + ret = ip6_conntrack_in(hooknum, rsmd_pskb, in, out, okfn); + + if (!fragd) + return ret; + + if (ret == NF_DROP) { + ip6_ct_kfree_frags(skb); + }else{ + struct nf_info info; + + info.pf = PF_INET6; + info.hook = hooknum; + info.indev = in; + info.outdev = out; + info.okfn = okfn; + switch (hooknum) { + case NF_IP6_PRE_ROUTING: + info.elem = &ip6_conntrack_in_ops; + break; + case NF_IP6_LOCAL_OUT: + info.elem = &ip6_conntrack_local_out_ops; + break; + } + + if (ip6_ct_output_frags(skb, &info) <0) + DEBUGP("Can't output fragments\n"); + + } + + return NF_STOLEN; +} + +static unsigned int ip6_conntrack_local(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + unsigned int ret; + + /* root is playing with raw sockets. 
*/ + if ((*pskb)->len < sizeof(struct ipv6hdr)) { + if (net_ratelimit()) + printk("ip6t_hook: IPv6 header is too short.\n"); + return NF_ACCEPT; + } + + ret = ip6_conntrack_reasm(hooknum, pskb, in, out, okfn); + + return ret; +} + +static int init_or_cleanup(int init) +{ + struct proc_dir_entry *proc; + int ret = 0; + + if (!init) goto cleanup; + + ret = ip6_ct_frags_init(); + if (ret < 0) + goto cleanup_reasm; + + ret = ip6_conntrack_init(); + if (ret < 0) + goto cleanup_nothing; + + proc = proc_net_create("ip6_conntrack",0,list_conntracks); + if (!proc) goto cleanup_init; + proc->owner = THIS_MODULE; + + ret = nf_register_hook(&ip6_conntrack_in_ops); + if (ret < 0) { + printk("ip6_conntrack: can't register pre-routing hook.\n"); + goto cleanup_proc; + } + ret = nf_register_hook(&ip6_conntrack_local_out_ops); + if (ret < 0) { + printk("ip6_conntrack: can't register local out hook.\n"); + goto cleanup_inops; + } + ret = nf_register_hook(&ip6_conntrack_out_ops); + if (ret < 0) { + printk("ip6_conntrack: can't register post-routing hook.\n"); + goto cleanup_inandlocalops; + } + ret = nf_register_hook(&ip6_conntrack_local_in_ops); + if (ret < 0) { + printk("ip6_conntrack: can't register local in hook.\n"); + goto cleanup_inoutandlocalops; + } + + return ret; + + cleanup: + nf_unregister_hook(&ip6_conntrack_local_in_ops); + cleanup_inoutandlocalops: + nf_unregister_hook(&ip6_conntrack_out_ops); + cleanup_inandlocalops: + nf_unregister_hook(&ip6_conntrack_local_out_ops); + cleanup_inops: + nf_unregister_hook(&ip6_conntrack_in_ops); + cleanup_proc: + proc_net_remove("ip6_conntrack"); + cleanup_init: + ip6_conntrack_cleanup(); + cleanup_reasm: + ip6_ct_frags_cleanup(); + cleanup_nothing: + return ret; +} + +/* FIXME: Allow NULL functions and sub in pointers to generic for + them. 
--RR */ +int ip6_conntrack_protocol_register(struct ip6_conntrack_protocol *proto) +{ + int ret = 0; + struct list_head *i; + + WRITE_LOCK(&ip6_conntrack_lock); + for (i = ip6_protocol_list.next; i != &ip6_protocol_list; i = i->next) { + if (((struct ip6_conntrack_protocol *)i)->proto + == proto->proto) { + ret = -EBUSY; + goto out; + } + } + + list_prepend(&ip6_protocol_list, proto); + + out: + WRITE_UNLOCK(&ip6_conntrack_lock); + return ret; +} + +void ip6_conntrack_protocol_unregister(struct ip6_conntrack_protocol *proto) +{ + WRITE_LOCK(&ip6_conntrack_lock); + + /* ip_ct_find_proto() returns proto_generic in case there is no protocol + * helper. So this should be enough - HW */ + LIST_DELETE(&ip6_protocol_list, proto); + WRITE_UNLOCK(&ip6_conntrack_lock); + + /* Somebody could be still looking at the proto in bh. */ + synchronize_net(); + + /* Remove all contrack entries for this protocol */ + ip6_ct_selective_cleanup(kill_proto, &proto->proto); +} + +static int __init init(void) +{ + return init_or_cleanup(1); +} + +static void __exit fini(void) +{ + init_or_cleanup(0); +} + +module_init(init); +module_exit(fini); + +/* Some modules need us, but don't depend directly on any symbol. + They should call this. 
*/ +void need_ip6_conntrack(void) +{ +} + +EXPORT_SYMBOL(ip6_conntrack_protocol_register); +EXPORT_SYMBOL(ip6_conntrack_protocol_unregister); +EXPORT_SYMBOL(ip6_invert_tuplepr); +EXPORT_SYMBOL(ip6_conntrack_alter_reply); +EXPORT_SYMBOL(ip6_conntrack_destroyed); +EXPORT_SYMBOL(ip6_conntrack_get); +EXPORT_SYMBOL(need_ip6_conntrack); +EXPORT_SYMBOL(ip6_conntrack_helper_register); +EXPORT_SYMBOL(ip6_conntrack_helper_unregister); +EXPORT_SYMBOL(ip6_ct_selective_cleanup); +EXPORT_SYMBOL(ip6_ct_refresh); +EXPORT_SYMBOL(ip6_ct_find_proto); +EXPORT_SYMBOL(__ip6_ct_find_proto); +EXPORT_SYMBOL(ip6_ct_find_helper); +EXPORT_SYMBOL(ip6_conntrack_expect_related); +EXPORT_SYMBOL(ip6_conntrack_unexpect_related); +EXPORT_SYMBOL_GPL(ip6_conntrack_expect_find_get); +EXPORT_SYMBOL_GPL(ip6_conntrack_expect_put); +EXPORT_SYMBOL(ip6_conntrack_tuple_taken); +EXPORT_SYMBOL(ip6_conntrack_htable_size); +EXPORT_SYMBOL(ip6_conntrack_expect_list); +EXPORT_SYMBOL(ip6_conntrack_lock); +EXPORT_SYMBOL_GPL(ip6_conntrack_find_get); +EXPORT_SYMBOL_GPL(ip6_conntrack_put); diff -urN linux-2.6.11/net/ipv6/netfilter/ip6t_REJECT.c x1/net/ipv6/netfilter/ip6t_REJECT.c --- linux-2.6.11/net/ipv6/netfilter/ip6t_REJECT.c 1970-01-01 01:00:00.000000000 +0100 +++ x1/net/ipv6/netfilter/ip6t_REJECT.c 2004-11-25 15:47:19.000000000 +0100 @@ -0,0 +1,453 @@ +/* + * IP6 tables REJECT target module + * Linux INET6 implementation + * + * Copyright (C)2003 USAGI/WIDE Project + * + * Authors: + * Yasuyuki Kozakai + * + * Based on net/ipv4/netfilter/ipt_REJECT.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* This module works well with IPv6 Connection Tracking. 
- kozakai */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Yasuyuki KOZAKAI "); +MODULE_DESCRIPTION("IP6 tables REJECT target module"); +MODULE_LICENSE("GPL"); + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +static void connection_attach(struct sk_buff *new_skb, struct sk_buff *skb) +{ + void (*attach)(struct sk_buff *, struct sk_buff *); + if (skb->nfct && (attach = ip6_ct_attach) != NULL) { + mb(); + attach(new_skb, skb); + } +} + +static int maybe_reroute(struct sk_buff *skb) +{ + if (skb->nfcache & NFC_ALTERED){ + if (ip6_route_me_harder(skb) != 0){ + kfree_skb(skb); + return -EINVAL; + } + } + + return dst_output(skb); +} + +/* Send RST reply */ +static void send_reset(struct sk_buff *oldskb) +{ + struct sk_buff *nskb; + struct tcphdr otcph, *tcph; + unsigned int otcplen, tcphoff, hh_len; + int needs_ack; + struct ipv6hdr *oip6h = oldskb->nh.ipv6h, *ip6h; + struct dst_entry *dst = NULL; + u8 proto = oip6h->nexthdr; + struct flowi fl; + int err; + + if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || + (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { + DEBUGP("ip6t_REJECT: addr is not unicast.\n"); + return; + } + + tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), + &proto, oldskb->len - ((u8*)(oip6h+1) + - oldskb->data)); + + if ((tcphoff < 0) || (tcphoff > oldskb->len)) { + DEBUGP("ip6t_REJECT: Can't get TCP header.\n"); + return; + } + + otcplen = oldskb->len - tcphoff; + + /* IP header checks: fragment, too short. */ + if ((proto != IPPROTO_TCP) || (otcplen < sizeof(struct tcphdr))) { + DEBUGP("ip6t_REJECT: proto(%d) != IPPROTO_TCP, or too short. otcplen = %d\n", + proto, otcplen); + return; + } + + if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr))) { + if (net_ratelimit()) + printk("ip6t_REJECT: Can't copy tcp header\n"); + return; + } + + /* No RST for RST. 
*/ + if (otcph.rst) { + DEBUGP("ip6t_REJECT: RST is set\n"); + return; + } + + /* Check checksum. */ + if (csum_ipv6_magic(&oip6h->saddr, &oip6h->daddr, otcplen, IPPROTO_TCP, + skb_checksum(oldskb, tcphoff, otcplen, 0))) { + DEBUGP("ip6t_REJECT: TCP checksum is invalid\n"); + return; + } + + memset(&fl, 0, sizeof(fl)); + fl.proto = IPPROTO_TCP; + ipv6_addr_copy(&fl.fl6_src, &oip6h->daddr); + ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr); + fl.fl_ip_sport = otcph.dest; + fl.fl_ip_dport = otcph.source; + err = ip6_dst_lookup(NULL, &dst, &fl); + if (err) { + if (net_ratelimit()) + printk("ip6t_REJECT: can't find dst. err = %d\n", err); + return; + } + + hh_len = (dst->dev->hard_header_len + 15)&~15; + nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) + + sizeof(struct tcphdr) + dst->trailer_len, + GFP_ATOMIC); + + if (!nskb) { + if (net_ratelimit()) + printk("ip6t_REJECT: Can't alloc skb\n"); + dst_release(dst); + return; + } + + nskb->dst = dst; + dst_hold(dst); + + skb_reserve(nskb, hh_len + dst->header_len); + + ip6h = nskb->nh.ipv6h = (struct ipv6hdr *) + skb_put(nskb, sizeof(struct ipv6hdr)); + ip6h->version = 6; + ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT); + ip6h->nexthdr = IPPROTO_TCP; + ip6h->payload_len = htons(sizeof(struct tcphdr)); + ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr); + ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr); + + tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); + /* Truncate to length (no data) */ + tcph->doff = sizeof(struct tcphdr)/4; + tcph->source = otcph.dest; + tcph->dest = otcph.source; + + if (otcph.ack) { + needs_ack = 0; + tcph->seq = otcph.ack_seq; + tcph->ack_seq = 0; + } else { + needs_ack = 1; + tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin + + otcplen - (otcph.doff<<2)); + tcph->seq = 0; + } + + /* Reset flags */ + ((u_int8_t *)tcph)[13] = 0; + tcph->rst = 1; + tcph->ack = needs_ack; + tcph->window = 0; + tcph->urg_ptr = 0; + tcph->check = 0; + + /* Adjust TCP checksum */ 
+ tcph->check = csum_ipv6_magic(&nskb->nh.ipv6h->saddr, + &nskb->nh.ipv6h->daddr, + sizeof(struct tcphdr), IPPROTO_TCP, + csum_partial((char *)tcph, + sizeof(struct tcphdr), 0)); + + connection_attach(nskb, oldskb); + + NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, nskb, NULL, nskb->dst->dev, + maybe_reroute); + + dst_release(dst); +} + +static void send_unreach(struct sk_buff *skb_in, unsigned char code) +{ + struct ipv6hdr *ip6h, *hdr = skb_in->nh.ipv6h; + struct icmp6hdr *icmp6h; + struct dst_entry *dst = NULL; + struct rt6_info *rt; + int tmo; + __u32 csum; + unsigned int len, datalen, hh_len; + int saddr_type, daddr_type; + unsigned int ptr, ip6off; + u8 proto; + struct flowi fl; + struct sk_buff *nskb; + char *data; + + saddr_type = ipv6_addr_type(&hdr->saddr); + daddr_type = ipv6_addr_type(&hdr->daddr); + + if ((!(saddr_type & IPV6_ADDR_UNICAST)) || + (!(daddr_type & IPV6_ADDR_UNICAST))) { + DEBUGP("ip6t_REJECT: addr is not unicast.\n"); + return; + } + + ip6off = skb_in->nh.raw - skb_in->data; + proto = hdr->nexthdr; + ptr = ipv6_skip_exthdr(skb_in, ip6off + sizeof(struct ipv6hdr), &proto, + skb_in->len - ip6off); + + if ((ptr < 0) || (ptr > skb_in->len)) { + ptr = ip6off + sizeof(struct ipv6hdr); + proto = hdr->nexthdr; + } else if (proto == IPPROTO_ICMPV6) { + u8 type; + + if (skb_copy_bits(skb_in, ptr + offsetof(struct icmp6hdr, + icmp6_type), &type, 1)) { + DEBUGP("ip6t_REJECT: Can't get ICMPv6 type\n"); + return; + } + + if (!(type & ICMPV6_INFOMSG_MASK)) { + DEBUGP("ip6t_REJECT: no reply to icmp error\n"); + return; + } + } else if (proto == IPPROTO_UDP) { + int plen = skb_in->len - (ptr - ip6off); + uint16_t check; + + if (plen < sizeof(struct udphdr)) { + DEBUGP("ip6t_REJECT: too short\n"); + return; + } + + if (skb_copy_bits(skb_in, ptr + offsetof(struct udphdr, check), + &check, 2)) { + if (net_ratelimit()) + printk("ip6t_REJECT: can't get copy from skb"); + return; + } + + if (check && + csum_ipv6_magic(&hdr->saddr, &hdr->daddr, plen, + IPPROTO_UDP, + 
skb_checksum(skb_in, ptr, plen, 0))) { + DEBUGP("ip6t_REJECT: UDP checksum is invalid.\n"); + return; + } + } + + memset(&fl, 0, sizeof(fl)); + fl.proto = IPPROTO_ICMPV6; + ipv6_addr_copy(&fl.fl6_src, &hdr->daddr); + ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr); + fl.fl_icmp_type = ICMPV6_DEST_UNREACH; + fl.fl_icmp_code = code; + + if (ip6_dst_lookup(NULL, &dst, &fl)) { + return; + } + + rt = (struct rt6_info *)dst; + tmo = 1*HZ; + + if (rt->rt6i_dst.plen < 128) + tmo >>= ((128 - rt->rt6i_dst.plen)>>5); + + if (!xrlim_allow(dst, tmo)) { + if (net_ratelimit()) + printk("ip6t_REJECT: rate limitted\n"); + goto dst_release_out; + } + + len = skb_in->len + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr); + + if (len > dst_pmtu(dst)) + len = dst_pmtu(dst); + if (len > IPV6_MIN_MTU) + len = IPV6_MIN_MTU; + + datalen = len - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr); + hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15; + + nskb = alloc_skb(hh_len + 15 + dst->header_len + dst->trailer_len + len, + GFP_ATOMIC); + + if (!nskb) { + if (net_ratelimit()) + printk("ip6t_REJECT: can't alloc skb\n"); + goto dst_release_out; + } + + nskb->priority = 0; + nskb->dst = dst; + dst_hold(dst); + + skb_reserve(nskb, hh_len + dst->header_len); + + ip6h = nskb->nh.ipv6h = (struct ipv6hdr *) + skb_put(nskb, sizeof(struct ipv6hdr)); + ip6h->version = 6; + ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT); + ip6h->nexthdr = IPPROTO_ICMPV6; + ip6h->payload_len = htons(datalen + sizeof(struct icmp6hdr)); + ipv6_addr_copy(&ip6h->saddr, &hdr->daddr); + ipv6_addr_copy(&ip6h->daddr, &hdr->saddr); + + icmp6h = (struct icmp6hdr *) skb_put(nskb, sizeof(struct icmp6hdr)); + icmp6h->icmp6_type = ICMPV6_DEST_UNREACH; + icmp6h->icmp6_code = code; + icmp6h->icmp6_cksum = 0; + + data = skb_put(nskb, datalen); + + csum = csum_partial((unsigned char *)icmp6h, sizeof(struct icmp6hdr), 0); + csum = skb_copy_and_csum_bits(skb_in, ip6off, data, datalen, csum); + icmp6h->icmp6_cksum = 
csum_ipv6_magic(&hdr->saddr, &hdr->daddr, + datalen + sizeof(struct icmp6hdr), + IPPROTO_ICMPV6, csum); + + connection_attach(nskb, skb_in); + NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, nskb, NULL, nskb->dst->dev, + maybe_reroute); + +dst_release_out: + dst_release(dst); +} + +static unsigned int reject6_target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, + void *userinfo) +{ + const struct ip6t_reject_info *reject = targinfo; + + DEBUGP(KERN_DEBUG "%s: medium point\n", __FUNCTION__); + /* WARNING: This code causes reentry within ip6tables. + This means that the ip6tables jump stack is now crap. We + must return an absolute verdict. --RR */ + switch (reject->with) { + case IP6T_ICMP6_NO_ROUTE: + send_unreach(*pskb, ICMPV6_NOROUTE); + break; + case IP6T_ICMP6_ADM_PROHIBITED: + send_unreach(*pskb, ICMPV6_ADM_PROHIBITED); + break; + case IP6T_ICMP6_NOT_NEIGHBOUR: + send_unreach(*pskb, ICMPV6_NOT_NEIGHBOUR); + break; + case IP6T_ICMP6_ADDR_UNREACH: + send_unreach(*pskb, ICMPV6_ADDR_UNREACH); + break; + case IP6T_ICMP6_PORT_UNREACH: + send_unreach(*pskb, ICMPV6_PORT_UNREACH); + break; + case IP6T_ICMP6_ECHOREPLY: + /* Do nothing */ + break; + case IP6T_TCP_RESET: + send_reset(*pskb); + break; + default: + if (net_ratelimit()) + printk(KERN_WARNING "ip6t_REJECT: case %u not handled yet\n", reject->with); + break; + } + + return NF_DROP; +} + +static int check(const char *tablename, + const struct ip6t_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + const struct ip6t_reject_info *rejinfo = targinfo; + + if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_reject_info))) { + DEBUGP("ip6t_REJECT: targinfosize %u != 0\n", targinfosize); + return 0; + } + + /* Only allow these for packet filtering. 
*/ + if (strcmp(tablename, "filter") != 0) { + DEBUGP("ip6t_REJECT: bad table `%s'.\n", tablename); + return 0; + } + + if ((hook_mask & ~((1 << NF_IP6_LOCAL_IN) + | (1 << NF_IP6_FORWARD) + | (1 << NF_IP6_LOCAL_OUT))) != 0) { + DEBUGP("ip6t_REJECT: bad hook mask %X\n", hook_mask); + return 0; + } + + if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) { + printk("ip6t_REJECT: ECHOREPLY is not supported.\n"); + return 0; + } else if (rejinfo->with == IP6T_TCP_RESET) { + /* Must specify that it's a TCP packet */ + if (e->ipv6.proto != IPPROTO_TCP + || (e->ipv6.invflags & IP6T_INV_PROTO)) { + DEBUGP("ip6t_REJECT: TCP_RESET illegal for non-tcp\n"); + return 0; + } + } + + return 1; +} + +static struct ip6t_target ip6t_reject_reg = { + .name = "REJECT", + .target = reject6_target, + .checkentry = check, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + if (ip6t_register_target(&ip6t_reject_reg)) + return -EINVAL; + return 0; +} + +static void __exit fini(void) +{ + ip6t_unregister_target(&ip6t_reject_reg); +} + +module_init(init); +module_exit(fini); diff -urN linux-2.6.11/net/ipv6/netfilter/ip6t_state.c x1/net/ipv6/netfilter/ip6t_state.c --- linux-2.6.11/net/ipv6/netfilter/ip6t_state.c 1970-01-01 01:00:00.000000000 +0100 +++ x1/net/ipv6/netfilter/ip6t_state.c 2004-11-25 15:47:19.000000000 +0100 @@ -0,0 +1,79 @@ +/* + * Matching connection tracking information + * Linux INET6 implementation + * + * Copyright (C)2003 USAGI/WIDE Project + * + * Authors: + * Yasuyuki Kozakai + * + * Based on: net/ipv4/netfilter/ip6t_state.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +/* Kernel module to match connection tracking information. + * GPL (C) 1999 Rusty Russell (rusty@rustcorp.com.au). 
+ */ +#include +#include +#include +#include +#include + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + unsigned int protoff, + int *hotdrop) +{ + const struct ip6t_state_info *sinfo = matchinfo; + enum ip6_conntrack_info ctinfo; + unsigned int statebit; + + if (!ip6_conntrack_get(skb, &ctinfo)) + statebit = IP6T_STATE_INVALID; + else + statebit = IP6T_STATE_BIT(ctinfo); + + return (sinfo->statemask & statebit); +} + +static int check(const char *tablename, + const struct ip6t_ip6 *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_state_info))) + return 0; + + return 1; +} + +static struct ip6t_match state_match = { + .name = "state", + .match = &match, + .checkentry = &check, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + need_ip6_conntrack(); + return ip6t_register_match(&state_match); +} + +static void __exit fini(void) +{ + ip6t_unregister_match(&state_match); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); diff -urN linux-2.6.11/net/ipv6/proc.c x1/net/ipv6/proc.c --- linux-2.6.11/net/ipv6/proc.c 2005-03-02 08:38:07.000000000 +0100 +++ x1/net/ipv6/proc.c 2005-03-02 17:30:59.000000000 +0100 @@ -164,7 +164,13 @@ if (idev) { seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex); +#ifdef CONFIG_IPV6_STATISTICS + snmp6_seq_show_item(seq, (void **)idev->stats.ipv6_statistics, snmp6_ipstats_list); +#endif snmp6_seq_show_item(seq, (void **)idev->stats.icmpv6, snmp6_icmp6_list); +#if 0 + snmp6_seq_show_item(seq, (void **)idev->stats.udp_stats_in6, snmp6_udp6_list); +#endif } else { snmp6_seq_show_item(seq, (void **)ipv6_statistics, snmp6_ipstats_list); snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list); @@ -284,19 +290,45 @@ if (!idev || !idev->dev) return -EINVAL; +#ifdef CONFIG_IPV6_STATISTICS + if (snmp6_mib_init((void 
**)idev->stats.ipv6_statistics, sizeof(struct ipstats_mib), + __alignof__(struct ipstats_mib)) < 0) + goto err_ip; +#endif + if (snmp6_mib_init((void **)idev->stats.icmpv6, sizeof(struct icmpv6_mib), __alignof__(struct icmpv6_mib)) < 0) goto err_icmp; +#if 0 + if (snmp6_mib_init((void **)idev->stats.udp_stats_in6, sizeof(struct udp_mib), + __alignof__(struct udp_mib)) < 0) + goto err_udp; +#endif + return 0; +#if 0 +err_udp: + snmp6_mib_free((void **)idev->stats.icmpv6); +#endif err_icmp: +#ifdef CONFIG_IPV6_STATISTICS + snmp6_mib_free((void **)idev->stats.ipv6_statistics); +err_ip: +#endif return err; } int snmp6_free_dev(struct inet6_dev *idev) { +#ifdef CONFIG_IPV6_STATISTICS + snmp6_mib_free((void **)idev->stats.ipv6_statistics); +#endif snmp6_mib_free((void **)idev->stats.icmpv6); +#if 0 + snmp6_mib_free((void **)idev->stats.udp_stats_in6); +#endif return 0; } diff -urN linux-2.6.11/net/ipv6/raw.c x1/net/ipv6/raw.c --- linux-2.6.11/net/ipv6/raw.c 2005-03-02 08:38:07.000000000 +0100 +++ x1/net/ipv6/raw.c 2005-02-09 16:31:39.000000000 +0100 @@ -13,6 +13,10 @@ * Hideaki YOSHIFUJI : sin6_scope_id support * YOSHIFUJI,H.@USAGI : raw checksum (RFC2292(bis) compliance) * Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data + * Hoerdt Mickael : Added Ipv6 multicast routing support. 
+ * + * Changes: + * Kazunori MIYAZAWA @USAGI: change datagram transmit routine to ip6_append_data * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -55,6 +59,10 @@ #include #include +#ifdef CONFIG_IPV6_MROUTE +#include +#endif + struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE]; DEFINE_RWLOCK(raw_v6_lock); @@ -162,7 +170,19 @@ sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr); while (sk) { - if (nexthdr != IPPROTO_ICMPV6 || !icmpv6_filter(sk, skb)) { + int filtered; + + switch (nexthdr) { + case IPPROTO_ICMPV6: + filtered = icmpv6_filter(sk, skb); + break; + default: + filtered = 0; + } + + if (filtered < 0) + break; + if (filtered == 0) { struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); /* Not releasing hash table! */ @@ -199,6 +219,10 @@ if (sk->sk_state != TCP_CLOSE) goto out; + if (addr->sin6_port && + ntohs(addr->sin6_port) != inet->num) + goto out; + /* Check if the address belongs to the host. */ if (addr_type != IPV6_ADDR_ANY) { struct net_device *dev = NULL; @@ -407,8 +431,11 @@ /* Copy the address. 
*/ if (sin6) { + struct inet_sock *inet = inet_sk(sk); + sin6->sin6_family = AF_INET6; ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr); + sin6->sin6_port = htons(inet->num); sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = 0; if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) @@ -509,6 +536,9 @@ struct inet_sock *inet = inet_sk(sk); struct ipv6hdr *iph; struct sk_buff *skb; +#ifdef CONFIG_IPV6_STATISTICS + struct inet6_dev *idev = NULL; +#endif unsigned int hh_len; int err; @@ -539,7 +569,12 @@ if (err) goto error_fault; +#ifdef CONFIG_IPV6_STATISTICS + idev = rt->rt6i_idev; + IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); +#else IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); +#endif err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, rt->u.dst.dev, dst_output); if (err > 0) @@ -553,7 +588,11 @@ err = -EFAULT; kfree_skb(skb); error: +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); +#else IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); +#endif return err; } @@ -756,6 +795,8 @@ hlimit = np->hop_limit; if (hlimit < 0) hlimit = dst_metric(dst, RTAX_HOPLIMIT); + if (hlimit < 0) + hlimit = ipv6_get_hoplimit(dst->dev); } if (msg->msg_flags&MSG_CONFIRM) @@ -952,7 +993,11 @@ } default: +#ifdef CONFIG_IPV6_MROUTE + return ip6mr_ioctl(sk,cmd,(void __user *)arg); +#else return -ENOIOCTLCMD; +#endif } } @@ -960,7 +1005,12 @@ { if (inet_sk(sk)->num == IPPROTO_RAW) ip6_ra_control(sk, -1, NULL); - +#ifdef CONFIG_IPV6_MROUTE + if (sk == mroute6_socket) { + printk(KERN_DEBUG "closing mroute6 socket.\n"); + ip6_ra_control(sk, -1, NULL); + } +#endif sk_common_release(sk); } diff -urN linux-2.6.11/net/ipv6/reassembly.c x1/net/ipv6/reassembly.c --- linux-2.6.11/net/ipv6/reassembly.c 2005-03-02 08:37:53.000000000 +0100 +++ x1/net/ipv6/reassembly.c 2005-02-13 19:37:11.000000000 +0100 @@ -53,6 +53,9 @@ #include #include #include +#ifdef CONFIG_IPV6_STATISTICS +#include +#endif int sysctl_ip6frag_high_thresh = 256*1024; int sysctl_ip6frag_low_thresh = 192*1024; 
@@ -264,7 +267,7 @@ } } -static void ip6_evictor(void) +static void ip6_evictor(struct inet6_dev *idev) { struct frag_queue *fq; struct list_head *tmp; @@ -291,14 +294,21 @@ spin_unlock(&fq->lock); fq_put(fq, &work); +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); +#endif } } static void ip6_frag_expire(unsigned long data) { struct frag_queue *fq = (struct frag_queue *) data; - + struct net_device *dev = NULL; +#ifdef CONFIG_IPV6_STATISTICS + struct inet6_dev *idev = NULL; +#endif spin_lock(&fq->lock); if (fq->last_in & COMPLETE) @@ -306,13 +316,19 @@ fq_kill(fq); +#ifdef CONFIG_IPV6_STATISTICS + dev = dev_get_by_index(fq->iif); + idev = dev ? in6_dev_get(dev) : NULL; + + IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMTIMEOUT); + IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT); IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); +#endif /* Send error only if the first segment arrived. */ if (fq->last_in&FIRST_IN && fq->fragments) { - struct net_device *dev = dev_get_by_index(fq->iif); - /* But use as source device on which LAST ARRIVED segment was received. 
And do not use fq->dev @@ -322,9 +338,14 @@ fq->fragments->dev = dev; icmpv6_send(fq->fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev); - dev_put(dev); } } +#ifdef CONFIG_IPV6_STATISTICS + if (idev) + in6_dev_put(idev); + if (dev) + dev_put(dev); +#endif out: spin_unlock(&fq->lock); fq_put(fq, NULL); @@ -371,7 +392,11 @@ static struct frag_queue * +#ifdef CONFIG_IPV6_STATISTICS +ip6_frag_create(unsigned int hash, u32 id, struct in6_addr *src, struct in6_addr *dst, struct inet6_dev *idev) +#else ip6_frag_create(unsigned int hash, u32 id, struct in6_addr *src, struct in6_addr *dst) +#endif { struct frag_queue *fq; @@ -393,12 +418,20 @@ return ip6_frag_intern(hash, fq); oom: +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); +#endif return NULL; } static __inline__ struct frag_queue * +#ifdef CONFIG_IPV6_STATISTICS +fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst, struct inet6_dev *idev) +#else fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst) +#endif { struct frag_queue *fq; unsigned int hash = ip6qhashfn(id, src, dst); @@ -415,7 +448,11 @@ } read_unlock(&ip6_frag_lock); +#ifdef CONFIG_IPV6_STATISTICS + return ip6_frag_create(hash, id, src, dst, idev); +#else return ip6_frag_create(hash, id, src, dst); +#endif } @@ -423,6 +460,11 @@ struct frag_hdr *fhdr, int nhoff) { struct sk_buff *prev, *next; +#ifdef CONFIG_IPV6_STATISTICS + struct dst_entry *dst = skb->dst; + struct inet6_dev *idev = ((struct rt6_info *)dst)->rt6i_idev; +#endif + int offset, end; if (fq->last_in & COMPLETE) @@ -433,7 +475,11 @@ ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1))); if ((unsigned int)end > IPV6_MAXPLEN) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); +#endif icmpv6_param_prob(skb,ICMPV6_HDR_FIELD, (u8*)&fhdr->frag_off - skb->nh.raw); return; } @@ -460,7 +506,11 @@ /* RFC2460 says always 
send parameter problem in * this case. -DaveM */ +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); +#endif icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, payload_len)); return; @@ -579,7 +629,11 @@ return; err: +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_REASMFAILS); +#else IP6_INC_STATS(IPSTATS_MIB_REASMFAILS); +#endif kfree_skb(skb); } @@ -597,6 +651,11 @@ struct net_device *dev) { struct sk_buff *fp, *head = fq->fragments; +#ifdef CONFIG_IPV6_STATISTICS + struct sk_buff *skb = *skb_in; + struct dst_entry *dst = skb->dst; + struct inet6_dev *idev = ((struct rt6_info *)dst)->rt6i_idev; +#endif int payload_len; unsigned int nhoff; @@ -673,7 +732,11 @@ if (head->ip_summed == CHECKSUM_HW) head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum); +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMOKS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS); +#endif fq->fragments = NULL; *nhoffp = nhoff; return 1; @@ -686,7 +749,11 @@ if (net_ratelimit()) printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n"); out_fail: +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); +#endif return -1; } @@ -697,19 +764,35 @@ struct frag_hdr *fhdr; struct frag_queue *fq; struct ipv6hdr *hdr; +#ifdef CONFIG_IPV6_STATISTICS + struct dst_entry *dst = skb->dst; + struct inet6_dev *idev = ((struct rt6_info *)dst)->rt6i_idev; +#endif hdr = skb->nh.ipv6h; +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMREQDS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); +#endif /* Jumbo payload inhibits frag. 
header */ if (hdr->payload_len==0) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_INHDRERRORS); +#else IP6_INC_STATS(IPSTATS_MIB_INHDRERRORS); +#endif icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw); return -1; } if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+sizeof(struct frag_hdr))) { +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS(idev, IPSTATS_MIB_INHDRERRORS); +#else IP6_INC_STATS(IPSTATS_MIB_INHDRERRORS); +#endif icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw); return -1; } @@ -720,16 +803,29 @@ if (!(fhdr->frag_off & htons(0xFFF9))) { /* It is not a fragmented frame */ skb->h.raw += sizeof(struct frag_hdr); +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMOKS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS); +#endif *nhoffp = (u8*)fhdr - skb->nh.raw; return 1; } - if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh) - ip6_evictor(); + if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh) { +#ifdef CONFIG_IPV6_STATISTICS + ip6_evictor(idev); +#else + ip6_evictor(NULL); +#endif + } +#ifdef CONFIG_IPV6_STATISTICS + if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr, idev)) != NULL) { +#else if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr)) != NULL) { +#endif int ret = -1; spin_lock(&fq->lock); @@ -745,7 +841,11 @@ return ret; } +#ifdef CONFIG_IPV6_STATISTICS + IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS); +#else IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); +#endif kfree_skb(skb); return -1; } diff -urN linux-2.6.11/net/ipv6/route.c x1/net/ipv6/route.c --- linux-2.6.11/net/ipv6/route.c 2005-03-02 08:38:17.000000000 +0100 +++ x1/net/ipv6/route.c 2005-02-28 07:45:55.000000000 +0100 @@ -136,7 +136,6 @@ DEFINE_RWLOCK(rt6_lock); - /* allocate dst with ip6_dst_ops */ static __inline__ struct rt6_info *ip6_dst_alloc(void) { @@ -216,8 +215,9 @@ /* * pointer to the last default router chosen. BH is disabled locally. 
*/ -static struct rt6_info *rt6_dflt_pointer; -static DEFINE_SPINLOCK(rt6_dflt_lock); +#if !defined(CONFIG_IPV6_NEW_ROUNDROBIN) +struct rt6_info *rt6_dflt_pointer; +spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED; void rt6_reset_dflt_pointer(struct rt6_info *rt) { @@ -228,61 +228,168 @@ } spin_unlock_bh(&rt6_dflt_lock); } +#endif /* Default Router Selection (RFC 2461 6.3.6) */ -static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif) +static int __rt6_score_dflt(struct rt6_info *sprt, struct rt6_info *dflt, int oif) { - struct rt6_info *match = NULL; - struct rt6_info *sprt; - int mpri = 0; + struct neighbour *neigh = sprt->rt6i_nexthop; + int m = oif ? 0 : 8; - for (sprt = rt; sprt; sprt = sprt->u.next) { - struct neighbour *neigh; - int m = 0; + if (!neigh) + return -1; - if (!oif || - (sprt->rt6i_dev && - sprt->rt6i_dev->ifindex == oif)) - m += 8; + if (rt6_check_expired(sprt)) + return -1; - if (rt6_check_expired(sprt)) - continue; + if (oif && sprt->rt6i_dev && + sprt->rt6i_dev->ifindex == oif) + m += 8; + +#if !defined(CONFIG_IPV6_ROUTER_PREF) + if (sprt == dflt) + m += 4; +#endif - if (sprt == rt6_dflt_pointer) - m += 4; + read_lock_bh(&neigh->lock); + switch (neigh->nud_state) { + case NUD_REACHABLE: + m += 3; + break; - if ((neigh = sprt->rt6i_nexthop) != NULL) { - read_lock_bh(&neigh->lock); - switch (neigh->nud_state) { - case NUD_REACHABLE: - m += 3; - break; + case NUD_STALE: + case NUD_DELAY: + case NUD_PROBE: + m += 2; + break; - case NUD_STALE: - case NUD_DELAY: - case NUD_PROBE: - m += 2; - break; + case NUD_NOARP: + case NUD_PERMANENT: + m += 1; + break; - case NUD_NOARP: - case NUD_PERMANENT: - m += 1; + case NUD_INCOMPLETE: + default: + m = -1; + } + read_unlock_bh(&neigh->lock); + + return m; +} + +static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, struct rt6_info **head, int oif) +{ + struct rt6_info *match = NULL; + struct rt6_info *sprt; +#if defined(CONFIG_IPV6_NEW_ROUNDROBIN) + struct rt6_info *last = NULL; +#endif + int 
mpri = 0; +#if defined(CONFIG_IPV6_ROUTER_PREF) + u32 metric = 0; + int pref = -3; +#else + static const int okpri = 12; /* device match, prob. reachable */ +#endif + + if (head != NULL && *head != rt) + head = NULL; /*XXX*/ + +#if !defined(CONFIG_IPV6_NEW_ROUNDROBIN) + spin_lock(&rt6_dflt_lock); +#endif +#if !defined(CONFIG_IPV6_NEW_ROUNDROBIN) + if (rt6_dflt_pointer) { + for (sprt = rt; sprt; sprt = sprt->u.next) { + if (sprt == rt6_dflt_pointer) break; + } + if (!sprt) + rt6_dflt_pointer = NULL; /* for sure */ + } +#endif - case NUD_INCOMPLETE: - default: - read_unlock_bh(&neigh->lock); +#if defined(CONFIG_IPV6_ROUTER_PREF) +#if !defined(CONFIG_IPV6_NEW_ROUNDROBIN) + if (rt6_dflt_pointer) { + for (sprt = rt6_dflt_pointer->u.next; sprt; sprt = sprt->u.next) { + int m, p; + + if ((metric != 0 && sprt->rt6i_metric > metric) || + sprt->u.dst.obsolete > 0 || + sprt->u.dst.error != 0) continue; + +#if defined(CONFIG_IPV6_NEW_ROUNDROBIN) + m = __rt6_score_dflt(sprt, rt, oif); +#else + m = __rt6_score_dflt(sprt, rt6_dflt_pointer, oif); +#endif + if (m < mpri) + continue; + p = IPV6_SIGNEDPREF(IPV6_UNSHIFT_PREF(sprt->rt6i_flags)); + if (sprt->rt6i_metric < metric || m > mpri || p > pref) { + match = sprt; + metric = sprt->rt6i_metric; + mpri = m; + pref = p; } - read_unlock_bh(&neigh->lock); - } else { + } + } +#endif +#if defined(CONFIG_IPV6_NEW_ROUNDROBIN) + metric = rt->rt6i_metric; +#endif + for (sprt = rt; sprt; sprt = sprt->u.next) { + int m, p; +#if defined(CONFIG_IPV6_NEW_ROUNDROBIN) + if (sprt->rt6i_metric > metric) + break; +#else + if ((metric != 0 && sprt->rt6i_metric > metric) || + sprt->u.dst.obsolete > 0 || + sprt->u.dst.error != 0) continue; + +#endif +#if defined(CONFIG_IPV6_NEW_ROUNDROBIN) + m = __rt6_score_dflt(sprt, rt, oif); +#else + m = __rt6_score_dflt(sprt, rt6_dflt_pointer, oif); +#endif + if (m < mpri) + continue; + p = IPV6_SIGNEDPREF(IPV6_UNSHIFT_PREF(sprt->rt6i_flags)); + if ( +#if !defined(CONFIG_IPV6_NEW_ROUNDROBIN) + sprt->rt6i_metric < 
metric || +#endif + m > mpri || p > pref) { + match = sprt; + metric = sprt->rt6i_metric; + mpri = m; + pref = p; } +#if defined(CONFIG_IPV6_NEW_ROUNDROBIN) + last = sprt; +#else + if (sprt == rt6_dflt_pointer) + break; +#endif + } +#else /* CONFIG_IPV6_ROUTER_PREF / !CONFIG_IPV6_ROUTER_PREF */ + for (sprt = rt; sprt; sprt = sprt->u.next) { + int m; +#if defined(CONFIG_IPV6_NEW_ROUNDROBIN) + m = __rt6_score_dflt(sprt, rt, oif); +#else + m = __rt6_score_dflt(sprt, rt6_dflt_pointer, oif); +#endif - if (m > mpri || m >= 12) { + if (m > mpri || m >= okpri) { match = sprt; mpri = m; - if (m >= 12) { + if (m >= okpri) { /* we choose the last default router if it * is in (probably) reachable state. * If route changed, we should do pmtu @@ -293,7 +400,6 @@ } } - spin_lock(&rt6_dflt_lock); if (!match) { /* * No default routers are known to be reachable. @@ -323,14 +429,25 @@ } } } +#endif /* !CONFIG_IPV6_ROUTER_PREF */ if (match) { +#if defined(CONFIG_IPV6_NEW_ROUNDROBIN) + if (rt != last && last) { + *head = rt->u.next; + rt->u.next = last->u.next; + last->u.next = rt; + } +#else if (rt6_dflt_pointer != match) RT6_TRACE("changed default router: %p->%p\n", rt6_dflt_pointer, match); rt6_dflt_pointer = match; +#endif } +#if !defined(CONFIG_IPV6_NEW_ROUNDROBIN) spin_unlock(&rt6_dflt_lock); +#endif if (!match) { /* @@ -540,7 +657,7 @@ } if (rt->rt6i_flags & RTF_DEFAULT) { if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF) - rt = rt6_best_dflt(rt, fl->oif); + rt = rt6_best_dflt(rt, &fn->leaf, fl->oif); } else { rt = rt6_device_match(rt, fl->oif, strict); BACKTRACK(); @@ -575,7 +692,6 @@ return &rt->u.dst; } - /* * Destination cache support functions */ @@ -628,8 +744,10 @@ if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) { rt6->rt6i_flags |= RTF_MODIFIED; - if (mtu < IPV6_MIN_MTU) + if (mtu < IPV6_MIN_MTU) { mtu = IPV6_MIN_MTU; + dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; + } dst->metrics[RTAX_MTU-1] = mtu; } } @@ -769,7 +887,7 @@ return mtu; } -static int 
ipv6_get_hoplimit(struct net_device *dev) +int ipv6_get_hoplimit(struct net_device *dev) { int hoplimit = ipv6_devconf.hop_limit; struct inet6_dev *idev; @@ -965,14 +1083,8 @@ } } - if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) { - if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) - rt->u.dst.metrics[RTAX_HOPLIMIT-1] = - IPV6_DEFAULT_MCASTHOPS; - else - rt->u.dst.metrics[RTAX_HOPLIMIT-1] = - ipv6_get_hoplimit(dev); - } + if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) + rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; if (!rt->u.dst.metrics[RTAX_MTU-1]) rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); @@ -998,7 +1110,9 @@ write_lock_bh(&rt6_lock); +#ifndef CONFIG_IPV6_NEW_ROUNDROBIN rt6_reset_dflt_pointer(NULL); +#endif err = fib6_del(rt, nlh, _rtattr); dst_release(&rt->u.dst); @@ -1051,11 +1165,24 @@ { struct rt6_info *rt, *nrt; +#ifdef CONFIG_IPV6_NDISC_DEBUG + printk(KERN_DEBUG + "%s(" + "dest=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x, " + "saddr=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x, " + "neigh=%p(%s), " + "lladdr=%p, on_link=%d)\n", + __FUNCTION__, + NIP6(*dest), NIP6(*saddr), + neigh, neigh_state(neigh->nud_state), + lladdr, on_link); +#endif + /* Locate old route to this destination. */ rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1); if (rt == NULL) - return; + goto out; if (neigh->dev != rt->rt6i_dev) goto out; @@ -1068,8 +1195,12 @@ * But then router serving it might decide, that we should * know truth 8)8) --ANK (980726). */ - if (!(rt->rt6i_flags&RTF_GATEWAY)) + if (!(rt->rt6i_flags&RTF_GATEWAY)) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s(): rt=%p is on-link; ignored.\n", + __FUNCTION__, rt); goto out; + } /* * RFC 2461 specifies that redirects should only be @@ -1101,9 +1232,8 @@ } source_ok: - /* - * We have finally decided to accept it. + * Okay, we have finally decided to accept it. 
*/ neigh_update(neigh, lladdr, NUD_STALE, @@ -1152,7 +1282,6 @@ out: dst_release(&rt->u.dst); - return; } /* @@ -1164,17 +1293,7 @@ struct net_device *dev, u32 pmtu) { struct rt6_info *rt, *nrt; - - if (pmtu < IPV6_MIN_MTU) { - if (net_ratelimit()) - printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n", - pmtu); - /* According to RFC1981, the PMTU is set to the IPv6 minimum - link MTU if the node receives a Packet Too Big message - reporting next-hop MTU that is less than the IPv6 minimum MTU. - */ - pmtu = IPV6_MIN_MTU; - } + int allfrag = 0; rt = rt6_lookup(daddr, saddr, dev->ifindex, 0); @@ -1184,6 +1303,17 @@ if (pmtu >= dst_pmtu(&rt->u.dst)) goto out; + if (pmtu < IPV6_MIN_MTU) { + /* + * According to RFC2460, pmtu is set to the IPv6 minimum MTU + * (1280) and a fragment header is always included once a node + * receives a Packet Too Big message reporting a PMTU less than + * the IPv6 minimum MTU. + */ + pmtu = IPV6_MIN_MTU; + allfrag = 1; + } + /* New mtu received -> path was valid. They are sent only in response to data packets, so that this nexthop apparently is reachable. --ANK @@ -1197,6 +1327,8 @@ */ if (rt->rt6i_flags & RTF_CACHE) { rt->u.dst.metrics[RTAX_MTU-1] = pmtu; + if (allfrag) + rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires); rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; goto out; @@ -1211,6 +1343,8 @@ nrt = rt6_cow(rt, daddr, saddr); if (!nrt->u.dst.error) { nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; + if (allfrag) + nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; /* According to RFC 1981, detecting PMTU increase shouldn't be happened within 5 mins, the recommended timer is 10 mins. 
Here this route expiration time is set to ip6_rt_mtu_expires @@ -1232,6 +1366,8 @@ dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires); nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES; nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; + if (allfrag) + nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; ip6_ins_rt(nrt, NULL, NULL); } @@ -1293,7 +1429,8 @@ } struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, - struct net_device *dev) + struct net_device *dev, + int pref) { struct in6_rtmsg rtmsg; @@ -1301,7 +1438,7 @@ rtmsg.rtmsg_type = RTMSG_NEWROUTE; ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr); rtmsg.rtmsg_metric = 1024; - rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES; + rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_PREF(pref&3) | RTF_EXPIRES; rtmsg.rtmsg_ifindex = dev->ifindex; @@ -1319,7 +1456,9 @@ if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { dst_hold(&rt->u.dst); +#ifndef CONFIG_IPV6_NEW_ROUNDROBIN rt6_reset_dflt_pointer(NULL); +#endif read_unlock_bh(&rt6_lock); @@ -1371,7 +1510,13 @@ int ip6_pkt_discard(struct sk_buff *skb) { +#ifdef CONFIG_IPV6_STATISTICS + struct dst_entry *dst = skb->dst; + struct inet6_dev *idev = ((struct rt6_info *)dst)->rt6i_idev; + IP6_INC_STATS(idev, IPSTATS_MIB_OUTNOROUTES); +#else IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); +#endif icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev); kfree_skb(skb); return 0; @@ -1406,7 +1551,7 @@ rt->rt6i_idev = idev; rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst)); - rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ipv6_get_hoplimit(rt->rt6i_dev); + rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; rt->u.dst.obsolete = -1; rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; @@ -1877,7 +2022,6 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) { struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg; - int i; if (arg->skip < arg->offset / 
RT6_INFO_LEN) { arg->skip++; @@ -1887,39 +2031,29 @@ if (arg->len >= arg->length) return 0; - for (i=0; i<16; i++) { - sprintf(arg->buffer + arg->len, "%02x", - rt->rt6i_dst.addr.s6_addr[i]); - arg->len += 2; - } - arg->len += sprintf(arg->buffer + arg->len, " %02x ", + arg->len += sprintf(arg->buffer + arg->len, + "%04x%04x%04x%04x%04x%04x%04x%04x %02x ", + NIP6(rt->rt6i_dst.addr), rt->rt6i_dst.plen); #ifdef CONFIG_IPV6_SUBTREES - for (i=0; i<16; i++) { - sprintf(arg->buffer + arg->len, "%02x", - rt->rt6i_src.addr.s6_addr[i]); - arg->len += 2; - } - arg->len += sprintf(arg->buffer + arg->len, " %02x ", + arg->len += sprintf(arg->buffer + arg->len, + "%04x%04x%04x%04x%04x%04x%04x%04x %02x ", + NIP6(rt->rt6i_src.addr), rt->rt6i_src.plen); #else - sprintf(arg->buffer + arg->len, - "00000000000000000000000000000000 00 "); - arg->len += 36; + arg->len += sprintf(arg->buffer + arg->len, + "00000000000000000000000000000000 00 "); #endif - if (rt->rt6i_nexthop) { - for (i=0; i<16; i++) { - sprintf(arg->buffer + arg->len, "%02x", - rt->rt6i_nexthop->primary_key[i]); - arg->len += 2; - } - } else { - sprintf(arg->buffer + arg->len, - "00000000000000000000000000000000"); - arg->len += 32; - } + if (rt->rt6i_nexthop) + arg->len += sprintf(arg->buffer + arg->len, + "%04x%04x%04x%04x%04x%04x%04x%04x", + NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key))); + else + arg->len += sprintf(arg->buffer + arg->len, + "00000000000000000000000000000000"); + arg->len += sprintf(arg->buffer + arg->len, " %08x %08x %08x %08x %8s\n", rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt), @@ -2080,6 +2214,15 @@ .proc_handler = &proc_dointvec_jiffies, .strategy = &sysctl_jiffies, }, + { + .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS, + .procname = "gc_min_interval_ms", + .data = &ip6_rt_gc_min_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_ms_jiffies, + .strategy = &sysctl_ms_jiffies, + }, { .ctl_name = 0 } }; diff -urN linux-2.6.11/net/ipv6/udp.c 
x1/net/ipv6/udp.c --- linux-2.6.11/net/ipv6/udp.c 2005-03-02 08:38:20.000000000 +0100 +++ x1/net/ipv6/udp.c 2005-02-28 07:45:55.000000000 +0100 @@ -277,7 +277,6 @@ if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) sin6->sin6_scope_id = IP6CB(skb)->iif; } - } if (skb->protocol == htons(ETH_P_IP)) { if (inet->cmsg_flags) @@ -811,6 +810,8 @@ hlimit = np->hop_limit; if (hlimit < 0) hlimit = dst_metric(dst, RTAX_HOPLIMIT); + if (hlimit < 0) + hlimit = ipv6_get_hoplimit(dst->dev); } if (msg->msg_flags&MSG_CONFIRM) diff -urN linux-2.6.11/net/ipv6/xfrm6_input.c x1/net/ipv6/xfrm6_input.c --- linux-2.6.11/net/ipv6/xfrm6_input.c 2005-03-02 08:38:33.000000000 +0100 +++ x1/net/ipv6/xfrm6_input.c 2004-09-30 15:26:35.000000000 +0200 @@ -42,7 +42,7 @@ nexthdr = skb->nh.raw[nhoff]; seq = 0; - if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) + if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi)) != 0) goto drop; do { @@ -58,9 +58,6 @@ if (unlikely(x->km.state != XFRM_STATE_VALID)) goto drop_unlock; - if (x->props.replay_window && xfrm_replay_check(x, seq)) - goto drop_unlock; - if (xfrm_state_check_expire(x)) goto drop_unlock; @@ -70,9 +67,6 @@ skb->nh.raw[nhoff] = nexthdr; - if (x->props.replay_window) - xfrm_replay_advance(x, seq); - x->curlft.bytes += skb->len; x->curlft.packets++; @@ -99,8 +93,9 @@ break; } - if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) < 0) + if ((err = xfrm_parse_spi(skb, nexthdr, &spi)) < 0) goto drop; + } while (!err); /* Allocate new secpath or COW existing one. 
*/ diff -urN linux-2.6.11/net/ipv6/xfrm6_policy.c x1/net/ipv6/xfrm6_policy.c --- linux-2.6.11/net/ipv6/xfrm6_policy.c 2005-03-02 08:37:50.000000000 +0100 +++ x1/net/ipv6/xfrm6_policy.c 2004-11-25 06:33:10.000000000 +0100 @@ -25,8 +25,8 @@ static int xfrm6_dst_lookup(struct xfrm_dst **dst, struct flowi *fl) { int err = 0; - *dst = (struct xfrm_dst*)ip6_route_output(NULL, fl); - if (!*dst) + err = ip6_dst_lookup(NULL, (struct dst_entry **)dst, fl); + if (err) err = -ENETUNREACH; return err; } @@ -56,7 +56,6 @@ { struct dst_entry *dst; - /* Still not clear if we should set fl->fl6_{src,dst}... */ read_lock_bh(&policy->lock); for (dst = policy->bundles; dst; dst = dst->next) { struct xfrm_dst *xdst = (struct xfrm_dst*)dst; diff -urN linux-2.6.11/net/xfrm/xfrm_input.c x1/net/xfrm/xfrm_input.c --- linux-2.6.11/net/xfrm/xfrm_input.c 2005-03-02 08:38:34.000000000 +0100 +++ x1/net/xfrm/xfrm_input.c 2005-02-18 10:36:26.000000000 +0100 @@ -46,24 +46,21 @@ /* Fetch spi and seq from ipsec header */ -int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq) +int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi) { - int offset, offset_seq; + int offset; switch (nexthdr) { case IPPROTO_AH: offset = offsetof(struct ip_auth_hdr, spi); - offset_seq = offsetof(struct ip_auth_hdr, seq_no); break; case IPPROTO_ESP: offset = offsetof(struct ip_esp_hdr, spi); - offset_seq = offsetof(struct ip_esp_hdr, seq_no); break; case IPPROTO_COMP: if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr))) return -EINVAL; *spi = ntohl(ntohs(*(u16*)(skb->h.raw + 2))); - *seq = 0; return 0; default: return 1; @@ -73,7 +70,6 @@ return -EINVAL; *spi = *(u32*)(skb->h.raw + offset); - *seq = *(u32*)(skb->h.raw + offset_seq); return 0; } EXPORT_SYMBOL(xfrm_parse_spi); diff -urN linux-2.6.11/net/xfrm/xfrm_policy.c x1/net/xfrm/xfrm_policy.c --- linux-2.6.11/net/xfrm/xfrm_policy.c 2005-03-02 08:38:09.000000000 +0100 +++ x1/net/xfrm/xfrm_policy.c 2005-02-28 07:45:55.000000000 +0100 @@ -703,9 
+703,14 @@ static inline int policy_to_flow_dir(int dir) { +#ifdef CONFIG_USE_POLICY_FWD if (XFRM_POLICY_IN == FLOW_DIR_IN && XFRM_POLICY_OUT == FLOW_DIR_OUT && XFRM_POLICY_FWD == FLOW_DIR_FWD) +#else + if (XFRM_POLICY_IN == FLOW_DIR_IN && + XFRM_POLICY_OUT == FLOW_DIR_OUT) +#endif return dir; switch (dir) { default: @@ -713,8 +718,10 @@ return FLOW_DIR_IN; case XFRM_POLICY_OUT: return FLOW_DIR_OUT; +#ifdef CONFIG_USE_POLICY_FWD case XFRM_POLICY_FWD: return FLOW_DIR_FWD; +#endif }; } diff -urN linux-2.6.11/net/xfrm/xfrm_user.c x1/net/xfrm/xfrm_user.c --- linux-2.6.11/net/xfrm/xfrm_user.c 2005-03-02 08:38:10.000000000 +0100 +++ x1/net/xfrm/xfrm_user.c 2005-02-03 06:35:55.000000000 +0100 @@ -530,7 +530,9 @@ switch (dir) { case XFRM_POLICY_IN: case XFRM_POLICY_OUT: +#ifdef CONFIG_USE_POLICY_FWD case XFRM_POLICY_FWD: +#endif break; default: @@ -1136,14 +1138,14 @@ switch (family) { case AF_INET: - if (opt != IP_XFRM_POLICY) { + if (opt != IP_XFRM_POLICY && opt != IP_IPSEC_POLICY) { *dir = -EOPNOTSUPP; return NULL; } break; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case AF_INET6: - if (opt != IPV6_XFRM_POLICY) { + if (opt != IPV6_XFRM_POLICY && opt != IPV6_IPSEC_POLICY) { *dir = -EOPNOTSUPP; return NULL; }