patch-2.4.0-test3 linux/net/ipv4/ip_fragment.c
Next file: linux/net/ipv4/ip_gre.c
Previous file: linux/net/ipv4/arp.c
Back to the patch index
Back to the overall index
- Lines: 987
- Date:
Sun Jul 9 22:30:41 2000
- Orig file:
v2.4.0-test2/linux/net/ipv4/ip_fragment.c
- Orig date:
Wed Apr 26 16:34:09 2000
diff -u --recursive --new-file v2.4.0-test2/linux/net/ipv4/ip_fragment.c linux/net/ipv4/ip_fragment.c
@@ -5,7 +5,7 @@
*
* The IP fragmentation functionality.
*
- * Version: $Id: ip_fragment.c,v 1.49 2000/04/15 01:48:10 davem Exp $
+ * Version: $Id: ip_fragment.c,v 1.50 2000/07/07 22:29:42 davem Exp $
*
* Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
* Alan Cox <Alan.Cox@linux.org>
@@ -18,6 +18,7 @@
* Ultima : ip_expire() kernel panic.
* Bill Hawes : Frag accounting and evictor fixes.
* John McDonald : 0 length frag bug.
+ * Alexey Kuznetsov: SMP races, threading, cleanup.
*/
#include <linux/config.h>
@@ -31,11 +32,17 @@
#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
+#include <net/checksum.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/inet.h>
#include <linux/netfilter_ipv4.h>
+/* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
+ * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
+ * as well. Or notify me, at least. --ANK
+ */
+
/* Fragment cache limits. We will commit 256K at one time. Should we
* cross that limit we will prune down to 192K. This should cope with
* even the most extreme cases without allowing an attacker to measurably
@@ -46,38 +53,77 @@
int sysctl_ipfrag_time = IP_FRAG_TIME;
-/* Describe an IP fragment. */
-struct ipfrag {
- int offset; /* offset of fragment in IP datagram */
- int end; /* last byte of data in datagram */
- int len; /* length of this fragment */
- struct sk_buff *skb; /* complete received fragment */
- unsigned char *ptr; /* pointer into real fragment data */
- struct ipfrag *next; /* linked list pointers */
- struct ipfrag *prev;
+struct ipfrag_skb_cb
+{
+ struct inet_skb_parm h;
+ int offset;
};
+#define FRAG_CB(skb) ((struct ipfrag_skb_cb*)((skb)->cb))
+
/* Describe an entry in the "incomplete datagrams" queue. */
struct ipq {
- struct iphdr *iph; /* pointer to IP header */
struct ipq *next; /* linked list pointers */
- struct ipfrag *fragments; /* linked list of received fragments */
+ u32 saddr;
+ u32 daddr;
+ u16 id;
+ u8 protocol;
+ u8 last_in;
+#define COMPLETE 4
+#define FIRST_IN 2
+#define LAST_IN 1
+
+ struct sk_buff *fragments; /* linked list of received fragments */
int len; /* total length of original datagram */
- short ihlen; /* length of the IP header */
+ int meat;
+ spinlock_t lock;
+ atomic_t refcnt;
struct timer_list timer; /* when will this queue expire? */
struct ipq **pprev;
- struct net_device *dev; /* Device - for icmp replies */
+ struct net_device *dev; /* Device - for icmp replies */
};
+/* Hash table. */
+
#define IPQ_HASHSZ 64
+/* Per-bucket lock is easy to add now. */
static struct ipq *ipq_hash[IPQ_HASHSZ];
-static spinlock_t ipfrag_lock = SPIN_LOCK_UNLOCKED;
+static rwlock_t ipfrag_lock = RW_LOCK_UNLOCKED;
+int ip_frag_nqueues = 0;
+
+static __inline__ void __ipq_unlink(struct ipq *qp)
+{
+ if(qp->next)
+ qp->next->pprev = qp->pprev;
+ *qp->pprev = qp->next;
+ ip_frag_nqueues--;
+}
+
+static __inline__ void ipq_unlink(struct ipq *ipq)
+{
+ write_lock(&ipfrag_lock);
+ __ipq_unlink(ipq);
+ write_unlock(&ipfrag_lock);
+}
+
+/*
+ * Was: ((((id) >> 1) ^ (saddr) ^ (daddr) ^ (prot)) & (IPQ_HASHSZ - 1))
+ *
+ * I see, I see evil hand of bigendian mafia. On Intel all the packets hit
+ * one hash bucket with this hash function. 8)
+ */
+static __inline__ unsigned int ipqhashfn(u16 id, u32 saddr, u32 daddr, u8 prot)
+{
+ unsigned int h = saddr ^ daddr;
+
+ h ^= (h>>16)^id;
+ h ^= (h>>8)^prot;
+ return h & (IPQ_HASHSZ - 1);
+}
-#define ipqhashfn(id, saddr, daddr, prot) \
- ((((id) >> 1) ^ (saddr) ^ (daddr) ^ (prot)) & (IPQ_HASHSZ - 1))
-static atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */
+atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */
/* Memory Tracking Functions. */
extern __inline__ void frag_kfree_skb(struct sk_buff *skb)
@@ -86,112 +132,106 @@
kfree_skb(skb);
}
-extern __inline__ void frag_kfree_s(void *ptr, int len)
+extern __inline__ void frag_free_queue(struct ipq *qp)
{
- atomic_sub(len, &ip_frag_mem);
- kfree(ptr);
+ atomic_sub(sizeof(struct ipq), &ip_frag_mem);
+ kfree(qp);
}
-
-extern __inline__ void *frag_kmalloc(int size, int pri)
+
+extern __inline__ struct ipq *frag_alloc_queue(void)
{
- void *vp = kmalloc(size, pri);
+ struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);
- if(!vp)
+ if(!qp)
return NULL;
- atomic_add(size, &ip_frag_mem);
- return vp;
+ atomic_add(sizeof(struct ipq), &ip_frag_mem);
+ return qp;
}
-
-/* Create a new fragment entry. */
-static struct ipfrag *ip_frag_create(int offset, int end,
- struct sk_buff *skb, unsigned char *ptr)
+
+
+/* Destruction primitives. */
+
+/* Complete destruction of ipq. */
+static void ip_frag_destroy(struct ipq *qp)
{
- struct ipfrag *fp;
+ struct sk_buff *fp;
- fp = (struct ipfrag *) frag_kmalloc(sizeof(struct ipfrag), GFP_ATOMIC);
- if (fp == NULL)
- goto out_nomem;
+ BUG_TRAP(qp->last_in&COMPLETE);
+ BUG_TRAP(del_timer(&qp->timer) == 0);
- /* Fill in the structure. */
- fp->offset = offset;
- fp->end = end;
- fp->len = end - offset;
- fp->skb = skb;
- fp->ptr = ptr;
- fp->next = fp->prev = NULL;
-
- /* Charge for the SKB as well. */
- atomic_add(skb->truesize, &ip_frag_mem);
+ /* Release all fragment data. */
+ fp = qp->fragments;
+ while (fp) {
+ struct sk_buff *xp = fp->next;
- return(fp);
+ frag_kfree_skb(fp);
+ fp = xp;
+ }
-out_nomem:
- NETDEBUG(printk(KERN_ERR "IP: frag_create: no memory left !\n"));
- return(NULL);
+ /* Finally, release the queue descriptor itself. */
+ frag_free_queue(qp);
}
-/* Find the correct entry in the "incomplete datagrams" queue for
- * this IP datagram, and return the queue entry address if found.
- */
-static inline struct ipq *ip_find(struct iphdr *iph, struct dst_entry *dst)
+static __inline__ void ipq_put(struct ipq *ipq)
{
- __u16 id = iph->id;
- __u32 saddr = iph->saddr;
- __u32 daddr = iph->daddr;
- __u8 protocol = iph->protocol;
- unsigned int hash = ipqhashfn(id, saddr, daddr, protocol);
- struct ipq *qp;
-
- /* We are always in BH context, and protected by the
- * ipfrag lock.
- */
- for(qp = ipq_hash[hash]; qp; qp = qp->next) {
- if(qp->iph->id == id &&
- qp->iph->saddr == saddr &&
- qp->iph->daddr == daddr &&
- qp->iph->protocol == protocol) {
- del_timer(&qp->timer);
- break;
- }
- }
- return qp;
+ if (atomic_dec_and_test(&ipq->refcnt))
+ ip_frag_destroy(ipq);
}
-/* Remove an entry from the "incomplete datagrams" queue, either
- * because we completed, reassembled and processed it, or because
- * it timed out.
- *
- * This is called _only_ from BH contexts with the ipfrag lock held,
- * on packet reception processing and from frag queue expiration
- * timers. -DaveM
+/* Kill ipq entry. It is not destroyed immediately,
+ * because caller (and someone more) holds reference count.
*/
-static void ip_free(struct ipq *qp)
+static __inline__ void ipq_kill(struct ipq *ipq)
{
- struct ipfrag *fp;
-
- /* Stop the timer for this entry. */
- del_timer(&qp->timer);
-
- /* Remove this entry from the "incomplete datagrams" queue. */
- if(qp->next)
- qp->next->pprev = qp->pprev;
- *qp->pprev = qp->next;
-
- /* Release all fragment data. */
- fp = qp->fragments;
- while (fp) {
- struct ipfrag *xp = fp->next;
+ if (del_timer(&ipq->timer))
+ atomic_dec(&ipq->refcnt);
- frag_kfree_skb(fp->skb);
- frag_kfree_s(fp, sizeof(struct ipfrag));
- fp = xp;
+ if (!(ipq->last_in & COMPLETE)) {
+ ipq_unlink(ipq);
+ atomic_dec(&ipq->refcnt);
+ ipq->last_in |= COMPLETE;
}
+}
- /* Release the IP header. */
- frag_kfree_s(qp->iph, 64 + 8);
+/* Memory limiting on fragments. Evictor trashes the oldest
+ * fragment queue until we are back under the low threshold.
+ */
+static void ip_evictor(void)
+{
+ int i, progress;
- /* Finally, release the queue descriptor itself. */
- frag_kfree_s(qp, sizeof(struct ipq));
+ do {
+ if (atomic_read(&ip_frag_mem) <= sysctl_ipfrag_low_thresh)
+ return;
+ progress = 0;
+ /* FIXME: Make LRU queue of frag heads. -DaveM */
+ for (i = 0; i < IPQ_HASHSZ; i++) {
+ struct ipq *qp;
+ if (ipq_hash[i] == NULL)
+ continue;
+
+ write_lock(&ipfrag_lock);
+ if ((qp = ipq_hash[i]) != NULL) {
+ /* find the oldest queue for this hash bucket */
+ while (qp->next)
+ qp = qp->next;
+ __ipq_unlink(qp);
+ write_unlock(&ipfrag_lock);
+
+ spin_lock(&qp->lock);
+ if (del_timer(&qp->timer))
+ atomic_dec(&qp->refcnt);
+ qp->last_in |= COMPLETE;
+ spin_unlock(&qp->lock);
+
+ ipq_put(qp);
+ IP_INC_STATS_BH(IpReasmFails);
+ progress = 1;
+ continue;
+ }
+ write_unlock(&ipfrag_lock);
+ }
+ } while (progress);
}
/*
@@ -201,181 +241,310 @@
{
struct ipq *qp = (struct ipq *) arg;
- spin_lock(&ipfrag_lock);
- if(!qp->fragments)
- {
-#ifdef IP_EXPIRE_DEBUG
- printk("warning: possible ip-expire attack\n");
-#endif
+ spin_lock(&qp->lock);
+
+ if (qp->last_in & COMPLETE)
goto out;
- }
-
- /* Send an ICMP "Fragment Reassembly Timeout" message. */
+
+ ipq_kill(qp);
+
IP_INC_STATS_BH(IpReasmTimeout);
IP_INC_STATS_BH(IpReasmFails);
- icmp_send(qp->fragments->skb, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
+ if ((qp->last_in&FIRST_IN) && qp->fragments != NULL) {
+ /* Send an ICMP "Fragment Reassembly Timeout" message. */
+ icmp_send(qp->fragments, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
+ }
out:
- /* Nuke the fragment queue. */
- ip_free(qp);
- spin_unlock(&ipfrag_lock);
+ spin_unlock(&qp->lock);
+ ipq_put(qp);
}
-/* Memory limiting on fragments. Evictor trashes the oldest
- * fragment queue until we are back under the low threshold.
- *
- * We are always called in BH with the ipfrag lock held.
- */
-static void ip_evictor(void)
+/* Creation primitives. */
+
+static struct ipq *ip_frag_intern(unsigned int hash, struct ipq *qp_in)
{
- int i, progress;
+ struct ipq *qp;
-restart:
- progress = 0;
- /* FIXME: Make LRU queue of frag heads. -DaveM */
- for (i = 0; i < IPQ_HASHSZ; i++) {
- struct ipq *qp;
- if (atomic_read(&ip_frag_mem) <= sysctl_ipfrag_low_thresh)
- return;
- qp = ipq_hash[i];
- if (qp) {
- /* find the oldest queue for this hash bucket */
- while (qp->next)
- qp = qp->next;
- ip_free(qp);
- progress = 1;
+ write_lock(&ipfrag_lock);
+#ifdef CONFIG_SMP
+ /* With SMP race we have to recheck hash table, because
+ * such entry could be created on other cpu, while we
+ * promoted read lock to write lock.
+ */
+ for(qp = ipq_hash[hash]; qp; qp = qp->next) {
+ if(qp->id == qp_in->id &&
+ qp->saddr == qp_in->saddr &&
+ qp->daddr == qp_in->daddr &&
+ qp->protocol == qp_in->protocol) {
+ atomic_inc(&qp->refcnt);
+ write_unlock(&ipfrag_lock);
+ qp_in->last_in |= COMPLETE;
+ ipq_put(qp_in);
+ return qp;
}
}
- if (progress)
- goto restart;
- panic("ip_evictor: memcount");
+#endif
+ qp = qp_in;
+
+ atomic_inc(&qp->refcnt);
+ if((qp->next = ipq_hash[hash]) != NULL)
+ qp->next->pprev = &qp->next;
+ ipq_hash[hash] = qp;
+ qp->pprev = &ipq_hash[hash];
+ ip_frag_nqueues++;
+ write_unlock(&ipfrag_lock);
+ return qp;
}
-/* Add an entry to the 'ipq' queue for a newly received IP datagram.
- * We will (hopefully :-) receive all other fragments of this datagram
- * in time, so we just create a queue for this datagram, in which we
- * will insert the received fragments at their respective positions.
- */
-static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph)
+/* Add an entry to the 'ipq' queue for a newly received IP datagram. */
+static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph)
{
struct ipq *qp;
- unsigned int hash;
- int ihlen;
- qp = (struct ipq *) frag_kmalloc(sizeof(struct ipq), GFP_ATOMIC);
- if (qp == NULL)
+ if ((qp = frag_alloc_queue()) == NULL)
goto out_nomem;
- /* Allocate memory for the IP header (plus 8 octets for ICMP). */
- ihlen = iph->ihl * 4;
-
- qp->iph = (struct iphdr *) frag_kmalloc(64 + 8, GFP_ATOMIC);
- if (qp->iph == NULL)
- goto out_free;
-
- memcpy(qp->iph, iph, ihlen + 8);
+ qp->protocol = iph->protocol;
+ qp->last_in = 0;
+ qp->id = iph->id;
+ qp->saddr = iph->saddr;
+ qp->daddr = iph->daddr;
qp->len = 0;
- qp->ihlen = ihlen;
+ qp->meat = 0;
qp->fragments = NULL;
- qp->dev = skb->dev;
/* Initialize a timer for this entry. */
init_timer(&qp->timer);
- qp->timer.expires = 0; /* (to be set later) */
qp->timer.data = (unsigned long) qp; /* pointer to queue */
qp->timer.function = ip_expire; /* expire function */
+ qp->lock = SPIN_LOCK_UNLOCKED;
+ atomic_set(&qp->refcnt, 1);
- /* Add this entry to the queue. */
- hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
+ return ip_frag_intern(hash, qp);
- /* In a BH context and ipfrag lock is held. -DaveM */
- if((qp->next = ipq_hash[hash]) != NULL)
- qp->next->pprev = &qp->next;
- ipq_hash[hash] = qp;
- qp->pprev = &ipq_hash[hash];
+out_nomem:
+ NETDEBUG(printk(KERN_ERR "ip_frag_create: no memory left !\n"));
+ return NULL;
+}
- return qp;
+/* Find the correct entry in the "incomplete datagrams" queue for
+ * this IP datagram, and create new one, if nothing is found.
+ */
+static inline struct ipq *ip_find(struct iphdr *iph)
+{
+ __u16 id = iph->id;
+ __u32 saddr = iph->saddr;
+ __u32 daddr = iph->daddr;
+ __u8 protocol = iph->protocol;
+ unsigned int hash = ipqhashfn(id, saddr, daddr, protocol);
+ struct ipq *qp;
-out_free:
- frag_kfree_s(qp, sizeof(struct ipq));
-out_nomem:
- NETDEBUG(printk(KERN_ERR "IP: create: no memory left !\n"));
- return(NULL);
+ read_lock(&ipfrag_lock);
+ for(qp = ipq_hash[hash]; qp; qp = qp->next) {
+ if(qp->id == id &&
+ qp->saddr == saddr &&
+ qp->daddr == daddr &&
+ qp->protocol == protocol) {
+ atomic_inc(&qp->refcnt);
+ read_unlock(&ipfrag_lock);
+ return qp;
+ }
+ }
+ read_unlock(&ipfrag_lock);
+
+ return ip_frag_create(hash, iph);
}
-/* See if a fragment queue is complete. */
-static int ip_done(struct ipq *qp)
+/* Add new segment to existing queue. */
+static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
{
- struct ipfrag *fp;
- int offset;
+ struct iphdr *iph = skb->nh.iph;
+ struct sk_buff *prev, *next;
+ int flags, offset;
+ int ihl, end;
- /* Only possible if we received the final fragment. */
- if (qp->len == 0)
- return 0;
+ if (qp->last_in & COMPLETE)
+ goto err;
- /* Check all fragment offsets to see if they connect. */
- fp = qp->fragments;
- offset = 0;
- while (fp) {
- if (fp->offset > offset)
- return(0); /* fragment(s) missing */
- offset = fp->end;
- fp = fp->next;
+ if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time))
+ atomic_inc(&qp->refcnt);
+
+ offset = ntohs(iph->frag_off);
+ flags = offset & ~IP_OFFSET;
+ offset &= IP_OFFSET;
+ offset <<= 3; /* offset is in 8-byte chunks */
+ ihl = iph->ihl * 4;
+
+ /* Determine the position of this fragment. */
+ end = offset + (ntohs(iph->tot_len) - ihl);
+
+ /* Is this the final fragment? */
+ if ((flags & IP_MF) == 0) {
+ /* If we already have some bits beyond end
+ * or have different end, the segment is corrupted.
+ */
+ if (end < qp->len ||
+ ((qp->last_in & LAST_IN) && end != qp->len))
+ goto err;
+ qp->last_in |= LAST_IN;
+ qp->len = end;
+ } else {
+ if (end&7) {
+ end &= ~7;
+ if (skb->ip_summed != CHECKSUM_UNNECESSARY)
+ skb->ip_summed = CHECKSUM_NONE;
+ }
+ if (end > qp->len) {
+ /* Some bits beyond end -> corruption. */
+ if (qp->last_in & LAST_IN)
+ goto err;
+ qp->len = end;
+ }
+ }
+ if (end == offset)
+ goto err;
+
+ /* Point into the IP datagram 'data' part. */
+ skb_pull(skb, (skb->nh.raw+ihl) - skb->data);
+ skb_trim(skb, end - offset);
+
+ /* Find out which fragments are in front and at the back of us
+ * in the chain of fragments so far. We must know where to put
+ * this fragment, right?
+ */
+ prev = NULL;
+ for(next = qp->fragments; next != NULL; next = next->next) {
+ if (FRAG_CB(next)->offset >= offset)
+ break; /* bingo! */
+ prev = next;
+ }
+
+ /* We found where to put this one. Check for overlap with
+ * preceding fragment, and, if needed, align things so that
+ * any overlaps are eliminated.
+ */
+ if (prev) {
+ int i = (FRAG_CB(prev)->offset + prev->len) - offset;
+
+ if (i > 0) {
+ offset += i;
+ if (end <= offset)
+ goto err;
+ skb_pull(skb, i);
+ if (skb->ip_summed != CHECKSUM_UNNECESSARY)
+ skb->ip_summed = CHECKSUM_NONE;
+ }
+ }
+
+ while (next && FRAG_CB(next)->offset < end) {
+ int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */
+
+ if (i < next->len) {
+ /* Eat head of the next overlapped fragment
+ * and leave the loop. The next ones cannot overlap.
+ */
+ FRAG_CB(next)->offset += i;
+ skb_pull(next, i);
+ qp->meat -= i;
+ if (next->ip_summed != CHECKSUM_UNNECESSARY)
+ next->ip_summed = CHECKSUM_NONE;
+ break;
+ } else {
+ struct sk_buff *free_it = next;
+
+ /* Old fragment is completely overridden with
+ * new one; drop it.
+ */
+ next = next->next;
+
+ if (prev)
+ prev->next = next;
+ else
+ qp->fragments = next;
+
+ qp->meat -= free_it->len;
+ frag_kfree_skb(free_it);
+ }
}
- /* All fragments are present. */
- return 1;
+ FRAG_CB(skb)->offset = offset;
+
+ /* Insert this fragment in the chain of fragments. */
+ skb->next = next;
+ if (prev)
+ prev->next = skb;
+ else
+ qp->fragments = skb;
+
+ qp->dev = skb->dev;
+ qp->meat += skb->len;
+ atomic_add(skb->truesize, &ip_frag_mem);
+ if (offset == 0)
+ qp->last_in |= FIRST_IN;
+
+ return;
+
+err:
+ kfree_skb(skb);
}
+
/* Build a new IP datagram from all its fragments.
*
* FIXME: We copy here because we lack an effective way of handling lists
* of bits on input. Until the new skb data handling is in I'm not going
* to touch this with a bargepole.
*/
-static struct sk_buff *ip_glue(struct ipq *qp)
+static struct sk_buff *ip_frag_reasm(struct ipq *qp)
{
struct sk_buff *skb;
struct iphdr *iph;
- struct ipfrag *fp;
- unsigned char *ptr;
- int count, len;
+ struct sk_buff *fp, *head = qp->fragments;
+ int len;
+ int ihlen;
+
+ ipq_kill(qp);
+
+ BUG_TRAP(head != NULL);
+ BUG_TRAP(FRAG_CB(head)->offset == 0);
/* Allocate a new buffer for the datagram. */
- len = qp->ihlen + qp->len;
-
+ ihlen = head->nh.iph->ihl*4;
+ len = ihlen + qp->len;
+
if(len > 65535)
goto out_oversize;
-
+
skb = dev_alloc_skb(len);
if (!skb)
goto out_nomem;
/* Fill in the basic details. */
- skb->mac.raw = ptr = skb->data;
- skb->nh.iph = iph = (struct iphdr *) skb_put(skb, len);
+ skb->mac.raw = skb->data;
+ skb->nh.raw = skb->data;
+ FRAG_CB(skb)->h = FRAG_CB(head)->h;
+ skb->ip_summed = head->ip_summed;
+ skb->csum = 0;
/* Copy the original IP headers into the new buffer. */
- memcpy(ptr, qp->iph, qp->ihlen);
- ptr += qp->ihlen;
+ memcpy(skb_put(skb, ihlen), head->nh.iph, ihlen);
/* Copy the data portions of all fragments into the new buffer. */
- fp = qp->fragments;
- count = qp->ihlen;
- while(fp) {
- if ((fp->len <= 0) || ((count + fp->len) > skb->len))
- goto out_invalid;
- memcpy((ptr + fp->offset), fp->ptr, fp->len);
- if (count == qp->ihlen) {
- skb->dst = dst_clone(fp->skb->dst);
- skb->dev = fp->skb->dev;
- }
- count += fp->len;
- fp = fp->next;
+ for (fp=head; fp; fp = fp->next) {
+ memcpy(skb_put(skb, fp->len), fp->data, fp->len);
+
+ if (skb->ip_summed != fp->ip_summed)
+ skb->ip_summed = CHECKSUM_NONE;
+ else if (skb->ip_summed == CHECKSUM_HW)
+ skb->csum = csum_chain(skb->csum, fp->csum);
}
- skb->pkt_type = qp->fragments->skb->pkt_type;
- skb->protocol = qp->fragments->skb->protocol;
+ skb->dst = dst_clone(head->dst);
+ skb->pkt_type = head->pkt_type;
+ skb->protocol = head->protocol;
+ skb->dev = qp->dev;
+
/*
* Clearly bogus, because security markings of the individual
* fragments should have been checked for consistency before
@@ -385,29 +554,24 @@
* as well take the value associated with the first fragment.
* --rct
*/
- skb->security = qp->fragments->skb->security;
+ skb->security = head->security;
#ifdef CONFIG_NETFILTER
/* Connection association is same as fragment (if any). */
- skb->nfct = qp->fragments->skb->nfct;
+ skb->nfct = head->nfct;
nf_conntrack_get(skb->nfct);
#ifdef CONFIG_NETFILTER_DEBUG
- skb->nf_debug = qp->fragments->skb->nf_debug;
+ skb->nf_debug = head->nf_debug;
#endif
#endif
/* Done with all fragments. Fixup the new IP header. */
iph = skb->nh.iph;
iph->frag_off = 0;
- iph->tot_len = htons(count);
+ iph->tot_len = htons(len);
IP_INC_STATS_BH(IpReasmOKs);
return skb;
-out_invalid:
- NETDEBUG(printk(KERN_ERR
- "Invalid fragment list: Fragment over size.\n"));
- kfree_skb(skb);
- goto out_fail;
out_nomem:
NETDEBUG(printk(KERN_ERR
"IP: queue_glue: no memory for gluing queue %p\n",
@@ -417,7 +581,7 @@
if (net_ratelimit())
printk(KERN_INFO
"Oversized IP packet from %d.%d.%d.%d.\n",
- NIPQUAD(qp->iph->saddr));
+ NIPQUAD(qp->saddr));
out_fail:
IP_INC_STATS_BH(IpReasmFails);
return NULL;
@@ -427,185 +591,32 @@
struct sk_buff *ip_defrag(struct sk_buff *skb)
{
struct iphdr *iph = skb->nh.iph;
- struct ipfrag *prev, *next, *tmp, *tfp;
struct ipq *qp;
- unsigned char *ptr;
- int flags, offset;
- int i, ihl, end;
IP_INC_STATS_BH(IpReasmReqds);
- spin_lock(&ipfrag_lock);
-
/* Start by cleaning up the memory. */
if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh)
ip_evictor();
- /*
- * Look for the entry for this IP datagram in the
- * "incomplete datagrams" queue. If found, the
- * timer is removed.
- */
- qp = ip_find(iph, skb->dst);
-
- /* Is this a non-fragmented datagram? */
- offset = ntohs(iph->frag_off);
- flags = offset & ~IP_OFFSET;
- offset &= IP_OFFSET;
-
- offset <<= 3; /* offset is in 8-byte chunks */
- ihl = iph->ihl * 4;
-
- /*
- * Check whether to create a fresh queue entry. If the
- * queue already exists, its timer will be restarted as
- * long as we continue to receive fragments.
- */
- if (qp) {
- /* ANK. If the first fragment is received,
- * we should remember the correct IP header (with options)
- */
- if (offset == 0) {
- /* Fragmented frame replaced by unfragmented copy? */
- if ((flags & IP_MF) == 0)
- goto out_freequeue;
- qp->ihlen = ihl;
- memcpy(qp->iph, iph, (ihl + 8));
- }
- } else {
- /* Fragmented frame replaced by unfragmented copy? */
- if ((offset == 0) && ((flags & IP_MF) == 0))
- goto out_skb;
-
- /* If we failed to create it, then discard the frame. */
- qp = ip_create(skb, iph);
- if (!qp)
- goto out_freeskb;
- }
-
- /* Attempt to construct an oversize packet. */
- if((ntohs(iph->tot_len) + ((int) offset)) > 65535)
- goto out_oversize;
-
- /* Determine the position of this fragment. */
- end = offset + ntohs(iph->tot_len) - ihl;
-
- /* Is this the final fragment? */
- if ((flags & IP_MF) == 0)
- qp->len = end;
-
- /* Find out which fragments are in front and at the back of us
- * in the chain of fragments so far. We must know where to put
- * this fragment, right?
- */
- prev = NULL;
- for(next = qp->fragments; next != NULL; next = next->next) {
- if (next->offset >= offset)
- break; /* bingo! */
- prev = next;
- }
-
- /* Point into the IP datagram 'data' part. */
- ptr = skb->data + ihl;
+ /* Lookup (or create) queue header */
+ if ((qp = ip_find(iph)) != NULL) {
+ struct sk_buff *ret = NULL;
- /* We found where to put this one. Check for overlap with
- * preceding fragment, and, if needed, align things so that
- * any overlaps are eliminated.
- */
- if ((prev != NULL) && (offset < prev->end)) {
- i = prev->end - offset;
- offset += i; /* ptr into datagram */
- ptr += i; /* ptr into fragment data */
- }
+ spin_lock(&qp->lock);
- /* Look for overlap with succeeding segments.
- * If we can merge fragments, do it.
- */
- for (tmp = next; tmp != NULL; tmp = tfp) {
- tfp = tmp->next;
- if (tmp->offset >= end)
- break; /* no overlaps at all */
-
- i = end - next->offset; /* overlap is 'i' bytes */
- tmp->len -= i; /* so reduce size of */
- tmp->offset += i; /* next fragment */
- tmp->ptr += i;
+ ip_frag_queue(qp, skb);
- /* If we get a frag size of <= 0, remove it and the packet
- * that it goes with.
- */
- if (tmp->len <= 0) {
- if (tmp->prev != NULL)
- tmp->prev->next = tmp->next;
- else
- qp->fragments = tmp->next;
+ if (qp->last_in == (FIRST_IN|LAST_IN) &&
+ qp->meat == qp->len)
+ ret = ip_frag_reasm(qp);
- if (tmp->next != NULL)
- tmp->next->prev = tmp->prev;
-
- /* We have killed the original next frame. */
- next = tfp;
-
- frag_kfree_skb(tmp->skb);
- frag_kfree_s(tmp, sizeof(struct ipfrag));
- }
+ spin_unlock(&qp->lock);
+ ipq_put(qp);
+ return ret;
}
- /*
- * Create a fragment to hold this skb.
- * No memory to save the fragment? throw the lot ...
- */
- tfp = ip_frag_create(offset, end, skb, ptr);
- if (!tfp)
- goto out_freeskb;
-
- /* Insert this fragment in the chain of fragments. */
- tfp->prev = prev;
- tfp->next = next;
- if (prev != NULL)
- prev->next = tfp;
- else
- qp->fragments = tfp;
-
- if (next != NULL)
- next->prev = tfp;
-
- /* OK, so we inserted this new fragment into the chain.
- * Check if we now have a full IP datagram which we can
- * bump up to the IP layer...
- */
- if (ip_done(qp)) {
- /* Glue together the fragments. */
- skb = ip_glue(qp);
- /* Free the queue entry. */
-out_freequeue:
- ip_free(qp);
-out_skb:
- spin_unlock(&ipfrag_lock);
- return skb;
- }
-
- /*
- * The queue is still active ... reset its timer.
- */
-out_timer:
- mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time); /* ~ 30 seconds */
-out:
- spin_unlock(&ipfrag_lock);
- return NULL;
-
- /*
- * Error exits ... we need to reset the timer if there's a queue.
- */
-out_oversize:
- if (net_ratelimit())
- printk(KERN_INFO "Oversized packet received from %u.%u.%u.%u\n",
- NIPQUAD(iph->saddr));
- /* the skb isn't in a fragment, so fall through to free it */
-out_freeskb:
- kfree_skb(skb);
IP_INC_STATS_BH(IpReasmFails);
- if (qp)
- goto out_timer;
- goto out;
+ kfree_skb(skb);
+ return NULL;
}
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)