patch-2.4.0-test10 linux/net/core/dev.c
Next file: linux/net/core/dev_mcast.c
Previous file: linux/net/core/Makefile
Back to the patch index
Back to the overall index
- Lines: 396
- Date:
Fri Oct 27 11:03:14 2000
- Orig file:
v2.4.0-test9/linux/net/core/dev.c
- Orig date:
Sun Oct 8 10:50:38 2000
diff -u --recursive --new-file v2.4.0-test9/linux/net/core/dev.c linux/net/core/dev.c
@@ -59,6 +59,8 @@
* Paul Rusty Russell : SIOCSIFNAME
* Pekka Riikonen : Netdev boot-time settings code
* Andrew Morton : Make unregister_netdevice wait indefinitely on dev->refcnt
+ * J Hadi Salim : - Backlog queue sampling
+ * - netif_rx() feedback
*/
#include <asm/uaccess.h>
@@ -85,6 +87,7 @@
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/if_bridge.h>
+#include <linux/divert.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/profile.h>
@@ -97,6 +100,18 @@
extern int plip_init(void);
#endif
+/* This define, if set, will randomly drop a packet when congestion
+ * is more than moderate. It helps fairness in the multi-interface
+ * case when one of them is a hog, but it kills performance for the
+ * single interface case so it is off now by default.
+ */
+#undef RAND_LIE
+
+/* Setting this will sample the queue lengths and thus congestion
+ * via a timer instead of as each packet is received.
+ */
+#undef OFFLINE_SAMPLE
+
NET_PROFILE_DEFINE(dev_queue_xmit)
NET_PROFILE_DEFINE(softnet_process)
@@ -133,6 +148,11 @@
static struct packet_type *ptype_base[16]; /* 16 way hashed list */
static struct packet_type *ptype_all = NULL; /* Taps */
+#ifdef OFFLINE_SAMPLE
+static void sample_queue(unsigned long dummy);
+static struct timer_list samp_timer = { function: sample_queue };
+#endif
+
/*
* Our notifier list
*/
@@ -933,12 +953,20 @@
=======================================================================*/
int netdev_max_backlog = 300;
+/* These numbers were selected based on intuition and some
+ * experimentation; if you have a more scientific way of doing this,
+ * please go ahead and fix things.
+ */
+int no_cong_thresh = 10;
+int no_cong = 20;
+int lo_cong = 100;
+int mod_cong = 290;
struct netif_rx_stats netdev_rx_stat[NR_CPUS];
#ifdef CONFIG_NET_HW_FLOWCONTROL
-static atomic_t netdev_dropping = ATOMIC_INIT(0);
+atomic_t netdev_dropping = ATOMIC_INIT(0);
static unsigned long netdev_fc_mask = 1;
unsigned long netdev_fc_xoff = 0;
spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED;
@@ -996,6 +1024,56 @@
}
#endif
+#ifdef RAND_LIE
+/*
+ * Randomly report a worse congestion level than the measured one.
+ *
+ * Returns @worse when a random draw in [0, netdev_max_backlog) falls
+ * below @avg_blog, otherwise @level.  netdev_max_backlog is a plain
+ * writable global, so it is read once and a non-positive value simply
+ * disables the random promotion instead of dividing by zero.
+ */
+static inline int cng_rand_promote(int avg_blog, int level, int worse)
+{
+	int max_backlog = netdev_max_backlog;
+	int rq;
+
+	if (max_backlog <= 0)
+		return level;
+
+	rq = net_random() % (unsigned long)max_backlog;
+	return (rq < avg_blog) ? worse : level;
+}
+#endif
+
+/*
+ * get_sample_stats - refresh the smoothed backlog and congestion level
+ * @cpu: index into softnet_data[] of the queue to sample
+ *
+ * Folds the current input_pkt_queue length into avg_blog (the new value
+ * is half the old average plus half the current length) and classifies
+ * the result against mod_cong, lo_cong and no_cong.  The resulting level
+ * is stored in cng_level and the new average in avg_blog.
+ *
+ * With RAND_LIE defined, the two upper levels may be randomly promoted
+ * one step (NET_RX_CN_MOD -> NET_RX_CN_HIGH, NET_RX_CN_HIGH ->
+ * NET_RX_DROP); the chance grows with avg_blog.
+ */
+static void get_sample_stats(int cpu)
+{
+	struct softnet_data *sd = &softnet_data[cpu];
+	int avg_blog = (sd->avg_blog >> 1) + (sd->input_pkt_queue.qlen >> 1);
+	int level;
+
+	if (avg_blog > mod_cong) {
+		/* Above moderate congestion levels. */
+		level = NET_RX_CN_HIGH;
+#ifdef RAND_LIE
+		level = cng_rand_promote(avg_blog, level, NET_RX_DROP);
+#endif
+	} else if (avg_blog > lo_cong) {
+		level = NET_RX_CN_MOD;
+#ifdef RAND_LIE
+		level = cng_rand_promote(avg_blog, level, NET_RX_CN_HIGH);
+#endif
+	} else if (avg_blog > no_cong) {
+		level = NET_RX_CN_LOW;
+	} else {
+		/* no congestion */
+		level = NET_RX_SUCCESS;
+	}
+
+	/* Publish the level with a single store. */
+	sd->cng_level = level;
+	sd->avg_blog = avg_blog;
+}
+
+#ifdef OFFLINE_SAMPLE
+/*
+ * sample_queue - timer callback that samples backlog congestion
+ * @dummy: timer data word; unused
+ *
+ * Runs get_sample_stats() for the CPU the callback executes on and
+ * re-arms samp_timer one tick ahead.
+ *
+ * NOTE(review): each tick samples only the current CPU's queue --
+ * confirm that this is sufficient on SMP.
+ */
+static void sample_queue(unsigned long dummy)
+{
+	get_sample_stats(smp_processor_id());
+
+	/* One tick: 10 ms or 1 ms -- I don't care -- JHS.
+	 * The expiry is computed directly from jiffies rather than in an
+	 * int, so the tick counter is not truncated where it is wider
+	 * than int.
+	 */
+	mod_timer(&samp_timer, jiffies + 1);
+}
+#endif
+
+
/**
* netif_rx - post buffer to the network code
* @skb: buffer to post
@@ -1004,9 +1082,18 @@
* the upper (protocol) levels to process. It always succeeds. The buffer
* may be dropped during processing for congestion control or by the
* protocol layers.
+ *
+ * return values:
+ * NET_RX_SUCCESS (no congestion)
+ * NET_RX_CN_LOW (low congestion)
+ * NET_RX_CN_MOD (moderate congestion)
+ * NET_RX_CN_HIGH (high congestion)
+ * NET_RX_DROP (packet was dropped)
+ *
+ *
*/
-void netif_rx(struct sk_buff *skb)
+int netif_rx(struct sk_buff *skb)
{
int this_cpu = smp_processor_id();
struct softnet_data *queue;
@@ -1036,7 +1123,10 @@
__skb_queue_tail(&queue->input_pkt_queue,skb);
__cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ);
local_irq_restore(flags);
- return;
+#ifndef OFFLINE_SAMPLE
+ get_sample_stats(this_cpu);
+#endif
+ return softnet_data[this_cpu].cng_level;
}
if (queue->throttle) {
@@ -1062,19 +1152,22 @@
local_irq_restore(flags);
kfree_skb(skb);
+ return NET_RX_DROP;
}
/* Deliver skb to an old protocol, which is not threaded well
or which do not understand shared skbs.
*/
-static void deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last)
+static int deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last)
{
static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED;
+ int ret = NET_RX_DROP;
+
if (!last) {
skb = skb_clone(skb, GFP_ATOMIC);
if (skb == NULL)
- return;
+ return ret;
}
/* The assumption (correct one) is that old protocols
@@ -1087,10 +1180,11 @@
/* Disable timers and wait for all timers completion */
tasklet_disable(bh_task_vec+TIMER_BH);
- pt->func(skb, skb->dev, pt);
+ ret = pt->func(skb, skb->dev, pt);
tasklet_enable(bh_task_vec+TIMER_BH);
spin_unlock(&net_bh_lock);
+ return ret;
}
/* Reparent skb to master device. This function is called
@@ -1173,20 +1267,33 @@
void (*br_handle_frame_hook)(struct sk_buff *skb) = NULL;
#endif
-static void __inline__ handle_bridge(struct sk_buff *skb,
+static int __inline__ handle_bridge(struct sk_buff *skb,
struct packet_type *pt_prev)
{
+ int ret = NET_RX_DROP;
+
if (pt_prev) {
if (!pt_prev->data)
- deliver_to_old_ones(pt_prev, skb, 0);
+ ret = deliver_to_old_ones(pt_prev, skb, 0);
else {
atomic_inc(&skb->users);
- pt_prev->func(skb, skb->dev, pt_prev);
+ ret = pt_prev->func(skb, skb->dev, pt_prev);
}
}
br_handle_frame_hook(skb);
+ return ret;
+}
+
+
+#ifdef CONFIG_NET_DIVERT
+/*
+ * Pass @skb to divert_frame() when the receiving device has a divert
+ * block and its divert flag is set; otherwise do nothing.
+ */
+static inline void handle_diverter(struct sk_buff *skb)
+{
+	/* Nothing to do unless diversion is set up and enabled on the device. */
+	if (!skb->dev->divert || !skb->dev->divert->divert)
+		return;
+
+	divert_frame(skb);
}
+#endif /* CONFIG_NET_DIVERT */
static void net_rx_action(struct softirq_action *h)
@@ -1239,6 +1346,12 @@
}
}
+#ifdef CONFIG_NET_DIVERT
+ if (skb->dev->divert && skb->dev->divert->divert)
+ handle_diverter(skb);
+#endif /* CONFIG_NET_DIVERT */
+
+
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
if (skb->dev->br_port != NULL &&
br_handle_frame_hook != NULL) {
@@ -1275,6 +1388,17 @@
if (bugdet-- < 0 || jiffies - start_time > 1)
goto softnet_break;
+
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+ if (queue->throttle && queue->input_pkt_queue.qlen < no_cong_thresh ) {
+ if (atomic_dec_and_test(&netdev_dropping)) {
+ queue->throttle = 0;
+ netdev_wakeup();
+ goto softnet_break;
+ }
+ }
+#endif
+
}
br_read_unlock(BR_NETPROTO_LOCK);
@@ -2113,9 +2237,9 @@
/**
* dev_new_index - allocate an ifindex
*
- * Returns a suitable unique value for a new device interface number.
- * The caller must hold the rtnl semaphore to be sure it remains
- * unique.
+ * Returns a suitable unique value for a new device interface
+ * number. The caller must hold the rtnl semaphore or the
+ * dev_base_lock to be sure it remains unique.
*/
int dev_new_index(void)
@@ -2140,6 +2264,10 @@
* chain. 0 is returned on success. A negative errno code is returned
* on a failure to set up the device, or if the name is a duplicate.
*
+ * Callers must hold the rtnl semaphore. See the comment at the
+ * end of Space.c for details about the locking. You may want
+ * register_netdev() instead of this.
+ *
* BUGS:
* The locking appears insufficient to guarantee two parallel registers
* will not get the same name.
@@ -2148,6 +2276,9 @@
int register_netdevice(struct net_device *dev)
{
struct net_device *d, **dp;
+#ifdef CONFIG_NET_DIVERT
+ int ret;
+#endif
spin_lock_init(&dev->queue_lock);
spin_lock_init(&dev->xmit_lock);
@@ -2182,6 +2313,12 @@
dev_hold(dev);
write_unlock_bh(&dev_base_lock);
+#ifdef CONFIG_NET_DIVERT
+ ret = alloc_divert_blk(dev);
+ if (ret)
+ return ret;
+#endif /* CONFIG_NET_DIVERT */
+
/*
* Default initial state at registry is that the
* device is present.
@@ -2231,6 +2368,12 @@
dev->deadbeaf = 0;
write_unlock_bh(&dev_base_lock);
+#ifdef CONFIG_NET_DIVERT
+ ret = alloc_divert_blk(dev);
+ if (ret)
+ return ret;
+#endif /* CONFIG_NET_DIVERT */
+
/* Notify protocols, that a new device appeared. */
notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
@@ -2272,6 +2415,10 @@
* This function shuts down a device interface and removes it
* from the kernel tables. On success 0 is returned, on a failure
* a negative errno code is returned.
+ *
+ * Callers must hold the rtnl semaphore. See the comment at the
+ * end of Space.c for details about the locking. You may want
+ * unregister_netdev() instead of this.
*/
int unregister_netdevice(struct net_device *dev)
@@ -2325,6 +2472,10 @@
/* Notifier chain MUST detach us from master device. */
BUG_TRAP(dev->master==NULL);
+#ifdef CONFIG_NET_DIVERT
+ free_divert_blk(dev);
+#endif
+
if (dev->new_style) {
#ifdef NET_REFCNT_DEBUG
if (atomic_read(&dev->refcnt) != 1)
@@ -2397,7 +2548,15 @@
extern void net_device_init(void);
extern void ip_auto_config(void);
+#ifdef CONFIG_NET_DIVERT
+extern void dv_init(void);
+#endif /* CONFIG_NET_DIVERT */
+
+/*
+ * Callers must hold the rtnl semaphore. See the comment at the
+ * end of Space.c for details about the locking.
+ */
int __init net_dev_init(void)
{
struct net_device *dev, **dp;
@@ -2407,6 +2566,10 @@
pktsched_init();
#endif
+#ifdef CONFIG_NET_DIVERT
+ dv_init();
+#endif /* CONFIG_NET_DIVERT */
+
/*
* Initialise the packet receive queues.
*/
@@ -2417,6 +2580,8 @@
queue = &softnet_data[i];
skb_queue_head_init(&queue->input_pkt_queue);
queue->throttle = 0;
+ queue->cng_level = 0;
+ queue->avg_blog = 10; /* arbitrary non-zero */
queue->completion_queue = NULL;
}
@@ -2425,6 +2590,12 @@
NET_PROFILE_REGISTER(dev_queue_xmit);
NET_PROFILE_REGISTER(softnet_process);
#endif
+
+#ifdef OFFLINE_SAMPLE
+ samp_timer.expires = jiffies + (10 * HZ);
+ add_timer(&samp_timer);
+#endif
+
/*
* Add the devices.
* If the call to dev->init fails, the dev is removed
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)