Re: [PROBLEM] sundance on d-link dfe-580tx

Jeff Garzik (jgarzik@mandrakesoft.com)
Fri, 21 Jun 2002 01:27:50 -0400


Matthew,

This patch just arrived from D-Link. It includes fixes specifically for
the DFE-580TX. Does this fix your problem?
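
For context, the 1.03 changes move Rx processing out of the hard
interrupt handler into a tasklet with a fixed work budget (RX_BUDGET),
coalesce Tx-completion interrupts, and add a "flowctrl" module
parameter ("modprobe sundance flowctrl=0" disables flow control).
Below is a minimal sketch of the deferred-Rx pattern -- illustration
only, not the driver's actual code; rx_ring_has_work(),
deliver_one_packet() and the mask/unmask helpers are placeholders:

#include <linux/interrupt.h>

#define RX_BUDGET 32

struct rx_priv {
	struct tasklet_struct rx_tasklet;
	int budget;
};

/* Placeholder helpers standing in for the real ring/IRQ code. */
extern int rx_ring_has_work(struct rx_priv *np);
extern void deliver_one_packet(struct rx_priv *np);
extern void mask_rx_irqs(struct rx_priv *np);
extern void unmask_rx_irqs(struct rx_priv *np);

/* Softirq half: drain at most 'budget' packets per run.  If the
   budget runs out while work remains, reschedule instead of hogging
   the CPU; otherwise re-enable the Rx interrupt sources. */
static void rx_poll(unsigned long data)
{
	struct rx_priv *np = (struct rx_priv *) data;

	while (rx_ring_has_work(np)) {
		if (--np->budget < 0) {
			np->budget = RX_BUDGET;
			tasklet_schedule(&np->rx_tasklet);
			return;
		}
		deliver_one_packet(np);
	}
	unmask_rx_irqs(np);
}

/* Hard-IRQ half: mask further Rx interrupts and defer the work.
   Set up once at probe time with
   tasklet_init(&np->rx_tasklet, rx_poll, (unsigned long) np); */
static void rx_irq(struct rx_priv *np)
{
	mask_rx_irqs(np);
	if (np->budget < 0)
		np->budget = RX_BUDGET;
	tasklet_schedule(&np->rx_tasklet);
}

The real rx_poll() below also refills the Rx ring (refill_rx()) before
re-enabling interrupts.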

[Attachment: patch.txt]

--- /tmp/sundance.c Fri Jun 21 00:48:54 2002
+++ sundance.c Fri Jun 21 17:57:27 2002
@@ -24,19 +24,23 @@
Version 1.02 (D-Link):
- Add new board to PCI ID list
- Fix multicast bug
+
+ Version 1.03 (D-Link):
+ - New Rx scheme, reduce Rx congestion
+ - Option to disable flow control

*/

#define DRV_NAME "sundance"
-#define DRV_VERSION "1.02"
-#define DRV_RELDATE "17-Jan-2002"
+#define DRV_VERSION "1.03"
+#define DRV_RELDATE "21-Jun-2002"


/* The user-configurable values.
These may be modified when a driver module is loaded.*/
static int debug = 1; /* 1 normal messages, 0 quiet .. 7 verbose. */
/* Maximum events (Rx packets, etc.) to handle at each interrupt. */
-static int max_interrupt_work = 30;
+static int max_interrupt_work = 0;
static int mtu;
/* Maximum number of multicast addresses to filter (vs. rx-all-multicast).
Typical is a 64 element hash table based on the Ethernet CRC. */
@@ -47,6 +51,8 @@
This chip can receive into offset buffers, so the Alpha does not
need a copy-align. */
static int rx_copybreak;
+static int tx_coalesce=1;
+static int flowctrl=1;

/* media[] specifies the media type the NIC operates at.
autosense Autosensing active media.
@@ -70,9 +76,10 @@
bonding and packet priority, and more than 128 requires modifying the
Tx error recovery.
Large receive rings merely waste memory. */
-#define TX_RING_SIZE 16
-#define TX_QUEUE_LEN 10 /* Limit ring entries actually used. */
-#define RX_RING_SIZE 32
+#define TX_RING_SIZE 64
+#define TX_QUEUE_LEN (TX_RING_SIZE - 1) /* Limit ring entries actually used. */
+#define RX_RING_SIZE 64
+#define RX_BUDGET 32
#define TX_TOTAL_SIZE TX_RING_SIZE*sizeof(struct netdev_desc)
#define RX_TOTAL_SIZE RX_RING_SIZE*sizeof(struct netdev_desc)

@@ -129,10 +136,12 @@
MODULE_PARM(debug, "i");
MODULE_PARM(rx_copybreak, "i");
MODULE_PARM(media, "1-" __MODULE_STRING(MAX_UNITS) "s");
+MODULE_PARM(flowctrl, "i");
MODULE_PARM_DESC(max_interrupt_work, "Sundance Alta maximum events handled per interrupt");
MODULE_PARM_DESC(mtu, "Sundance Alta MTU (all boards)");
MODULE_PARM_DESC(debug, "Sundance Alta debug level (0-5)");
MODULE_PARM_DESC(rx_copybreak, "Sundance Alta copy breakpoint for copy-only-tiny-frames");
+MODULE_PARM_DESC(flowctrl, "Sundance Alta flow control [0|1]");
/*
Theory of Operation

@@ -207,7 +216,6 @@

*/

-

enum pci_id_flags_bits {
/* Set PCI command register bits before calling probe1(). */
@@ -399,19 +407,20 @@
struct timer_list timer; /* Media monitoring timer. */
/* Frequently used values: keep some adjacent for cache effect. */
spinlock_t lock;
+ spinlock_t rx_lock; /* Group with Tx control cache line. */
int chip_id, drv_flags;
unsigned int cur_rx, dirty_rx; /* Producer/consumer ring indices */
unsigned int rx_buf_sz; /* Based on MTU+slack. */
- spinlock_t txlock; /* Group with Tx control cache line. */
struct netdev_desc *last_tx; /* Last Tx descriptor used. */
unsigned int cur_tx, dirty_tx;
- unsigned int tx_full:1; /* The Tx queue is full. */
/* These values are keep track of the transceiver/media in use. */
unsigned int full_duplex:1; /* Full-duplex operation requested. */
- unsigned int medialock:1; /* Do not sense media. */
+ unsigned int flowctrl:1;
unsigned int default_port:4; /* Last dev->if_port value. */
unsigned int an_enable:1;
unsigned int speed;
+ struct tasklet_struct rx_tasklet;
+ int budget;
/* Multicast and receive mode. */
spinlock_t mcastlock; /* SMP lock multicast updates. */
u16 mcast_filter[4];
@@ -424,6 +433,9 @@

/* The station address location in the EEPROM. */
#define EEPROM_SA_OFFSET 0x10
+#define DEFAULT_INTR (IntrRxDMADone | IntrPCIErr | \
+ IntrDrvRqst | IntrTxDone | StatsMax | \
+ LinkChange)

static int eeprom_read(long ioaddr, int location);
static int mdio_read(struct net_device *dev, int phy_id, int location);
@@ -435,8 +447,9 @@
static void init_ring(struct net_device *dev);
static int start_tx(struct sk_buff *skb, struct net_device *dev);
static void intr_handler(int irq, void *dev_instance, struct pt_regs *regs);
+static void rx_poll(unsigned long data);
+static void refill_rx (struct net_device *dev);
static void netdev_error(struct net_device *dev, int intr_status);
-static int netdev_rx(struct net_device *dev);
static void netdev_error(struct net_device *dev, int intr_status);
static void set_rx_mode(struct net_device *dev);
static struct net_device_stats *get_stats(struct net_device *dev);
@@ -502,6 +515,7 @@
np->drv_flags = pci_id_tbl[chip_idx].drv_flags;
np->pci_dev = pdev;
spin_lock_init(&np->lock);
+ tasklet_init(&np->rx_tasklet, rx_poll, (unsigned long)dev);

ring_space = pci_alloc_consistent(pdev, TX_TOTAL_SIZE, &ring_dma);
if (!ring_space)
@@ -582,6 +596,12 @@
np->an_enable = 1;
}
}
+ if (tx_coalesce < 1)
+ tx_coalesce = 1;
+ else if (tx_coalesce > TX_QUEUE_LEN - 1)
+ tx_coalesce = TX_QUEUE_LEN - 1;
+ if (flowctrl == 0)
+ np->flowctrl = 0;
}

/* Fibre PHY? */
@@ -742,7 +762,6 @@
return;
}

-
static int netdev_open(struct net_device *dev)
{
struct netdev_private *np = dev->priv;
@@ -784,8 +803,7 @@
netif_start_queue(dev);

/* Enable interrupts by setting the interrupt mask. */
- writew(IntrRxDone | IntrRxDMADone | IntrPCIErr | IntrDrvRqst | IntrTxDone
- | StatsMax | LinkChange, ioaddr + IntrEnable);
+ writew(DEFAULT_INTR, ioaddr + IntrEnable);

writew(StatsEnable | RxEnable | TxEnable, ioaddr + MACCtrl1);

@@ -881,7 +899,7 @@
dev->trans_start = jiffies;
np->stats.tx_errors++;

- if (!np->tx_full)
+ if (!netif_queue_stopped(dev))
netif_wake_queue(dev);
}

@@ -892,7 +910,6 @@
struct netdev_private *np = dev->priv;
int i;

- np->tx_full = 0;
np->cur_rx = np->cur_tx = 0;
np->dirty_rx = np->dirty_tx = 0;

@@ -929,15 +946,16 @@
return;
}

-static int start_tx(struct sk_buff *skb, struct net_device *dev)
+static int
+start_tx (struct sk_buff *skb, struct net_device *dev)
{
- struct netdev_private *np = dev->priv;
+ struct netdev_private *np = (struct netdev_private *) dev->priv;
struct netdev_desc *txdesc;
unsigned entry;
+ long ioaddr = dev->base_addr;

/* Note: Ordering is important here, set the field with the
"ownership" bit last, and only then increment cur_tx. */
-
/* Calculate the next Tx descriptor entry. */
entry = np->cur_tx % TX_RING_SIZE;
np->tx_skbuff[entry] = skb;
@@ -945,11 +963,17 @@

txdesc->next_desc = 0;
/* Note: disable the interrupt generation here before releasing. */
- txdesc->status =
- cpu_to_le32((entry<<2) | DescIntrOnDMADone | DescIntrOnTx | DisableAlign);
- txdesc->frag[0].addr = cpu_to_le32(pci_map_single(np->pci_dev,
- skb->data, skb->len, PCI_DMA_TODEVICE));
- txdesc->frag[0].length = cpu_to_le32(skb->len | LastFrag);
+ if (entry % tx_coalesce == 0) {
+ txdesc->status = cpu_to_le32 ((entry << 2) |
+ DescIntrOnTx | DisableAlign);
+
+ } else {
+ txdesc->status = cpu_to_le32 ((entry << 2) | DisableAlign);
+ }
+ txdesc->frag[0].addr = cpu_to_le32 (pci_map_single (np->pci_dev, skb->data,
+ skb->len,
+ PCI_DMA_TODEVICE));
+ txdesc->frag[0].length = cpu_to_le32 (skb->len | LastFrag);
if (np->last_tx)
np->last_tx->next_desc = cpu_to_le32(np->tx_ring_dma +
entry*sizeof(struct netdev_desc));
@@ -957,24 +981,26 @@
np->cur_tx++;

/* On some architectures: explicitly flush cache lines here. */
-
- if (np->cur_tx - np->dirty_tx < TX_QUEUE_LEN - 1) {
+ if (np->cur_tx - np->dirty_tx < TX_QUEUE_LEN - 1
+ && !netif_queue_stopped(dev)) {
/* do nothing */
} else {
- np->tx_full = 1;
- netif_stop_queue(dev);
+ netif_stop_queue (dev);
}
/* Side effect: The read wakes the potentially-idle transmit channel. */
- if (readl(dev->base_addr + TxListPtr) == 0)
- writel(np->tx_ring_dma + entry*sizeof(*np->tx_ring),
+ if (readl (dev->base_addr + TxListPtr) == 0)
+ writel (np->tx_ring_dma + entry*sizeof(*np->tx_ring),
dev->base_addr + TxListPtr);

dev->trans_start = jiffies;

if (debug > 4) {
- printk(KERN_DEBUG "%s: Transmit frame #%d queued in slot %d.\n",
- dev->name, np->cur_tx, entry);
+ printk (KERN_DEBUG
+ "%s: Transmit frame #%d queued in slot %d.\n",
+ dev->name, np->cur_tx, entry);
}
+ if (tx_coalesce > 1)
+ writel (1000, ioaddr + DownCounter);
return 0;
}

@@ -989,56 +1015,61 @@

ioaddr = dev->base_addr;
np = dev->priv;
- spin_lock(&np->lock);

do {
int intr_status = readw(ioaddr + IntrStatus);
- writew(intr_status & (IntrRxDone | IntrRxDMADone | IntrPCIErr |
- IntrDrvRqst | IntrTxDone | IntrTxDMADone | StatsMax |
- LinkChange), ioaddr + IntrStatus);
+ writew(intr_status, ioaddr + IntrStatus);

if (debug > 4)
printk(KERN_DEBUG "%s: Interrupt, status %4.4x.\n",
dev->name, intr_status);

- if (intr_status == 0)
+ if (!(intr_status & DEFAULT_INTR))
break;

- if (intr_status & (IntrRxDone|IntrRxDMADone))
- netdev_rx(dev);
+ if (intr_status & (IntrRxDMADone)) {
+ writew(DEFAULT_INTR & ~(IntrRxDone|IntrRxDMADone),
+ ioaddr + IntrEnable);
+ if (np->budget < 0)
+ np->budget = RX_BUDGET;
+ tasklet_schedule(&np->rx_tasklet);
+ }

- if (intr_status & IntrTxDone) {
+ if (intr_status & (IntrTxDone | IntrDrvRqst)) {
int boguscnt = 32;
- int tx_status = readw(ioaddr + TxStatus);
+ int tx_status = readw (ioaddr + TxStatus);
while (tx_status & 0x80) {
if (debug > 4)
- printk("%s: Transmit status is %2.2x.\n",
- dev->name, tx_status);
+ printk
+ ("%s: Transmit status is %2.2x.\n",
+ dev->name, tx_status);
if (tx_status & 0x1e) {
np->stats.tx_errors++;
- if (tx_status & 0x10) np->stats.tx_fifo_errors++;
+ if (tx_status & 0x10)
+ np->stats.tx_fifo_errors++;
#ifdef ETHER_STATS
- if (tx_status & 0x08) np->stats.collisions16++;
+ if (tx_status & 0x08)
+ np->stats.collisions16++;
#else
- if (tx_status & 0x08) np->stats.collisions++;
+ if (tx_status & 0x08)
+ np->stats.collisions++;
#endif
- if (tx_status & 0x04) np->stats.tx_fifo_errors++;
- if (tx_status & 0x02) np->stats.tx_window_errors++;
+ if (tx_status & 0x04)
+ np->stats.tx_fifo_errors++;
+ if (tx_status & 0x02)
+ np->stats.tx_window_errors++;
/* This reset has not been verified!. */
- if (tx_status & 0x10) { /* Reset the Tx. */
- writew(0x001c, ioaddr + ASICCtrl + 2);
-#if 0 /* Do we need to reset the Tx pointer here? */
- writel(np->tx_ring_dma
- + np->dirty_tx*sizeof(*np->tx_ring),
- dev->base_addr + TxListPtr);
-#endif
+ if (tx_status & 0x10) { /* Reset the Tx. */
+ writew (0x001c,
+ ioaddr + ASICCtrl + 2);
}
- if (tx_status & 0x1e) /* Restart the Tx. */
- writew(TxEnable, ioaddr + MACCtrl1);
+ if (tx_status & 0x1e) /* Restart the Tx. */
+ writew (TxEnable,
+ ioaddr + MACCtrl1);
}
/* Yup, this is a documentation bug. It cost me *hours*. */
- writew(0, ioaddr + TxStatus);
- tx_status = readb(ioaddr + TxStatus);
+ writew (0, ioaddr + TxStatus);
+ tx_status = readw (ioaddr + TxStatus);
if (--boguscnt < 0)
break;
}
@@ -1046,26 +1077,24 @@
for (; np->cur_tx - np->dirty_tx > 0; np->dirty_tx++) {
int entry = np->dirty_tx % TX_RING_SIZE;
struct sk_buff *skb;
-
- if ( ! (np->tx_ring[entry].status & 0x00010000))
+ if (!(np->tx_ring[entry].status & 0x00010000))
break;
skb = np->tx_skbuff[entry];
/* Free the original skb. */
pci_unmap_single(np->pci_dev,
np->tx_ring[entry].frag[0].addr,
skb->len, PCI_DMA_TODEVICE);
- dev_kfree_skb_irq(skb);
+ dev_kfree_skb_irq (np->tx_skbuff[entry]);
np->tx_skbuff[entry] = 0;
}
- if (np->tx_full
- && np->cur_tx - np->dirty_tx < TX_QUEUE_LEN - 4) {
+ if (netif_queue_stopped(dev) &&
+ np->cur_tx - np->dirty_tx < TX_QUEUE_LEN - 4) {
/* The ring is no longer full, clear tbusy. */
- np->tx_full = 0;
- netif_wake_queue(dev);
+ netif_wake_queue (dev);
}

/* Abnormal error summary/uncommon events handlers. */
- if (intr_status & (IntrDrvRqst | IntrPCIErr | LinkChange | StatsMax))
+ if (intr_status & (IntrPCIErr | LinkChange | StatsMax))
netdev_error(dev, intr_status);
if (--boguscnt < 0) {
get_stats(dev);
@@ -1073,49 +1102,41 @@
printk(KERN_WARNING "%s: Too much work at interrupt, "
"status=0x%4.4x / 0x%4.4x.\n",
dev->name, intr_status, readw(ioaddr + IntrClear));
- /* Re-enable us in 3.2msec. */
- writew(0, ioaddr + IntrEnable);
- writew(1000, ioaddr + DownCounter);
- writew(IntrDrvRqst, ioaddr + IntrEnable);
break;
}
} while (1);
-
if (debug > 3)
printk(KERN_DEBUG "%s: exiting interrupt, status=%#4.4x.\n",
dev->name, readw(ioaddr + IntrStatus));
+ if (np->cur_tx - np->dirty_tx > 0 && tx_coalesce > 1)
+ writel(100, ioaddr + DownCounter);

- spin_unlock(&np->lock);
}

-/* This routine is logically part of the interrupt handler, but separated
- for clarity and better register allocation. */
-static int netdev_rx(struct net_device *dev)
+static void rx_poll(unsigned long data)
{
+ struct net_device *dev = (struct net_device *)data;
struct netdev_private *np = dev->priv;
int entry = np->cur_rx % RX_RING_SIZE;
- int boguscnt = np->dirty_rx + RX_RING_SIZE - np->cur_rx;
-
- if (debug > 4) {
- printk(KERN_DEBUG " In netdev_rx(), entry %d status %4.4x.\n",
- entry, np->rx_ring[entry].status);
- }
+ int boguscnt = np->budget;
+ long ioaddr = dev->base_addr;
+ int received = 0;

/* If EOP is set on the next entry, it's a new packet. Send it up. */
while (1) {
struct netdev_desc *desc = &(np->rx_ring[entry]);
- u32 frame_status;
+ u32 frame_status = le32_to_cpu(desc->status);
int pkt_len;

+ if (--boguscnt < 0) {
+ goto not_done;
+ }
if (!(desc->status & DescOwn))
break;
- frame_status = le32_to_cpu(desc->status);
pkt_len = frame_status & 0x1fff; /* Chip omits the CRC. */
if (debug > 4)
printk(KERN_DEBUG " netdev_rx() status was %8.8x.\n",
frame_status);
- if (--boguscnt < 0)
- break;
pci_dma_sync_single(np->pci_dev, desc->frag[0].addr,
np->rx_buf_sz, PCI_DMA_FROMDEVICE);

@@ -1136,7 +1157,6 @@
}
} else {
struct sk_buff *skb;
-
#ifndef final_version
if (debug > 4)
printk(KERN_DEBUG " netdev_rx() normal Rx pkt length %d"
@@ -1164,11 +1184,36 @@
netif_rx(skb);
dev->last_rx = jiffies;
}
- entry = (++np->cur_rx) % RX_RING_SIZE;
+ entry = (entry + 1) % RX_RING_SIZE;
+ received++;
}
+ np->cur_rx = entry;
+ refill_rx (dev);
+ np->budget -= received;
+ writew(DEFAULT_INTR, ioaddr + IntrEnable);
+ return;
+
+not_done:
+ np->cur_rx = entry;
+ refill_rx (dev);
+ if (!received)
+ received = 1;
+ np->budget -= received;
+ if (np->budget <= 0)
+ np->budget = RX_BUDGET;
+ tasklet_schedule(&np->rx_tasklet);
+ return;
+}
+
+static void refill_rx (struct net_device *dev)
+{
+ struct netdev_private *np = dev->priv;
+ int entry;
+ int cnt = 0;

/* Refill the Rx ring buffers. */
- for (; np->cur_rx - np->dirty_rx > 0; np->dirty_rx++) {
+ for (;(np->cur_rx - np->dirty_rx + RX_RING_SIZE) % RX_RING_SIZE > 0;
+ np->dirty_rx = (np->dirty_rx + 1) % RX_RING_SIZE) {
struct sk_buff *skb;
entry = np->dirty_rx % RX_RING_SIZE;
if (np->rx_skbuff[entry] == NULL) {
@@ -1186,30 +1231,17 @@
np->rx_ring[entry].frag[0].length =
cpu_to_le32(np->rx_buf_sz | LastFrag);
np->rx_ring[entry].status = 0;
+ cnt++;
}
-
- /* No need to restart Rx engine, it will poll. */
- return 0;
+ return;
}
-
static void netdev_error(struct net_device *dev, int intr_status)
{
long ioaddr = dev->base_addr;
struct netdev_private *np = dev->priv;
u16 mii_ctl, mii_advertise, mii_lpa;
int speed;
-
- if (intr_status & IntrDrvRqst) {
- /* Stop the down counter and turn interrupts back on. */
- if (debug > 1)
- printk("%s: Turning interrupts back on.\n", dev->name);
- writew(0, ioaddr + IntrEnable);
- writew(0, ioaddr + DownCounter);
- writew(IntrRxDone | IntrRxDMADone | IntrPCIErr | IntrDrvRqst |
- IntrTxDone | StatsMax | LinkChange, ioaddr + IntrEnable);
- /* Ack buggy InRequest */
- writew (IntrDrvRqst, ioaddr + IntrStatus);
- }
+
if (intr_status & LinkChange) {
if (np->an_enable) {
mii_advertise = mdio_read (dev, np->phys[0], MII_ADVERTISE);
@@ -1236,6 +1268,9 @@
"full" : "half");
}
check_duplex (dev);
+ if (np->flowctrl == 0)
+ writew(readw(ioaddr + MACCtrl0) & ~EnbFlowCtrl,
+ ioaddr + MACCtrl0);
}
if (intr_status & StatsMax) {
get_stats(dev);
@@ -1482,3 +1517,5 @@

module_init(sundance_init);
module_exit(sundance_exit);
+
+
