summaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/sfc/tx.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-10-02 13:38:27 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-10-02 13:38:27 -0700
commitaecdc33e111b2c447b622e287c6003726daa1426 (patch)
tree3e7657eae4b785e1a1fb5dfb225dbae0b2f0cfc6 /drivers/net/ethernet/sfc/tx.c
parenta20acf99f75e49271381d65db097c9763060a1e8 (diff)
parenta3a6cab5ea10cca64d036851fe0d932448f2fe4f (diff)
downloadlinux-aecdc33e111b2c447b622e287c6003726daa1426.tar.gz
linux-aecdc33e111b2c447b622e287c6003726daa1426.tar.bz2
linux-aecdc33e111b2c447b622e287c6003726daa1426.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking changes from David Miller: 1) GRE now works over ipv6, from Dmitry Kozlov. 2) Make SCTP more network namespace aware, from Eric Biederman. 3) TEAM driver now works with non-ethernet devices, from Jiri Pirko. 4) Make openvswitch network namespace aware, from Pravin B Shelar. 5) IPV6 NAT implementation, from Patrick McHardy. 6) Server side support for TCP Fast Open, from Jerry Chu and others. 7) Packet BPF filter supports MOD and XOR, from Eric Dumazet and Daniel Borkmann. 8) Increate the loopback default MTU to 64K, from Eric Dumazet. 9) Use a per-task rather than per-socket page fragment allocator for outgoing networking traffic. This benefits processes that have very many mostly idle sockets, which is quite common. From Eric Dumazet. 10) Use up to 32K for page fragment allocations, with fallbacks to smaller sizes when higher order page allocations fail. Benefits are a) less segments for driver to process b) less calls to page allocator c) less waste of space. From Eric Dumazet. 11) Allow GRO to be used on GRE tunnels, from Eric Dumazet. 12) VXLAN device driver, one way to handle VLAN issues such as the limitation of 4096 VLAN IDs yet still have some level of isolation. From Stephen Hemminger. 13) As usual there is a large boatload of driver changes, with the scale perhaps tilted towards the wireless side this time around. Fix up various fairly trivial conflicts, mostly caused by the user namespace changes. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1012 commits) hyperv: Add buffer for extended info after the RNDIS response message. hyperv: Report actual status in receive completion packet hyperv: Remove extra allocated space for recv_pkt_list elements hyperv: Fix page buffer handling in rndis_filter_send_request() hyperv: Fix the missing return value in rndis_filter_set_packet_filter() hyperv: Fix the max_xfer_size in RNDIS initialization vxlan: put UDP socket in correct namespace vxlan: Depend on CONFIG_INET sfc: Fix the reported priorities of different filter types sfc: Remove EFX_FILTER_FLAG_RX_OVERRIDE_IP sfc: Fix loopback self-test with separate_tx_channels=1 sfc: Fix MCDI structure field lookup sfc: Add parentheses around use of bitfield macro arguments sfc: Fix null function pointer in efx_sriov_channel_type vxlan: virtual extensible lan igmp: export symbol ip_mc_leave_group netlink: add attributes to fdb interface tg3: unconditionally select HWMON support when tg3 is enabled. Revert "net: ti cpsw ethernet: allow reading phy interface mode from DT" gre: fix sparse warning ...
Diffstat (limited to 'drivers/net/ethernet/sfc/tx.c')
-rw-r--r--drivers/net/ethernet/sfc/tx.c627
1 files changed, 251 insertions, 376 deletions
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 18713436b443..5e090e54298e 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -22,14 +22,6 @@
#include "nic.h"
#include "workarounds.h"
-/*
- * TX descriptor ring full threshold
- *
- * The tx_queue descriptor ring fill-level must fall below this value
- * before we restart the netif queue
- */
-#define EFX_TXQ_THRESHOLD(_efx) ((_efx)->txq_entries / 2u)
-
static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
struct efx_tx_buffer *buffer,
unsigned int *pkts_compl,
@@ -39,67 +31,32 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
struct device *dma_dev = &tx_queue->efx->pci_dev->dev;
dma_addr_t unmap_addr = (buffer->dma_addr + buffer->len -
buffer->unmap_len);
- if (buffer->unmap_single)
+ if (buffer->flags & EFX_TX_BUF_MAP_SINGLE)
dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len,
DMA_TO_DEVICE);
else
dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len,
DMA_TO_DEVICE);
buffer->unmap_len = 0;
- buffer->unmap_single = false;
}
- if (buffer->skb) {
+ if (buffer->flags & EFX_TX_BUF_SKB) {
(*pkts_compl)++;
(*bytes_compl) += buffer->skb->len;
dev_kfree_skb_any((struct sk_buff *) buffer->skb);
- buffer->skb = NULL;
netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
"TX queue %d transmission id %x complete\n",
tx_queue->queue, tx_queue->read_count);
+ } else if (buffer->flags & EFX_TX_BUF_HEAP) {
+ kfree(buffer->heap_buf);
}
-}
-/**
- * struct efx_tso_header - a DMA mapped buffer for packet headers
- * @next: Linked list of free ones.
- * The list is protected by the TX queue lock.
- * @dma_unmap_len: Length to unmap for an oversize buffer, or 0.
- * @dma_addr: The DMA address of the header below.
- *
- * This controls the memory used for a TSO header. Use TSOH_DATA()
- * to find the packet header data. Use TSOH_SIZE() to calculate the
- * total size required for a given packet header length. TSO headers
- * in the free list are exactly %TSOH_STD_SIZE bytes in size.
- */
-struct efx_tso_header {
- union {
- struct efx_tso_header *next;
- size_t unmap_len;
- };
- dma_addr_t dma_addr;
-};
+ buffer->len = 0;
+ buffer->flags = 0;
+}
static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
struct sk_buff *skb);
-static void efx_fini_tso(struct efx_tx_queue *tx_queue);
-static void efx_tsoh_heap_free(struct efx_tx_queue *tx_queue,
- struct efx_tso_header *tsoh);
-
-static void efx_tsoh_free(struct efx_tx_queue *tx_queue,
- struct efx_tx_buffer *buffer)
-{
- if (buffer->tsoh) {
- if (likely(!buffer->tsoh->unmap_len)) {
- buffer->tsoh->next = tx_queue->tso_headers_free;
- tx_queue->tso_headers_free = buffer->tsoh;
- } else {
- efx_tsoh_heap_free(tx_queue, buffer->tsoh);
- }
- buffer->tsoh = NULL;
- }
-}
-
static inline unsigned
efx_max_tx_len(struct efx_nic *efx, dma_addr_t dma_addr)
@@ -138,6 +95,56 @@ unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
return max_descs;
}
+/* Get partner of a TX queue, seen as part of the same net core queue */
+static struct efx_tx_queue *efx_tx_queue_partner(struct efx_tx_queue *tx_queue)
+{
+ if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD)
+ return tx_queue - EFX_TXQ_TYPE_OFFLOAD;
+ else
+ return tx_queue + EFX_TXQ_TYPE_OFFLOAD;
+}
+
+static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1)
+{
+ /* We need to consider both queues that the net core sees as one */
+ struct efx_tx_queue *txq2 = efx_tx_queue_partner(txq1);
+ struct efx_nic *efx = txq1->efx;
+ unsigned int fill_level;
+
+ fill_level = max(txq1->insert_count - txq1->old_read_count,
+ txq2->insert_count - txq2->old_read_count);
+ if (likely(fill_level < efx->txq_stop_thresh))
+ return;
+
+ /* We used the stale old_read_count above, which gives us a
+ * pessimistic estimate of the fill level (which may even
+ * validly be >= efx->txq_entries). Now try again using
+ * read_count (more likely to be a cache miss).
+ *
+ * If we read read_count and then conditionally stop the
+ * queue, it is possible for the completion path to race with
+ * us and complete all outstanding descriptors in the middle,
+ * after which there will be no more completions to wake it.
+ * Therefore we stop the queue first, then read read_count
+ * (with a memory barrier to ensure the ordering), then
+ * restart the queue if the fill level turns out to be low
+ * enough.
+ */
+ netif_tx_stop_queue(txq1->core_txq);
+ smp_mb();
+ txq1->old_read_count = ACCESS_ONCE(txq1->read_count);
+ txq2->old_read_count = ACCESS_ONCE(txq2->read_count);
+
+ fill_level = max(txq1->insert_count - txq1->old_read_count,
+ txq2->insert_count - txq2->old_read_count);
+ EFX_BUG_ON_PARANOID(fill_level >= efx->txq_entries);
+ if (likely(fill_level < efx->txq_stop_thresh)) {
+ smp_mb();
+ if (likely(!efx->loopback_selftest))
+ netif_tx_start_queue(txq1->core_txq);
+ }
+}
+
/*
* Add a socket buffer to a TX queue
*
@@ -151,7 +158,7 @@ unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
* This function is split out from efx_hard_start_xmit to allow the
* loopback test to direct packets via specific TX queues.
*
- * Returns NETDEV_TX_OK or NETDEV_TX_BUSY
+ * Returns NETDEV_TX_OK.
* You must hold netif_tx_lock() to call this function.
*/
netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
@@ -160,12 +167,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
struct device *dma_dev = &efx->pci_dev->dev;
struct efx_tx_buffer *buffer;
skb_frag_t *fragment;
- unsigned int len, unmap_len = 0, fill_level, insert_ptr;
+ unsigned int len, unmap_len = 0, insert_ptr;
dma_addr_t dma_addr, unmap_addr = 0;
unsigned int dma_len;
- bool unmap_single;
- int q_space, i = 0;
- netdev_tx_t rc = NETDEV_TX_OK;
+ unsigned short dma_flags;
+ int i = 0;
EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);
@@ -183,14 +189,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
return NETDEV_TX_OK;
}
- fill_level = tx_queue->insert_count - tx_queue->old_read_count;
- q_space = efx->txq_entries - 1 - fill_level;
-
/* Map for DMA. Use dma_map_single rather than dma_map_page
* since this is more efficient on machines with sparse
* memory.
*/
- unmap_single = true;
+ dma_flags = EFX_TX_BUF_MAP_SINGLE;
dma_addr = dma_map_single(dma_dev, skb->data, len, PCI_DMA_TODEVICE);
/* Process all fragments */
@@ -205,39 +208,10 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
/* Add to TX queue, splitting across DMA boundaries */
do {
- if (unlikely(q_space-- <= 0)) {
- /* It might be that completions have
- * happened since the xmit path last
- * checked. Update the xmit path's
- * copy of read_count.
- */
- netif_tx_stop_queue(tx_queue->core_txq);
- /* This memory barrier protects the
- * change of queue state from the access
- * of read_count. */
- smp_mb();
- tx_queue->old_read_count =
- ACCESS_ONCE(tx_queue->read_count);
- fill_level = (tx_queue->insert_count
- - tx_queue->old_read_count);
- q_space = efx->txq_entries - 1 - fill_level;
- if (unlikely(q_space-- <= 0)) {
- rc = NETDEV_TX_BUSY;
- goto unwind;
- }
- smp_mb();
- if (likely(!efx->loopback_selftest))
- netif_tx_start_queue(
- tx_queue->core_txq);
- }
-
insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
buffer = &tx_queue->buffer[insert_ptr];
- efx_tsoh_free(tx_queue, buffer);
- EFX_BUG_ON_PARANOID(buffer->tsoh);
- EFX_BUG_ON_PARANOID(buffer->skb);
+ EFX_BUG_ON_PARANOID(buffer->flags);
EFX_BUG_ON_PARANOID(buffer->len);
- EFX_BUG_ON_PARANOID(!buffer->continuation);
EFX_BUG_ON_PARANOID(buffer->unmap_len);
dma_len = efx_max_tx_len(efx, dma_addr);
@@ -247,13 +221,14 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
/* Fill out per descriptor fields */
buffer->len = dma_len;
buffer->dma_addr = dma_addr;
+ buffer->flags = EFX_TX_BUF_CONT;
len -= dma_len;
dma_addr += dma_len;
++tx_queue->insert_count;
} while (len);
/* Transfer ownership of the unmapping to the final buffer */
- buffer->unmap_single = unmap_single;
+ buffer->flags = EFX_TX_BUF_CONT | dma_flags;
buffer->unmap_len = unmap_len;
unmap_len = 0;
@@ -264,20 +239,22 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
len = skb_frag_size(fragment);
i++;
/* Map for DMA */
- unmap_single = false;
+ dma_flags = 0;
dma_addr = skb_frag_dma_map(dma_dev, fragment, 0, len,
DMA_TO_DEVICE);
}
/* Transfer ownership of the skb to the final buffer */
buffer->skb = skb;
- buffer->continuation = false;
+ buffer->flags = EFX_TX_BUF_SKB | dma_flags;
netdev_tx_sent_queue(tx_queue->core_txq, skb->len);
/* Pass off to hardware */
efx_nic_push_buffers(tx_queue);
+ efx_tx_maybe_stop_queue(tx_queue);
+
return NETDEV_TX_OK;
dma_err:
@@ -289,7 +266,6 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
/* Mark the packet as transmitted, and free the SKB ourselves */
dev_kfree_skb_any(skb);
- unwind:
/* Work backwards until we hit the original insert pointer value */
while (tx_queue->insert_count != tx_queue->write_count) {
unsigned int pkts_compl = 0, bytes_compl = 0;
@@ -297,12 +273,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
buffer = &tx_queue->buffer[insert_ptr];
efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
- buffer->len = 0;
}
/* Free the fragment we were mid-way through pushing */
if (unmap_len) {
- if (unmap_single)
+ if (dma_flags & EFX_TX_BUF_MAP_SINGLE)
dma_unmap_single(dma_dev, unmap_addr, unmap_len,
DMA_TO_DEVICE);
else
@@ -310,7 +285,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
DMA_TO_DEVICE);
}
- return rc;
+ return NETDEV_TX_OK;
}
/* Remove packets from the TX queue
@@ -340,8 +315,6 @@ static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
}
efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl);
- buffer->continuation = true;
- buffer->len = 0;
++tx_queue->read_count;
read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
@@ -366,6 +339,12 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
EFX_WARN_ON_PARANOID(!netif_device_present(net_dev));
+ /* PTP "event" packet */
+ if (unlikely(efx_xmit_with_hwtstamp(skb)) &&
+ unlikely(efx_ptp_is_ptp_tx(efx, skb))) {
+ return efx_ptp_tx(efx, skb);
+ }
+
index = skb_get_queue_mapping(skb);
type = skb->ip_summed == CHECKSUM_PARTIAL ? EFX_TXQ_TYPE_OFFLOAD : 0;
if (index >= efx->n_tx_channels) {
@@ -450,6 +429,7 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
{
unsigned fill_level;
struct efx_nic *efx = tx_queue->efx;
+ struct efx_tx_queue *txq2;
unsigned int pkts_compl = 0, bytes_compl = 0;
EFX_BUG_ON_PARANOID(index > tx_queue->ptr_mask);
@@ -457,15 +437,18 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl);
netdev_tx_completed_queue(tx_queue->core_txq, pkts_compl, bytes_compl);
- /* See if we need to restart the netif queue. This barrier
- * separates the update of read_count from the test of the
- * queue state. */
+ /* See if we need to restart the netif queue. This memory
+ * barrier ensures that we write read_count (inside
+ * efx_dequeue_buffers()) before reading the queue status.
+ */
smp_mb();
if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) &&
likely(efx->port_enabled) &&
likely(netif_device_present(efx->net_dev))) {
- fill_level = tx_queue->insert_count - tx_queue->read_count;
- if (fill_level < EFX_TXQ_THRESHOLD(efx))
+ txq2 = efx_tx_queue_partner(tx_queue);
+ fill_level = max(tx_queue->insert_count - tx_queue->read_count,
+ txq2->insert_count - txq2->read_count);
+ if (fill_level <= efx->txq_wake_thresh)
netif_tx_wake_queue(tx_queue->core_txq);
}
@@ -480,11 +463,26 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
}
}
+/* Size of page-based TSO header buffers. Larger blocks must be
+ * allocated from the heap.
+ */
+#define TSOH_STD_SIZE 128
+#define TSOH_PER_PAGE (PAGE_SIZE / TSOH_STD_SIZE)
+
+/* At most half the descriptors in the queue at any time will refer to
+ * a TSO header buffer, since they must always be followed by a
+ * payload descriptor referring to an skb.
+ */
+static unsigned int efx_tsoh_page_count(struct efx_tx_queue *tx_queue)
+{
+ return DIV_ROUND_UP(tx_queue->ptr_mask + 1, 2 * TSOH_PER_PAGE);
+}
+
int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
{
struct efx_nic *efx = tx_queue->efx;
unsigned int entries;
- int i, rc;
+ int rc;
/* Create the smallest power-of-two aligned ring */
entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE);
@@ -500,17 +498,28 @@ int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
GFP_KERNEL);
if (!tx_queue->buffer)
return -ENOMEM;
- for (i = 0; i <= tx_queue->ptr_mask; ++i)
- tx_queue->buffer[i].continuation = true;
+
+ if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD) {
+ tx_queue->tsoh_page =
+ kcalloc(efx_tsoh_page_count(tx_queue),
+ sizeof(tx_queue->tsoh_page[0]), GFP_KERNEL);
+ if (!tx_queue->tsoh_page) {
+ rc = -ENOMEM;
+ goto fail1;
+ }
+ }
/* Allocate hardware ring */
rc = efx_nic_probe_tx(tx_queue);
if (rc)
- goto fail;
+ goto fail2;
return 0;
- fail:
+fail2:
+ kfree(tx_queue->tsoh_page);
+ tx_queue->tsoh_page = NULL;
+fail1:
kfree(tx_queue->buffer);
tx_queue->buffer = NULL;
return rc;
@@ -546,8 +555,6 @@ void efx_release_tx_buffers(struct efx_tx_queue *tx_queue)
unsigned int pkts_compl = 0, bytes_compl = 0;
buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask];
efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
- buffer->continuation = true;
- buffer->len = 0;
++tx_queue->read_count;
}
@@ -568,13 +575,12 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
efx_nic_fini_tx(tx_queue);
efx_release_tx_buffers(tx_queue);
-
- /* Free up TSO header cache */
- efx_fini_tso(tx_queue);
}
void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
{
+ int i;
+
if (!tx_queue->buffer)
return;
@@ -582,6 +588,14 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
"destroying TX queue %d\n", tx_queue->queue);
efx_nic_remove_tx(tx_queue);
+ if (tx_queue->tsoh_page) {
+ for (i = 0; i < efx_tsoh_page_count(tx_queue); i++)
+ efx_nic_free_buffer(tx_queue->efx,
+ &tx_queue->tsoh_page[i]);
+ kfree(tx_queue->tsoh_page);
+ tx_queue->tsoh_page = NULL;
+ }
+
kfree(tx_queue->buffer);
tx_queue->buffer = NULL;
}
@@ -604,22 +618,7 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
#define TSOH_OFFSET NET_IP_ALIGN
#endif
-#define TSOH_BUFFER(tsoh) ((u8 *)(tsoh + 1) + TSOH_OFFSET)
-
-/* Total size of struct efx_tso_header, buffer and padding */
-#define TSOH_SIZE(hdr_len) \
- (sizeof(struct efx_tso_header) + TSOH_OFFSET + hdr_len)
-
-/* Size of blocks on free list. Larger blocks must be allocated from
- * the heap.
- */
-#define TSOH_STD_SIZE 128
-
#define PTR_DIFF(p1, p2) ((u8 *)(p1) - (u8 *)(p2))
-#define ETH_HDR_LEN(skb) (skb_network_header(skb) - (skb)->data)
-#define SKB_TCP_OFF(skb) PTR_DIFF(tcp_hdr(skb), (skb)->data)
-#define SKB_IPV4_OFF(skb) PTR_DIFF(ip_hdr(skb), (skb)->data)
-#define SKB_IPV6_OFF(skb) PTR_DIFF(ipv6_hdr(skb), (skb)->data)
/**
* struct tso_state - TSO state for an SKB
@@ -631,10 +630,12 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
* @in_len: Remaining length in current SKB fragment
* @unmap_len: Length of SKB fragment
* @unmap_addr: DMA address of SKB fragment
- * @unmap_single: DMA single vs page mapping flag
+ * @dma_flags: TX buffer flags for DMA mapping - %EFX_TX_BUF_MAP_SINGLE or 0
* @protocol: Network protocol (after any VLAN header)
+ * @ip_off: Offset of IP header
+ * @tcp_off: Offset of TCP header
* @header_len: Number of bytes of header
- * @full_packet_size: Number of bytes to put in each outgoing segment
+ * @ip_base_len: IPv4 tot_len or IPv6 payload_len, before TCP payload
*
* The state used during segmentation. It is put into this data structure
* just to make it easy to pass into inline functions.
@@ -651,11 +652,13 @@ struct tso_state {
unsigned in_len;
unsigned unmap_len;
dma_addr_t unmap_addr;
- bool unmap_single;
+ unsigned short dma_flags;
__be16 protocol;
+ unsigned int ip_off;
+ unsigned int tcp_off;
unsigned header_len;
- int full_packet_size;
+ unsigned int ip_base_len;
};
@@ -687,91 +690,43 @@ static __be16 efx_tso_check_protocol(struct sk_buff *skb)
return protocol;
}
-
-/*
- * Allocate a page worth of efx_tso_header structures, and string them
- * into the tx_queue->tso_headers_free linked list. Return 0 or -ENOMEM.
- */
-static int efx_tsoh_block_alloc(struct efx_tx_queue *tx_queue)
+static u8 *efx_tsoh_get_buffer(struct efx_tx_queue *tx_queue,
+ struct efx_tx_buffer *buffer, unsigned int len)
{
- struct device *dma_dev = &tx_queue->efx->pci_dev->dev;
- struct efx_tso_header *tsoh;
- dma_addr_t dma_addr;
- u8 *base_kva, *kva;
+ u8 *result;
- base_kva = dma_alloc_coherent(dma_dev, PAGE_SIZE, &dma_addr, GFP_ATOMIC);
- if (base_kva == NULL) {
- netif_err(tx_queue->efx, tx_err, tx_queue->efx->net_dev,
- "Unable to allocate page for TSO headers\n");
- return -ENOMEM;
- }
-
- /* dma_alloc_coherent() allocates pages. */
- EFX_BUG_ON_PARANOID(dma_addr & (PAGE_SIZE - 1u));
-
- for (kva = base_kva; kva < base_kva + PAGE_SIZE; kva += TSOH_STD_SIZE) {
- tsoh = (struct efx_tso_header *)kva;
- tsoh->dma_addr = dma_addr + (TSOH_BUFFER(tsoh) - base_kva);
- tsoh->next = tx_queue->tso_headers_free;
- tx_queue->tso_headers_free = tsoh;
- }
-
- return 0;
-}
-
-
-/* Free up a TSO header, and all others in the same page. */
-static void efx_tsoh_block_free(struct efx_tx_queue *tx_queue,
- struct efx_tso_header *tsoh,
- struct device *dma_dev)
-{
- struct efx_tso_header **p;
- unsigned long base_kva;
- dma_addr_t base_dma;
-
- base_kva = (unsigned long)tsoh & PAGE_MASK;
- base_dma = tsoh->dma_addr & PAGE_MASK;
-
- p = &tx_queue->tso_headers_free;
- while (*p != NULL) {
- if (((unsigned long)*p & PAGE_MASK) == base_kva)
- *p = (*p)->next;
- else
- p = &(*p)->next;
- }
+ EFX_BUG_ON_PARANOID(buffer->len);
+ EFX_BUG_ON_PARANOID(buffer->flags);
+ EFX_BUG_ON_PARANOID(buffer->unmap_len);
- dma_free_coherent(dma_dev, PAGE_SIZE, (void *)base_kva, base_dma);
-}
+ if (likely(len <= TSOH_STD_SIZE - TSOH_OFFSET)) {
+ unsigned index =
+ (tx_queue->insert_count & tx_queue->ptr_mask) / 2;
+ struct efx_buffer *page_buf =
+ &tx_queue->tsoh_page[index / TSOH_PER_PAGE];
+ unsigned offset =
+ TSOH_STD_SIZE * (index % TSOH_PER_PAGE) + TSOH_OFFSET;
+
+ if (unlikely(!page_buf->addr) &&
+ efx_nic_alloc_buffer(tx_queue->efx, page_buf, PAGE_SIZE))
+ return NULL;
+
+ result = (u8 *)page_buf->addr + offset;
+ buffer->dma_addr = page_buf->dma_addr + offset;
+ buffer->flags = EFX_TX_BUF_CONT;
+ } else {
+ tx_queue->tso_long_headers++;
-static struct efx_tso_header *
-efx_tsoh_heap_alloc(struct efx_tx_queue *tx_queue, size_t header_len)
-{
- struct efx_tso_header *tsoh;
-
- tsoh = kmalloc(TSOH_SIZE(header_len), GFP_ATOMIC | GFP_DMA);
- if (unlikely(!tsoh))
- return NULL;
-
- tsoh->dma_addr = dma_map_single(&tx_queue->efx->pci_dev->dev,
- TSOH_BUFFER(tsoh), header_len,
- DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(&tx_queue->efx->pci_dev->dev,
- tsoh->dma_addr))) {
- kfree(tsoh);
- return NULL;
+ buffer->heap_buf = kmalloc(TSOH_OFFSET + len, GFP_ATOMIC);
+ if (unlikely(!buffer->heap_buf))
+ return NULL;
+ result = (u8 *)buffer->heap_buf + TSOH_OFFSET;
+ buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_HEAP;
}
- tsoh->unmap_len = header_len;
- return tsoh;
-}
+ buffer->len = len;
-static void
-efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh)
-{
- dma_unmap_single(&tx_queue->efx->pci_dev->dev,
- tsoh->dma_addr, tsoh->unmap_len,
- DMA_TO_DEVICE);
- kfree(tsoh);
+ return result;
}
/**
@@ -781,47 +736,19 @@ efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh)
* @len: Length of fragment
* @final_buffer: The final buffer inserted into the queue
*
- * Push descriptors onto the TX queue. Return 0 on success or 1 if
- * @tx_queue full.
+ * Push descriptors onto the TX queue.
*/
-static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
- dma_addr_t dma_addr, unsigned len,
- struct efx_tx_buffer **final_buffer)
+static void efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
+ dma_addr_t dma_addr, unsigned len,
+ struct efx_tx_buffer **final_buffer)
{
struct efx_tx_buffer *buffer;
struct efx_nic *efx = tx_queue->efx;
- unsigned dma_len, fill_level, insert_ptr;
- int q_space;
+ unsigned dma_len, insert_ptr;
EFX_BUG_ON_PARANOID(len <= 0);
- fill_level = tx_queue->insert_count - tx_queue->old_read_count;
- /* -1 as there is no way to represent all descriptors used */
- q_space = efx->txq_entries - 1 - fill_level;
-
while (1) {
- if (unlikely(q_space-- <= 0)) {
- /* It might be that completions have happened
- * since the xmit path last checked. Update
- * the xmit path's copy of read_count.
- */
- netif_tx_stop_queue(tx_queue->core_txq);
- /* This memory barrier protects the change of
- * queue state from the access of read_count. */
- smp_mb();
- tx_queue->old_read_count =
- ACCESS_ONCE(tx_queue->read_count);
- fill_level = (tx_queue->insert_count
- - tx_queue->old_read_count);
- q_space = efx->txq_entries - 1 - fill_level;
- if (unlikely(q_space-- <= 0)) {
- *final_buffer = NULL;
- return 1;
- }
- smp_mb();
- netif_tx_start_queue(tx_queue->core_txq);
- }
-
insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
buffer = &tx_queue->buffer[insert_ptr];
++tx_queue->insert_count;
@@ -830,12 +757,9 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
tx_queue->read_count >=
efx->txq_entries);
- efx_tsoh_free(tx_queue, buffer);
EFX_BUG_ON_PARANOID(buffer->len);
EFX_BUG_ON_PARANOID(buffer->unmap_len);
- EFX_BUG_ON_PARANOID(buffer->skb);
- EFX_BUG_ON_PARANOID(!buffer->continuation);
- EFX_BUG_ON_PARANOID(buffer->tsoh);
+ EFX_BUG_ON_PARANOID(buffer->flags);
buffer->dma_addr = dma_addr;
@@ -845,7 +769,8 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
if (dma_len >= len)
break;
- buffer->len = dma_len; /* Don't set the other members */
+ buffer->len = dma_len;
+ buffer->flags = EFX_TX_BUF_CONT;
dma_addr += dma_len;
len -= dma_len;
}
@@ -853,7 +778,6 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
EFX_BUG_ON_PARANOID(!len);
buffer->len = len;
*final_buffer = buffer;
- return 0;
}
@@ -864,54 +788,42 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
* a single fragment, and we know it doesn't cross a page boundary. It
* also allows us to not worry about end-of-packet etc.
*/
-static void efx_tso_put_header(struct efx_tx_queue *tx_queue,
- struct efx_tso_header *tsoh, unsigned len)
+static int efx_tso_put_header(struct efx_tx_queue *tx_queue,
+ struct efx_tx_buffer *buffer, u8 *header)
{
- struct efx_tx_buffer *buffer;
-
- buffer = &tx_queue->buffer[tx_queue->insert_count & tx_queue->ptr_mask];
- efx_tsoh_free(tx_queue, buffer);
- EFX_BUG_ON_PARANOID(buffer->len);
- EFX_BUG_ON_PARANOID(buffer->unmap_len);
- EFX_BUG_ON_PARANOID(buffer->skb);
- EFX_BUG_ON_PARANOID(!buffer->continuation);
- EFX_BUG_ON_PARANOID(buffer->tsoh);
- buffer->len = len;
- buffer->dma_addr = tsoh->dma_addr;
- buffer->tsoh = tsoh;
+ if (unlikely(buffer->flags & EFX_TX_BUF_HEAP)) {
+ buffer->dma_addr = dma_map_single(&tx_queue->efx->pci_dev->dev,
+ header, buffer->len,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(&tx_queue->efx->pci_dev->dev,
+ buffer->dma_addr))) {
+ kfree(buffer->heap_buf);
+ buffer->len = 0;
+ buffer->flags = 0;
+ return -ENOMEM;
+ }
+ buffer->unmap_len = buffer->len;
+ buffer->flags |= EFX_TX_BUF_MAP_SINGLE;
+ }
++tx_queue->insert_count;
+ return 0;
}
-/* Remove descriptors put into a tx_queue. */
+/* Remove buffers put into a tx_queue. None of the buffers must have
+ * an skb attached.
+ */
static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
{
struct efx_tx_buffer *buffer;
- dma_addr_t unmap_addr;
/* Work backwards until we hit the original insert pointer value */
while (tx_queue->insert_count != tx_queue->write_count) {
--tx_queue->insert_count;
buffer = &tx_queue->buffer[tx_queue->insert_count &
tx_queue->ptr_mask];
- efx_tsoh_free(tx_queue, buffer);
- EFX_BUG_ON_PARANOID(buffer->skb);
- if (buffer->unmap_len) {
- unmap_addr = (buffer->dma_addr + buffer->len -
- buffer->unmap_len);
- if (buffer->unmap_single)
- dma_unmap_single(&tx_queue->efx->pci_dev->dev,
- unmap_addr, buffer->unmap_len,
- DMA_TO_DEVICE);
- else
- dma_unmap_page(&tx_queue->efx->pci_dev->dev,
- unmap_addr, buffer->unmap_len,
- DMA_TO_DEVICE);
- buffer->unmap_len = 0;
- }
- buffer->len = 0;
- buffer->continuation = true;
+ efx_dequeue_buffer(tx_queue, buffer, NULL, NULL);
}
}
@@ -919,17 +831,16 @@ static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
/* Parse the SKB header and initialise state. */
static void tso_start(struct tso_state *st, const struct sk_buff *skb)
{
- /* All ethernet/IP/TCP headers combined size is TCP header size
- * plus offset of TCP header relative to start of packet.
- */
- st->header_len = ((tcp_hdr(skb)->doff << 2u)
- + PTR_DIFF(tcp_hdr(skb), skb->data));
- st->full_packet_size = st->header_len + skb_shinfo(skb)->gso_size;
-
- if (st->protocol == htons(ETH_P_IP))
+ st->ip_off = skb_network_header(skb) - skb->data;
+ st->tcp_off = skb_transport_header(skb) - skb->data;
+ st->header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u);
+ if (st->protocol == htons(ETH_P_IP)) {
+ st->ip_base_len = st->header_len - st->ip_off;
st->ipv4_id = ntohs(ip_hdr(skb)->id);
- else
+ } else {
+ st->ip_base_len = st->header_len - st->tcp_off;
st->ipv4_id = 0;
+ }
st->seqnum = ntohl(tcp_hdr(skb)->seq);
EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg);
@@ -938,7 +849,7 @@ static void tso_start(struct tso_state *st, const struct sk_buff *skb)
st->out_len = skb->len - st->header_len;
st->unmap_len = 0;
- st->unmap_single = false;
+ st->dma_flags = 0;
}
static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
@@ -947,7 +858,7 @@ static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
st->unmap_addr = skb_frag_dma_map(&efx->pci_dev->dev, frag, 0,
skb_frag_size(frag), DMA_TO_DEVICE);
if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) {
- st->unmap_single = false;
+ st->dma_flags = 0;
st->unmap_len = skb_frag_size(frag);
st->in_len = skb_frag_size(frag);
st->dma_addr = st->unmap_addr;
@@ -965,7 +876,7 @@ static int tso_get_head_fragment(struct tso_state *st, struct efx_nic *efx,
st->unmap_addr = dma_map_single(&efx->pci_dev->dev, skb->data + hl,
len, DMA_TO_DEVICE);
if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) {
- st->unmap_single = true;
+ st->dma_flags = EFX_TX_BUF_MAP_SINGLE;
st->unmap_len = len;
st->in_len = len;
st->dma_addr = st->unmap_addr;
@@ -982,20 +893,19 @@ static int tso_get_head_fragment(struct tso_state *st, struct efx_nic *efx,
* @st: TSO state
*
* Form descriptors for the current fragment, until we reach the end
- * of fragment or end-of-packet. Return 0 on success, 1 if not enough
- * space in @tx_queue.
+ * of fragment or end-of-packet.
*/
-static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
- const struct sk_buff *skb,
- struct tso_state *st)
+static void tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
+ const struct sk_buff *skb,
+ struct tso_state *st)
{
struct efx_tx_buffer *buffer;
- int n, end_of_packet, rc;
+ int n;
if (st->in_len == 0)
- return 0;
+ return;
if (st->packet_space == 0)
- return 0;
+ return;
EFX_BUG_ON_PARANOID(st->in_len <= 0);
EFX_BUG_ON_PARANOID(st->packet_space <= 0);
@@ -1006,25 +916,24 @@ static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
st->out_len -= n;
st->in_len -= n;
- rc = efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer);
- if (likely(rc == 0)) {
- if (st->out_len == 0)
- /* Transfer ownership of the skb */
- buffer->skb = skb;
+ efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer);
- end_of_packet = st->out_len == 0 || st->packet_space == 0;
- buffer->continuation = !end_of_packet;
+ if (st->out_len == 0) {
+ /* Transfer ownership of the skb */
+ buffer->skb = skb;
+ buffer->flags = EFX_TX_BUF_SKB;
+ } else if (st->packet_space != 0) {
+ buffer->flags = EFX_TX_BUF_CONT;
+ }
- if (st->in_len == 0) {
- /* Transfer ownership of the DMA mapping */
- buffer->unmap_len = st->unmap_len;
- buffer->unmap_single = st->unmap_single;
- st->unmap_len = 0;
- }
+ if (st->in_len == 0) {
+ /* Transfer ownership of the DMA mapping */
+ buffer->unmap_len = st->unmap_len;
+ buffer->flags |= st->dma_flags;
+ st->unmap_len = 0;
}
st->dma_addr += n;
- return rc;
}
@@ -1035,36 +944,25 @@ static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
* @st: TSO state
*
* Generate a new header and prepare for the new packet. Return 0 on
- * success, or -1 if failed to alloc header.
+ * success, or -%ENOMEM if failed to alloc header.
*/
static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
const struct sk_buff *skb,
struct tso_state *st)
{
- struct efx_tso_header *tsoh;
+ struct efx_tx_buffer *buffer =
+ &tx_queue->buffer[tx_queue->insert_count & tx_queue->ptr_mask];
struct tcphdr *tsoh_th;
unsigned ip_length;
u8 *header;
+ int rc;
- /* Allocate a DMA-mapped header buffer. */
- if (likely(TSOH_SIZE(st->header_len) <= TSOH_STD_SIZE)) {
- if (tx_queue->tso_headers_free == NULL) {
- if (efx_tsoh_block_alloc(tx_queue))
- return -1;
- }
- EFX_BUG_ON_PARANOID(!tx_queue->tso_headers_free);
- tsoh = tx_queue->tso_headers_free;
- tx_queue->tso_headers_free = tsoh->next;
- tsoh->unmap_len = 0;
- } else {
- tx_queue->tso_long_headers++;
- tsoh = efx_tsoh_heap_alloc(tx_queue, st->header_len);
- if (unlikely(!tsoh))
- return -1;
- }
+ /* Allocate and insert a DMA-mapped header buffer. */
+ header = efx_tsoh_get_buffer(tx_queue, buffer, st->header_len);
+ if (!header)
+ return -ENOMEM;
- header = TSOH_BUFFER(tsoh);
- tsoh_th = (struct tcphdr *)(header + SKB_TCP_OFF(skb));
+ tsoh_th = (struct tcphdr *)(header + st->tcp_off);
/* Copy and update the headers. */
memcpy(header, skb->data, st->header_len);
@@ -1073,19 +971,19 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
st->seqnum += skb_shinfo(skb)->gso_size;
if (st->out_len > skb_shinfo(skb)->gso_size) {
/* This packet will not finish the TSO burst. */
- ip_length = st->full_packet_size - ETH_HDR_LEN(skb);
+ st->packet_space = skb_shinfo(skb)->gso_size;
tsoh_th->fin = 0;
tsoh_th->psh = 0;
} else {
/* This packet will be the last in the TSO burst. */
- ip_length = st->header_len - ETH_HDR_LEN(skb) + st->out_len;
+ st->packet_space = st->out_len;
tsoh_th->fin = tcp_hdr(skb)->fin;
tsoh_th->psh = tcp_hdr(skb)->psh;
}
+ ip_length = st->ip_base_len + st->packet_space;
if (st->protocol == htons(ETH_P_IP)) {
- struct iphdr *tsoh_iph =
- (struct iphdr *)(header + SKB_IPV4_OFF(skb));
+ struct iphdr *tsoh_iph = (struct iphdr *)(header + st->ip_off);
tsoh_iph->tot_len = htons(ip_length);
@@ -1094,16 +992,16 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
st->ipv4_id++;
} else {
struct ipv6hdr *tsoh_iph =
- (struct ipv6hdr *)(header + SKB_IPV6_OFF(skb));
+ (struct ipv6hdr *)(header + st->ip_off);
- tsoh_iph->payload_len = htons(ip_length - sizeof(*tsoh_iph));
+ tsoh_iph->payload_len = htons(ip_length);
}
- st->packet_space = skb_shinfo(skb)->gso_size;
- ++tx_queue->tso_packets;
+ rc = efx_tso_put_header(tx_queue, buffer, header);
+ if (unlikely(rc))
+ return rc;
- /* Form a descriptor for this header. */
- efx_tso_put_header(tx_queue, tsoh, st->header_len);
+ ++tx_queue->tso_packets;
return 0;
}
@@ -1118,13 +1016,13 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
*
* Add socket buffer @skb to @tx_queue, doing TSO or return != 0 if
* @skb was not enqueued. In all cases @skb is consumed. Return
- * %NETDEV_TX_OK or %NETDEV_TX_BUSY.
+ * %NETDEV_TX_OK.
*/
static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
struct sk_buff *skb)
{
struct efx_nic *efx = tx_queue->efx;
- int frag_i, rc, rc2 = NETDEV_TX_OK;
+ int frag_i, rc;
struct tso_state state;
/* Find the packet protocol and sanity-check it */
@@ -1156,11 +1054,7 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
goto mem_err;
while (1) {
- rc = tso_fill_packet_with_fragment(tx_queue, skb, &state);
- if (unlikely(rc)) {
- rc2 = NETDEV_TX_BUSY;
- goto unwind;
- }
+ tso_fill_packet_with_fragment(tx_queue, skb, &state);
/* Move onto the next fragment? */
if (state.in_len == 0) {
@@ -1184,6 +1078,8 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
/* Pass off to hardware */
efx_nic_push_buffers(tx_queue);
+ efx_tx_maybe_stop_queue(tx_queue);
+
tx_queue->tso_bursts++;
return NETDEV_TX_OK;
@@ -1192,10 +1088,9 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
"Out of memory for TSO headers, or DMA mapping error\n");
dev_kfree_skb_any(skb);
- unwind:
/* Free the DMA mapping we were in the process of writing out */
if (state.unmap_len) {
- if (state.unmap_single)
+ if (state.dma_flags & EFX_TX_BUF_MAP_SINGLE)
dma_unmap_single(&efx->pci_dev->dev, state.unmap_addr,
state.unmap_len, DMA_TO_DEVICE);
else
@@ -1204,25 +1099,5 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
}
efx_enqueue_unwind(tx_queue);
- return rc2;
-}
-
-
-/*
- * Free up all TSO datastructures associated with tx_queue. This
- * routine should be called only once the tx_queue is both empty and
- * will no longer be used.
- */
-static void efx_fini_tso(struct efx_tx_queue *tx_queue)
-{
- unsigned i;
-
- if (tx_queue->buffer) {
- for (i = 0; i <= tx_queue->ptr_mask; ++i)
- efx_tsoh_free(tx_queue, &tx_queue->buffer[i]);
- }
-
- while (tx_queue->tso_headers_free != NULL)
- efx_tsoh_block_free(tx_queue, tx_queue->tso_headers_free,
- &tx_queue->efx->pci_dev->dev);
+ return NETDEV_TX_OK;
}