summaryrefslogtreecommitdiffstats
path: root/include/linux/skbuff.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/skbuff.h')
-rw-r--r--include/linux/skbuff.h338
1 files changed, 292 insertions, 46 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c0a4f3ab0cc0..fe864885c1ed 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -20,7 +20,7 @@
#include <linux/time.h>
#include <linux/cache.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <asm/types.h>
#include <linux/spinlock.h>
#include <linux/net.h>
@@ -29,6 +29,7 @@
#include <linux/rcupdate.h>
#include <linux/dmaengine.h>
#include <linux/hrtimer.h>
+#include <linux/dma-mapping.h>
/* Don't change this without changing skb_csum_unnecessary! */
#define CHECKSUM_NONE 0
@@ -45,6 +46,11 @@
#define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0))
#define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2))
+/* return minimum truesize of one skb containing X bytes of data */
+#define SKB_TRUESIZE(X) ((X) + \
+ SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
/* A. Checksumming of received packets by device.
*
* NONE: device failed to checksum this packet.
@@ -134,7 +140,9 @@ struct sk_buff;
typedef struct skb_frag_struct skb_frag_t;
struct skb_frag_struct {
- struct page *page;
+ struct {
+ struct page *p;
+ } page;
#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
__u32 page_offset;
__u32 size;
@@ -144,6 +152,26 @@ struct skb_frag_struct {
#endif
};
+static inline unsigned int skb_frag_size(const skb_frag_t *frag)
+{
+ return frag->size;
+}
+
+static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
+{
+ frag->size = size;
+}
+
+static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
+{
+ frag->size += delta;
+}
+
+static inline void skb_frag_size_sub(skb_frag_t *frag, int delta)
+{
+ frag->size -= delta;
+}
+
#define HAVE_HW_TIME_STAMP
/**
@@ -187,6 +215,20 @@ enum {
/* ensure the originating sk reference is available on driver level */
SKBTX_DRV_NEEDS_SK_REF = 1 << 3,
+
+ /* device driver supports TX zero-copy buffers */
+ SKBTX_DEV_ZEROCOPY = 1 << 4,
+};
+
+/*
+ * The callback notifies userspace to release buffers when skb DMA is done in
+ * lower device, the skb last reference should be 0 when calling this.
+ * The desc is used to track userspace buffer index.
+ */
+struct ubuf_info {
+ void (*callback)(void *);
+ void *arg;
+ unsigned long desc;
};
/* This data is invariant across clones and lives at
@@ -211,6 +253,7 @@ struct skb_shared_info {
/* Intermediate layers must ensure that destructor_arg
* remains valid until skb destructor */
void * destructor_arg;
+
/* must be last field, see pskb_expand_head() */
skb_frag_t frags[MAX_SKB_FRAGS];
};
@@ -270,15 +313,12 @@ typedef unsigned char *sk_buff_data_t;
* struct sk_buff - socket buffer
* @next: Next buffer in list
* @prev: Previous buffer in list
- * @sk: Socket we are owned by
* @tstamp: Time we arrived
+ * @sk: Socket we are owned by
* @dev: Device we arrived on/are leaving by
- * @transport_header: Transport layer header
- * @network_header: Network layer header
- * @mac_header: Link layer header
+ * @cb: Control buffer. Free for use by every layer. Put private vars here
* @_skb_refdst: destination entry (with norefcount bit)
* @sp: the security path, used for xfrm
- * @cb: Control buffer. Free for use by every layer. Put private vars here
* @len: Length of actual data
* @data_len: Data length
* @mac_len: Length of link layer header
@@ -286,40 +326,47 @@ typedef unsigned char *sk_buff_data_t;
* @csum: Checksum (must include start/offset pair)
* @csum_start: Offset from skb->head where checksumming should start
* @csum_offset: Offset from csum_start where checksum should be stored
+ * @priority: Packet queueing priority
* @local_df: allow local fragmentation
* @cloned: Head may be cloned (check refcnt to be sure)
+ * @ip_summed: Driver fed us an IP checksum
* @nohdr: Payload reference only, must not modify header
+ * @nfctinfo: Relationship of this skb to the connection
* @pkt_type: Packet class
* @fclone: skbuff clone status
- * @ip_summed: Driver fed us an IP checksum
- * @priority: Packet queueing priority
- * @users: User count - see {datagram,tcp}.c
- * @protocol: Packet protocol from driver
- * @truesize: Buffer size
- * @head: Head of buffer
- * @data: Data head pointer
- * @tail: Tail pointer
- * @end: End pointer
- * @destructor: Destruct function
- * @mark: Generic packet mark
- * @nfct: Associated connection, if any
* @ipvs_property: skbuff is owned by ipvs
* @peeked: this packet has been seen already, so stats have been
* done for it, don't do them again
* @nf_trace: netfilter packet trace flag
- * @nfctinfo: Relationship of this skb to the connection
+ * @protocol: Packet protocol from driver
+ * @destructor: Destruct function
+ * @nfct: Associated connection, if any
* @nfct_reasm: netfilter conntrack re-assembly pointer
* @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
* @skb_iif: ifindex of device we arrived on
- * @rxhash: the packet hash computed on receive
- * @queue_mapping: Queue mapping for multiqueue devices
* @tc_index: Traffic control index
* @tc_verd: traffic control verdict
+ * @rxhash: the packet hash computed on receive
+ * @queue_mapping: Queue mapping for multiqueue devices
* @ndisc_nodetype: router type (from link layer)
+ * @ooo_okay: allow the mapping of a socket to a queue to be changed
+ * @l4_rxhash: indicate rxhash is a canonical 4-tuple hash over transport
+ * ports.
* @dma_cookie: a cookie to one of several possible DMA operations
* done by skb DMA functions
* @secmark: security marking
+ * @mark: Generic packet mark
+ * @dropcount: total number of sk_receive_queue overflows
* @vlan_tci: vlan tag control information
+ * @transport_header: Transport layer header
+ * @network_header: Network layer header
+ * @mac_header: Link layer header
+ * @tail: Tail pointer
+ * @end: End pointer
+ * @head: Head of buffer
+ * @data: Data head pointer
+ * @truesize: Buffer size
+ * @users: User count - see {datagram,tcp}.c
*/
struct sk_buff {
@@ -397,6 +444,7 @@ struct sk_buff {
__u8 ndisc_nodetype:2;
#endif
__u8 ooo_okay:1;
+ __u8 l4_rxhash:1;
kmemcheck_bitfield_end(flags2);
/* 0/13 bit hole */
@@ -504,9 +552,11 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
return __alloc_skb(size, priority, 1, NUMA_NO_NODE);
}
+extern void skb_recycle(struct sk_buff *skb);
extern bool skb_recycle_check(struct sk_buff *skb, int skb_size);
extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
+extern int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
extern struct sk_buff *skb_clone(struct sk_buff *skb,
gfp_t priority);
extern struct sk_buff *skb_copy(const struct sk_buff *skb,
@@ -555,11 +605,11 @@ extern unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
unsigned int to, struct ts_config *config,
struct ts_state *state);
-extern __u32 __skb_get_rxhash(struct sk_buff *skb);
+extern void __skb_get_rxhash(struct sk_buff *skb);
static inline __u32 skb_get_rxhash(struct sk_buff *skb)
{
if (!skb->rxhash)
- skb->rxhash = __skb_get_rxhash(skb);
+ __skb_get_rxhash(skb);
return skb->rxhash;
}
@@ -805,9 +855,9 @@ static inline struct sk_buff *skb_unshare(struct sk_buff *skb,
* The reference count is not incremented and the reference is therefore
* volatile. Use with caution.
*/
-static inline struct sk_buff *skb_peek(struct sk_buff_head *list_)
+static inline struct sk_buff *skb_peek(const struct sk_buff_head *list_)
{
- struct sk_buff *list = ((struct sk_buff *)list_)->next;
+ struct sk_buff *list = ((const struct sk_buff *)list_)->next;
if (list == (struct sk_buff *)list_)
list = NULL;
return list;
@@ -826,9 +876,9 @@ static inline struct sk_buff *skb_peek(struct sk_buff_head *list_)
* The reference count is not incremented and the reference is therefore
* volatile. Use with caution.
*/
-static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_)
+static inline struct sk_buff *skb_peek_tail(const struct sk_buff_head *list_)
{
- struct sk_buff *list = ((struct sk_buff *)list_)->prev;
+ struct sk_buff *list = ((const struct sk_buff *)list_)->prev;
if (list == (struct sk_buff *)list_)
list = NULL;
return list;
@@ -1105,18 +1155,51 @@ static inline int skb_pagelen(const struct sk_buff *skb)
int i, len = 0;
for (i = (int)skb_shinfo(skb)->nr_frags - 1; i >= 0; i--)
- len += skb_shinfo(skb)->frags[i].size;
+ len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
return len + skb_headlen(skb);
}
-static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
- struct page *page, int off, int size)
+/**
+ * __skb_fill_page_desc - initialise a paged fragment in an skb
+ * @skb: buffer containing fragment to be initialised
+ * @i: paged fragment index to initialise
+ * @page: the page to use for this fragment
+ * @off: the offset to the data with @page
+ * @size: the length of the data
+ *
+ * Initialises the @i'th fragment of @skb to point to &size bytes at
+ * offset @off within @page.
+ *
+ * Does not take any additional reference on the fragment.
+ */
+static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
+ struct page *page, int off, int size)
{
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- frag->page = page;
+ frag->page.p = page;
frag->page_offset = off;
- frag->size = size;
+ skb_frag_size_set(frag, size);
+}
+
+/**
+ * skb_fill_page_desc - initialise a paged fragment in an skb
+ * @skb: buffer containing fragment to be initialised
+ * @i: paged fragment index to initialise
+ * @page: the page to use for this fragment
+ * @off: the offset to the data with @page
+ * @size: the length of the data
+ *
+ * As per __skb_fill_page_desc() -- initialises the @i'th fragment of
+ * @skb to point to &size bytes at offset @off within @page. In
+ * addition updates @skb such that @i is the last fragment.
+ *
+ * Does not take any additional reference on the fragment.
+ */
+static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
+ struct page *page, int off, int size)
+{
+ __skb_fill_page_desc(skb, i, page, off, size);
skb_shinfo(skb)->nr_frags = i + 1;
}
@@ -1562,16 +1645,22 @@ static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev,
return __netdev_alloc_skb(dev, length, GFP_ATOMIC);
}
-static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
- unsigned int length)
+static inline struct sk_buff *__netdev_alloc_skb_ip_align(struct net_device *dev,
+ unsigned int length, gfp_t gfp)
{
- struct sk_buff *skb = netdev_alloc_skb(dev, length + NET_IP_ALIGN);
+ struct sk_buff *skb = __netdev_alloc_skb(dev, length + NET_IP_ALIGN, gfp);
if (NET_IP_ALIGN && skb)
skb_reserve(skb, NET_IP_ALIGN);
return skb;
}
+static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
+ unsigned int length)
+{
+ return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC);
+}
+
/**
* __netdev_alloc_page - allocate a page for ps-rx on a specific device
* @dev: network device to receive on
@@ -1605,6 +1694,137 @@ static inline void netdev_free_page(struct net_device *dev, struct page *page)
}
/**
+ * skb_frag_page - retrieve the page refered to by a paged fragment
+ * @frag: the paged fragment
+ *
+ * Returns the &struct page associated with @frag.
+ */
+static inline struct page *skb_frag_page(const skb_frag_t *frag)
+{
+ return frag->page.p;
+}
+
+/**
+ * __skb_frag_ref - take an addition reference on a paged fragment.
+ * @frag: the paged fragment
+ *
+ * Takes an additional reference on the paged fragment @frag.
+ */
+static inline void __skb_frag_ref(skb_frag_t *frag)
+{
+ get_page(skb_frag_page(frag));
+}
+
+/**
+ * skb_frag_ref - take an addition reference on a paged fragment of an skb.
+ * @skb: the buffer
+ * @f: the fragment offset.
+ *
+ * Takes an additional reference on the @f'th paged fragment of @skb.
+ */
+static inline void skb_frag_ref(struct sk_buff *skb, int f)
+{
+ __skb_frag_ref(&skb_shinfo(skb)->frags[f]);
+}
+
+/**
+ * __skb_frag_unref - release a reference on a paged fragment.
+ * @frag: the paged fragment
+ *
+ * Releases a reference on the paged fragment @frag.
+ */
+static inline void __skb_frag_unref(skb_frag_t *frag)
+{
+ put_page(skb_frag_page(frag));
+}
+
+/**
+ * skb_frag_unref - release a reference on a paged fragment of an skb.
+ * @skb: the buffer
+ * @f: the fragment offset
+ *
+ * Releases a reference on the @f'th paged fragment of @skb.
+ */
+static inline void skb_frag_unref(struct sk_buff *skb, int f)
+{
+ __skb_frag_unref(&skb_shinfo(skb)->frags[f]);
+}
+
+/**
+ * skb_frag_address - gets the address of the data contained in a paged fragment
+ * @frag: the paged fragment buffer
+ *
+ * Returns the address of the data within @frag. The page must already
+ * be mapped.
+ */
+static inline void *skb_frag_address(const skb_frag_t *frag)
+{
+ return page_address(skb_frag_page(frag)) + frag->page_offset;
+}
+
+/**
+ * skb_frag_address_safe - gets the address of the data contained in a paged fragment
+ * @frag: the paged fragment buffer
+ *
+ * Returns the address of the data within @frag. Checks that the page
+ * is mapped and returns %NULL otherwise.
+ */
+static inline void *skb_frag_address_safe(const skb_frag_t *frag)
+{
+ void *ptr = page_address(skb_frag_page(frag));
+ if (unlikely(!ptr))
+ return NULL;
+
+ return ptr + frag->page_offset;
+}
+
+/**
+ * __skb_frag_set_page - sets the page contained in a paged fragment
+ * @frag: the paged fragment
+ * @page: the page to set
+ *
+ * Sets the fragment @frag to contain @page.
+ */
+static inline void __skb_frag_set_page(skb_frag_t *frag, struct page *page)
+{
+ frag->page.p = page;
+}
+
+/**
+ * skb_frag_set_page - sets the page contained in a paged fragment of an skb
+ * @skb: the buffer
+ * @f: the fragment offset
+ * @page: the page to set
+ *
+ * Sets the @f'th fragment of @skb to contain @page.
+ */
+static inline void skb_frag_set_page(struct sk_buff *skb, int f,
+ struct page *page)
+{
+ __skb_frag_set_page(&skb_shinfo(skb)->frags[f], page);
+}
+
+/**
+ * skb_frag_dma_map - maps a paged fragment via the DMA API
+ * @dev: the device to map the fragment to
+ * @frag: the paged fragment to map
+ * @offset: the offset within the fragment (starting at the
+ * fragment's own offset)
+ * @size: the number of bytes to map
+ * @dir: the direction of the mapping (%PCI_DMA_*)
+ *
+ * Maps the page associated with @frag to @device.
+ */
+static inline dma_addr_t skb_frag_dma_map(struct device *dev,
+ const skb_frag_t *frag,
+ size_t offset, size_t size,
+ enum dma_data_direction dir)
+{
+ return dma_map_page(dev, skb_frag_page(frag),
+ frag->page_offset + offset, size, dir);
+}
+
+/**
* skb_clone_writable - is the header of a clone writable
* @skb: buffer to check
* @len: length up to which to write
@@ -1612,7 +1832,7 @@ static inline void netdev_free_page(struct net_device *dev, struct page *page)
* Returns true if modifying the header part of the cloned buffer
* does not requires the data to be copied.
*/
-static inline int skb_clone_writable(struct sk_buff *skb, unsigned int len)
+static inline int skb_clone_writable(const struct sk_buff *skb, unsigned int len)
{
return !skb_header_cloned(skb) &&
skb_headroom(skb) + len <= skb->hdr_len;
@@ -1706,13 +1926,13 @@ static inline int skb_add_data(struct sk_buff *skb,
}
static inline int skb_can_coalesce(struct sk_buff *skb, int i,
- struct page *page, int off)
+ const struct page *page, int off)
{
if (i) {
- struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1];
+ const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1];
- return page == frag->page &&
- off == frag->page_offset + frag->size;
+ return page == skb_frag_page(frag) &&
+ off == frag->page_offset + skb_frag_size(frag);
}
return 0;
}
@@ -1996,8 +2216,13 @@ static inline bool skb_defer_rx_timestamp(struct sk_buff *skb)
/**
* skb_complete_tx_timestamp() - deliver cloned skb with tx timestamps
*
+ * PHY drivers may accept clones of transmitted packets for
+ * timestamping via their phy_driver.txtstamp method. These drivers
+ * must call this function to return the skb back to the stack, with
+ * or without a timestamp.
+ *
* @skb: clone of the the original outgoing packet
- * @hwtstamps: hardware time stamps
+ * @hwtstamps: hardware time stamps, may be NULL if not available
*
*/
void skb_complete_tx_timestamp(struct sk_buff *skb,
@@ -2028,8 +2253,7 @@ static inline void sw_tx_timestamp(struct sk_buff *skb)
* skb_tx_timestamp() - Driver hook for transmit timestamping
*
* Ethernet MAC Drivers should call this function in their hard_xmit()
- * function as soon as possible after giving the sk_buff to the MAC
- * hardware, but before freeing the sk_buff.
+ * function immediately before giving the sk_buff to the MAC hardware.
*
* @skb: A socket buffer.
*/
@@ -2234,7 +2458,8 @@ static inline bool skb_warn_if_lro(const struct sk_buff *skb)
{
/* LRO sets gso_size but not gso_type, whereas if GSO is really
* wanted then gso_type will be set. */
- struct skb_shared_info *shinfo = skb_shinfo(skb);
+ const struct skb_shared_info *shinfo = skb_shinfo(skb);
+
if (skb_is_nonlinear(skb) && shinfo->gso_size != 0 &&
unlikely(shinfo->gso_type == 0)) {
__skb_warn_lro_forwarding(skb);
@@ -2258,7 +2483,7 @@ static inline void skb_forward_csum(struct sk_buff *skb)
* Instead of forcing ip_summed to CHECKSUM_NONE, we can
* use this helper, to document places where we make this assertion.
*/
-static inline void skb_checksum_none_assert(struct sk_buff *skb)
+static inline void skb_checksum_none_assert(const struct sk_buff *skb)
{
#ifdef DEBUG
BUG_ON(skb->ip_summed != CHECKSUM_NONE);
@@ -2266,5 +2491,26 @@ static inline void skb_checksum_none_assert(struct sk_buff *skb)
}
bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
+
+static inline bool skb_is_recycleable(const struct sk_buff *skb, int skb_size)
+{
+ if (irqs_disabled())
+ return false;
+
+ if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY)
+ return false;
+
+ if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE)
+ return false;
+
+ skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD);
+ if (skb_end_pointer(skb) - skb->head < skb_size)
+ return false;
+
+ if (skb_shared(skb) || skb_cloned(skb))
+ return false;
+
+ return true;
+}
#endif /* __KERNEL__ */
#endif /* _LINUX_SKBUFF_H */