diff options
Diffstat (limited to 'include/linux/skbuff.h')
-rw-r--r-- | include/linux/skbuff.h | 338 |
1 files changed, 292 insertions, 46 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c0a4f3ab0cc0..fe864885c1ed 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -20,7 +20,7 @@ #include <linux/time.h> #include <linux/cache.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <asm/types.h> #include <linux/spinlock.h> #include <linux/net.h> @@ -29,6 +29,7 @@ #include <linux/rcupdate.h> #include <linux/dmaengine.h> #include <linux/hrtimer.h> +#include <linux/dma-mapping.h> /* Don't change this without changing skb_csum_unnecessary! */ #define CHECKSUM_NONE 0 @@ -45,6 +46,11 @@ #define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0)) #define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2)) +/* return minimum truesize of one skb containing X bytes of data */ +#define SKB_TRUESIZE(X) ((X) + \ + SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) + /* A. Checksumming of received packets by device. * * NONE: device failed to checksum this packet. @@ -134,7 +140,9 @@ struct sk_buff; typedef struct skb_frag_struct skb_frag_t; struct skb_frag_struct { - struct page *page; + struct { + struct page *p; + } page; #if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) __u32 page_offset; __u32 size; @@ -144,6 +152,26 @@ struct skb_frag_struct { #endif }; +static inline unsigned int skb_frag_size(const skb_frag_t *frag) +{ + return frag->size; +} + +static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) +{ + frag->size = size; +} + +static inline void skb_frag_size_add(skb_frag_t *frag, int delta) +{ + frag->size += delta; +} + +static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) +{ + frag->size -= delta; +} + #define HAVE_HW_TIME_STAMP /** @@ -187,6 +215,20 @@ enum { /* ensure the originating sk reference is available on driver level */ SKBTX_DRV_NEEDS_SK_REF = 1 << 3, + + /* device driver supports TX zero-copy buffers */ + SKBTX_DEV_ZEROCOPY = 1 << 4, +}; + +/* + * The callback notifies userspace to release buffers when skb DMA is done in + * lower device, the skb last reference should be 0 when calling this. + * The desc is used to track userspace buffer index. + */ +struct ubuf_info { + void (*callback)(void *); + void *arg; + unsigned long desc; }; /* This data is invariant across clones and lives at @@ -211,6 +253,7 @@ struct skb_shared_info { /* Intermediate layers must ensure that destructor_arg * remains valid until skb destructor */ void * destructor_arg; + /* must be last field, see pskb_expand_head() */ skb_frag_t frags[MAX_SKB_FRAGS]; }; @@ -270,15 +313,12 @@ typedef unsigned char *sk_buff_data_t; * struct sk_buff - socket buffer * @next: Next buffer in list * @prev: Previous buffer in list - * @sk: Socket we are owned by * @tstamp: Time we arrived + * @sk: Socket we are owned by * @dev: Device we arrived on/are leaving by - * @transport_header: Transport layer header - * @network_header: Network layer header - * @mac_header: Link layer header + * @cb: Control buffer. Free for use by every layer. Put private vars here * @_skb_refdst: destination entry (with norefcount bit) * @sp: the security path, used for xfrm - * @cb: Control buffer. Free for use by every layer. Put private vars here * @len: Length of actual data * @data_len: Data length * @mac_len: Length of link layer header @@ -286,40 +326,47 @@ typedef unsigned char *sk_buff_data_t; * @csum: Checksum (must include start/offset pair) * @csum_start: Offset from skb->head where checksumming should start * @csum_offset: Offset from csum_start where checksum should be stored + * @priority: Packet queueing priority * @local_df: allow local fragmentation * @cloned: Head may be cloned (check refcnt to be sure) + * @ip_summed: Driver fed us an IP checksum * @nohdr: Payload reference only, must not modify header + * @nfctinfo: Relationship of this skb to the connection * @pkt_type: Packet class * @fclone: skbuff clone status - * @ip_summed: Driver fed us an IP checksum - * @priority: Packet queueing priority - * @users: User count - see {datagram,tcp}.c - * @protocol: Packet protocol from driver - * @truesize: Buffer size - * @head: Head of buffer - * @data: Data head pointer - * @tail: Tail pointer - * @end: End pointer - * @destructor: Destruct function - * @mark: Generic packet mark - * @nfct: Associated connection, if any * @ipvs_property: skbuff is owned by ipvs * @peeked: this packet has been seen already, so stats have been * done for it, don't do them again * @nf_trace: netfilter packet trace flag - * @nfctinfo: Relationship of this skb to the connection + * @protocol: Packet protocol from driver + * @destructor: Destruct function + * @nfct: Associated connection, if any * @nfct_reasm: netfilter conntrack re-assembly pointer * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @skb_iif: ifindex of device we arrived on - * @rxhash: the packet hash computed on receive - * @queue_mapping: Queue mapping for multiqueue devices * @tc_index: Traffic control index * @tc_verd: traffic control verdict + * @rxhash: the packet hash computed on receive + * @queue_mapping: Queue mapping for multiqueue devices * @ndisc_nodetype: router type (from link layer) + * @ooo_okay: allow the mapping of a socket to a queue to be changed + * @l4_rxhash: indicate rxhash is a canonical 4-tuple hash over transport + * ports. * @dma_cookie: a cookie to one of several possible DMA operations * done by skb DMA functions * @secmark: security marking + * @mark: Generic packet mark + * @dropcount: total number of sk_receive_queue overflows * @vlan_tci: vlan tag control information + * @transport_header: Transport layer header + * @network_header: Network layer header + * @mac_header: Link layer header + * @tail: Tail pointer + * @end: End pointer + * @head: Head of buffer + * @data: Data head pointer + * @truesize: Buffer size + * @users: User count - see {datagram,tcp}.c */ struct sk_buff { @@ -397,6 +444,7 @@ struct sk_buff { __u8 ndisc_nodetype:2; #endif __u8 ooo_okay:1; + __u8 l4_rxhash:1; kmemcheck_bitfield_end(flags2); /* 0/13 bit hole */ @@ -504,9 +552,11 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, return __alloc_skb(size, priority, 1, NUMA_NO_NODE); } +extern void skb_recycle(struct sk_buff *skb); extern bool skb_recycle_check(struct sk_buff *skb, int skb_size); extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); +extern int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); extern struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); extern struct sk_buff *skb_copy(const struct sk_buff *skb, @@ -555,11 +605,11 @@ extern unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, unsigned int to, struct ts_config *config, struct ts_state *state); -extern __u32 __skb_get_rxhash(struct sk_buff *skb); +extern void __skb_get_rxhash(struct sk_buff *skb); static inline __u32 skb_get_rxhash(struct sk_buff *skb) { if (!skb->rxhash) - skb->rxhash = __skb_get_rxhash(skb); + __skb_get_rxhash(skb); return skb->rxhash; } @@ -805,9 +855,9 @@ static inline struct sk_buff *skb_unshare(struct sk_buff *skb, * The reference count is not incremented and the reference is therefore * volatile. Use with caution. */ -static inline struct sk_buff *skb_peek(struct sk_buff_head *list_) +static inline struct sk_buff *skb_peek(const struct sk_buff_head *list_) { - struct sk_buff *list = ((struct sk_buff *)list_)->next; + struct sk_buff *list = ((const struct sk_buff *)list_)->next; if (list == (struct sk_buff *)list_) list = NULL; return list; @@ -826,9 +876,9 @@ static inline struct sk_buff *skb_peek(struct sk_buff_head *list_) * The reference count is not incremented and the reference is therefore * volatile. Use with caution. */ -static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_) +static inline struct sk_buff *skb_peek_tail(const struct sk_buff_head *list_) { - struct sk_buff *list = ((struct sk_buff *)list_)->prev; + struct sk_buff *list = ((const struct sk_buff *)list_)->prev; if (list == (struct sk_buff *)list_) list = NULL; return list; @@ -1105,18 +1155,51 @@ static inline int skb_pagelen(const struct sk_buff *skb) int i, len = 0; for (i = (int)skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) - len += skb_shinfo(skb)->frags[i].size; + len += skb_frag_size(&skb_shinfo(skb)->frags[i]); return len + skb_headlen(skb); } -static inline void skb_fill_page_desc(struct sk_buff *skb, int i, - struct page *page, int off, int size) +/** + * __skb_fill_page_desc - initialise a paged fragment in an skb + * @skb: buffer containing fragment to be initialised + * @i: paged fragment index to initialise + * @page: the page to use for this fragment + * @off: the offset to the data with @page + * @size: the length of the data + * + * Initialises the @i'th fragment of @skb to point to &size bytes at + * offset @off within @page. + * + * Does not take any additional reference on the fragment. + */ +static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, + struct page *page, int off, int size) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - frag->page = page; + frag->page.p = page; frag->page_offset = off; - frag->size = size; + skb_frag_size_set(frag, size); +} + +/** + * skb_fill_page_desc - initialise a paged fragment in an skb + * @skb: buffer containing fragment to be initialised + * @i: paged fragment index to initialise + * @page: the page to use for this fragment + * @off: the offset to the data with @page + * @size: the length of the data + * + * As per __skb_fill_page_desc() -- initialises the @i'th fragment of + * @skb to point to &size bytes at offset @off within @page. In + * addition updates @skb such that @i is the last fragment. + * + * Does not take any additional reference on the fragment. + */ +static inline void skb_fill_page_desc(struct sk_buff *skb, int i, + struct page *page, int off, int size) +{ + __skb_fill_page_desc(skb, i, page, off, size); skb_shinfo(skb)->nr_frags = i + 1; } @@ -1562,16 +1645,22 @@ static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev, return __netdev_alloc_skb(dev, length, GFP_ATOMIC); } -static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, - unsigned int length) +static inline struct sk_buff *__netdev_alloc_skb_ip_align(struct net_device *dev, + unsigned int length, gfp_t gfp) { - struct sk_buff *skb = netdev_alloc_skb(dev, length + NET_IP_ALIGN); + struct sk_buff *skb = __netdev_alloc_skb(dev, length + NET_IP_ALIGN, gfp); if (NET_IP_ALIGN && skb) skb_reserve(skb, NET_IP_ALIGN); return skb; } +static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, + unsigned int length) +{ + return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC); +} + /** * __netdev_alloc_page - allocate a page for ps-rx on a specific device * @dev: network device to receive on @@ -1605,6 +1694,137 @@ static inline void netdev_free_page(struct net_device *dev, struct page *page) } /** + * skb_frag_page - retrieve the page refered to by a paged fragment + * @frag: the paged fragment + * + * Returns the &struct page associated with @frag. + */ +static inline struct page *skb_frag_page(const skb_frag_t *frag) +{ + return frag->page.p; +} + +/** + * __skb_frag_ref - take an addition reference on a paged fragment. + * @frag: the paged fragment + * + * Takes an additional reference on the paged fragment @frag. + */ +static inline void __skb_frag_ref(skb_frag_t *frag) +{ + get_page(skb_frag_page(frag)); +} + +/** + * skb_frag_ref - take an addition reference on a paged fragment of an skb. + * @skb: the buffer + * @f: the fragment offset. + * + * Takes an additional reference on the @f'th paged fragment of @skb. + */ +static inline void skb_frag_ref(struct sk_buff *skb, int f) +{ + __skb_frag_ref(&skb_shinfo(skb)->frags[f]); +} + +/** + * __skb_frag_unref - release a reference on a paged fragment. + * @frag: the paged fragment + * + * Releases a reference on the paged fragment @frag. + */ +static inline void __skb_frag_unref(skb_frag_t *frag) +{ + put_page(skb_frag_page(frag)); +} + +/** + * skb_frag_unref - release a reference on a paged fragment of an skb. + * @skb: the buffer + * @f: the fragment offset + * + * Releases a reference on the @f'th paged fragment of @skb. + */ +static inline void skb_frag_unref(struct sk_buff *skb, int f) +{ + __skb_frag_unref(&skb_shinfo(skb)->frags[f]); +} + +/** + * skb_frag_address - gets the address of the data contained in a paged fragment + * @frag: the paged fragment buffer + * + * Returns the address of the data within @frag. The page must already + * be mapped. + */ +static inline void *skb_frag_address(const skb_frag_t *frag) +{ + return page_address(skb_frag_page(frag)) + frag->page_offset; +} + +/** + * skb_frag_address_safe - gets the address of the data contained in a paged fragment + * @frag: the paged fragment buffer + * + * Returns the address of the data within @frag. Checks that the page + * is mapped and returns %NULL otherwise. + */ +static inline void *skb_frag_address_safe(const skb_frag_t *frag) +{ + void *ptr = page_address(skb_frag_page(frag)); + if (unlikely(!ptr)) + return NULL; + + return ptr + frag->page_offset; +} + +/** + * __skb_frag_set_page - sets the page contained in a paged fragment + * @frag: the paged fragment + * @page: the page to set + * + * Sets the fragment @frag to contain @page. + */ +static inline void __skb_frag_set_page(skb_frag_t *frag, struct page *page) +{ + frag->page.p = page; +} + +/** + * skb_frag_set_page - sets the page contained in a paged fragment of an skb + * @skb: the buffer + * @f: the fragment offset + * @page: the page to set + * + * Sets the @f'th fragment of @skb to contain @page. + */ +static inline void skb_frag_set_page(struct sk_buff *skb, int f, + struct page *page) +{ + __skb_frag_set_page(&skb_shinfo(skb)->frags[f], page); +} + +/** + * skb_frag_dma_map - maps a paged fragment via the DMA API + * @dev: the device to map the fragment to + * @frag: the paged fragment to map + * @offset: the offset within the fragment (starting at the + * fragment's own offset) + * @size: the number of bytes to map + * @dir: the direction of the mapping (%PCI_DMA_*) + * + * Maps the page associated with @frag to @device. + */ +static inline dma_addr_t skb_frag_dma_map(struct device *dev, + const skb_frag_t *frag, + size_t offset, size_t size, + enum dma_data_direction dir) +{ + return dma_map_page(dev, skb_frag_page(frag), + frag->page_offset + offset, size, dir); +} + +/** * skb_clone_writable - is the header of a clone writable * @skb: buffer to check * @len: length up to which to write @@ -1612,7 +1832,7 @@ static inline void netdev_free_page(struct net_device *dev, struct page *page) * Returns true if modifying the header part of the cloned buffer * does not requires the data to be copied. */ -static inline int skb_clone_writable(struct sk_buff *skb, unsigned int len) +static inline int skb_clone_writable(const struct sk_buff *skb, unsigned int len) { return !skb_header_cloned(skb) && skb_headroom(skb) + len <= skb->hdr_len; @@ -1706,13 +1926,13 @@ static inline int skb_add_data(struct sk_buff *skb, } static inline int skb_can_coalesce(struct sk_buff *skb, int i, - struct page *page, int off) + const struct page *page, int off) { if (i) { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1]; + const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1]; - return page == frag->page && - off == frag->page_offset + frag->size; + return page == skb_frag_page(frag) && + off == frag->page_offset + skb_frag_size(frag); } return 0; } @@ -1996,8 +2216,13 @@ static inline bool skb_defer_rx_timestamp(struct sk_buff *skb) /** * skb_complete_tx_timestamp() - deliver cloned skb with tx timestamps * + * PHY drivers may accept clones of transmitted packets for + * timestamping via their phy_driver.txtstamp method. These drivers + * must call this function to return the skb back to the stack, with + * or without a timestamp. + * * @skb: clone of the the original outgoing packet - * @hwtstamps: hardware time stamps + * @hwtstamps: hardware time stamps, may be NULL if not available * */ void skb_complete_tx_timestamp(struct sk_buff *skb, @@ -2028,8 +2253,7 @@ static inline void sw_tx_timestamp(struct sk_buff *skb) * skb_tx_timestamp() - Driver hook for transmit timestamping * * Ethernet MAC Drivers should call this function in their hard_xmit() - * function as soon as possible after giving the sk_buff to the MAC - * hardware, but before freeing the sk_buff. + * function immediately before giving the sk_buff to the MAC hardware. * * @skb: A socket buffer. */ @@ -2234,7 +2458,8 @@ static inline bool skb_warn_if_lro(const struct sk_buff *skb) { /* LRO sets gso_size but not gso_type, whereas if GSO is really * wanted then gso_type will be set. */ - struct skb_shared_info *shinfo = skb_shinfo(skb); + const struct skb_shared_info *shinfo = skb_shinfo(skb); + if (skb_is_nonlinear(skb) && shinfo->gso_size != 0 && unlikely(shinfo->gso_type == 0)) { __skb_warn_lro_forwarding(skb); @@ -2258,7 +2483,7 @@ static inline void skb_forward_csum(struct sk_buff *skb) * Instead of forcing ip_summed to CHECKSUM_NONE, we can * use this helper, to document places where we make this assertion. */ -static inline void skb_checksum_none_assert(struct sk_buff *skb) +static inline void skb_checksum_none_assert(const struct sk_buff *skb) { #ifdef DEBUG BUG_ON(skb->ip_summed != CHECKSUM_NONE); @@ -2266,5 +2491,26 @@ static inline void skb_checksum_none_assert(struct sk_buff *skb) } bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); + +static inline bool skb_is_recycleable(const struct sk_buff *skb, int skb_size) +{ + if (irqs_disabled()) + return false; + + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) + return false; + + if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE) + return false; + + skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD); + if (skb_end_pointer(skb) - skb->head < skb_size) + return false; + + if (skb_shared(skb) || skb_cloned(skb)) + return false; + + return true; +} #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ |