From 7ad24ea4bf620a32631d7b3069c3e30c078b0c3e Mon Sep 17 00:00:00 2001 From: Wilfried Klaebe Date: Sun, 11 May 2014 00:12:32 +0000 Subject: net: get rid of SET_ETHTOOL_OPS net: get rid of SET_ETHTOOL_OPS Dave Miller mentioned he'd like to see SET_ETHTOOL_OPS gone. This does that. Mostly done via coccinelle script: @@ struct ethtool_ops *ops; struct net_device *dev; @@ - SET_ETHTOOL_OPS(dev, ops); + dev->ethtool_ops = ops; Compile tested only, but I'd seriously wonder if this broke anything. Suggested-by: Dave Miller Signed-off-by: Wilfried Klaebe Acked-by: Felipe Balbi Signed-off-by: David S. Miller --- drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index c4b3940845e6..078cadd6c797 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -105,5 +105,5 @@ static const struct ethtool_ops ipoib_ethtool_ops = { void ipoib_set_ethtool_ops(struct net_device *dev) { - SET_ETHTOOL_OPS(dev, &ipoib_ethtool_ops); + dev->ethtool_ops = &ipoib_ethtool_ops; } -- cgit v1.2.3 From 70a640d0dae3a9b1b222ce673eb5d92c263ddd61 Mon Sep 17 00:00:00 2001 From: Yuval Atias Date: Sun, 25 May 2014 17:47:27 +0300 Subject: net/mlx4_en: Use affinity hint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The “affinity hint” mechanism is used by the user space daemon, irqbalancer, to indicate a preferred CPU mask for irqs. Irqbalancer can use this hint to balance the irqs between the cpus indicated by the mask. We wish the HCA to preferentially map the IRQs it uses to numa cores close to it. To accomplish this, we use cpumask_set_cpu_local_first(), that sets the affinity hint according to the following policy: First it maps IRQs to “close” numa cores.
If these are exhausted, the remaining IRQs are mapped to “far” numa cores. Signed-off-by: Yuval Atias Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 199c7896f081..58b1f239ac2b 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1897,7 +1897,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) i, j, dev->pdev->bus->name); /* Set IRQ for specific name (per ring) */ if (mlx4_assign_eq(dev, name, NULL, - &ibdev->eq_table[eq])) { + &ibdev->eq_table[eq], NULL)) { /* Use legacy (same as mlx4_en driver) */ pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq); ibdev->eq_table[eq] = -- cgit v1.2.3 From 96b2e73c5471542cb9c622c4360716684f8797ed Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 2 Jun 2014 00:18:48 -0700 Subject: Revert "net/mlx4_en: Use affinity hint" This reverts commit 70a640d0dae3a9b1b222ce673eb5d92c263ddd61. Signed-off-by: David S. 
Miller --- drivers/infiniband/hw/mlx4/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 58b1f239ac2b..199c7896f081 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1897,7 +1897,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) i, j, dev->pdev->bus->name); /* Set IRQ for specific name (per ring) */ if (mlx4_assign_eq(dev, name, NULL, - &ibdev->eq_table[eq], NULL)) { + &ibdev->eq_table[eq])) { /* Use legacy (same as mlx4_en driver) */ pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq); ibdev->eq_table[eq] = -- cgit v1.2.3 From cf38be6d61001b234d5b980d6e98702587638190 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Fri, 6 Jun 2014 21:40:42 +0530 Subject: iw_cxgb4: Allocate and use IQs specifically for indirect interrupts Currently indirect interrupts for RDMA CQs funnel through the LLD's RDMA RXQs, which also handle direct interrupts for offload CPLs during RDMA connection setup/teardown. The intended T4 usage model, however, is to have indirect interrupts flow through dedicated IQs. I.e., not to mix indirect interrupts with CPL messages in an IQ. This patch adds the concept of RDMA concentrator IQs, or CIQs, set up and maintained by the LLD and exported to iw_cxgb4 for use when creating CQs. RDMA CPLs will flow through the LLD's RDMA RXQs, and CQ interrupts flow through the CIQs. Design: cxgb4 creates and exports an array of CIQs for the RDMA ULD. These IQs are sized according to the max CQs available at adapter init. In addition, these IQs don't need FL buffers since they only service indirect interrupts. One CIQ is set up per RX channel similar to the RDMA RXQs. iw_cxgb4 will utilize these CIQs based on the vector value passed into create_cq().
The num_comp_vectors advertised by iw_cxgb4 will be the number of CIQs configured, and thus the vector value will be the index into the array of CIQs. Based on original work by Steve Wise Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/cq.c | 7 ++++++- drivers/infiniband/hw/cxgb4/provider.c | 2 +- drivers/infiniband/hw/cxgb4/t4.h | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index cfaa56ada189..71fc2ef203fb 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -134,7 +134,8 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, V_FW_RI_RES_WR_IQANUS(0) | V_FW_RI_RES_WR_IQANUD(1) | F_FW_RI_RES_WR_IQANDST | - V_FW_RI_RES_WR_IQANDSTINDEX(*rdev->lldi.rxq_ids)); + V_FW_RI_RES_WR_IQANDSTINDEX( + rdev->lldi.ciq_ids[cq->vector])); res->u.cq.iqdroprss_to_iqesize = cpu_to_be16( F_FW_RI_RES_WR_IQDROPRSS | V_FW_RI_RES_WR_IQPCIECH(2) | @@ -870,6 +871,9 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, rhp = to_c4iw_dev(ibdev); + if (vector >= rhp->rdev.lldi.nciq) + return ERR_PTR(-EINVAL); + chp = kzalloc(sizeof(*chp), GFP_KERNEL); if (!chp) return ERR_PTR(-ENOMEM); @@ -915,6 +919,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, } chp->cq.size = hwentries; chp->cq.memsize = memsize; + chp->cq.vector = vector; ret = create_cq(&rhp->rdev, &chp->cq, ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx); diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index a94a3e12c349..31cd1882c47b 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -499,7 +499,7 @@ int c4iw_register_device(struct c4iw_dev *dev) dev->ibdev.node_type = RDMA_NODE_RNIC; memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC)); dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports; - dev->ibdev.num_comp_vectors = 1; + dev->ibdev.num_comp_vectors = dev->rdev.lldi.nciq; dev->ibdev.dma_device = &(dev->rdev.lldi.pdev->dev); dev->ibdev.query_device = c4iw_query_device; dev->ibdev.query_port = c4iw_query_port; diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 2178f3198410..68b0a6bf4eb0 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -542,6 +542,7 @@ struct t4_cq { size_t memsize; __be64 bits_type_ts; u32 cqid; + int vector; u16 size; /* including status page */ u16 cidx; u16 sw_pidx; -- cgit v1.2.3 From 92e7ae71726ca9e16a8a88ebeee14eb5177575a1 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Fri, 6 Jun 2014 21:40:43 +0530 Subject: iw_cxgb4: Choose appropriate hw mtu index and ISS for iWARP connections Select the appropriate hw mtu index and initial sequence number to optimize hw memory performance. Add new cxgb4_best_aligned_mtu() which allows callers to provide enough information to be used to [possibly] select an MTU which will result in the TCP Data Segment Size (AKA Maximum Segment Size) to be an aligned value. If an RTR message exchange is required, then align the ISS to 8B - 1 + 4, so that after the SYN the send seqno will align on a 4B boundary. The RTR message exchange will leave the send seqno aligned on an 8B boundary. If an RTR is not required, then align the ISS to 8B - 1. The goal is to have the send seqno be 8B aligned when we send the first FPDU.
Based on original work by Casey Leedom and Steve Wise Signed-off-by: Casey Leedom Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/cm.c | 73 ++++++++++++++++++++++++++----- drivers/infiniband/hw/cxgb4/t4fw_ri_api.h | 1 + 2 files changed, 63 insertions(+), 11 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 1f863a96a480..d2e9f72ec9bf 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -232,12 +232,16 @@ static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb) static void set_emss(struct c4iw_ep *ep, u16 opt) { - ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] - 40; + ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] - + sizeof(struct iphdr) - sizeof(struct tcphdr); ep->mss = ep->emss; if (GET_TCPOPT_TSTAMP(opt)) ep->emss -= 12; if (ep->emss < 128) ep->emss = 128; + if (ep->emss & 7) + PDBG("Warning: misaligned mtu idx %u mss %u emss=%u\n", + GET_TCPOPT_MSS(opt), ep->mss, ep->emss); PDBG("%s mss_idx %u mss %u emss=%u\n", __func__, GET_TCPOPT_MSS(opt), ep->mss, ep->emss); } @@ -528,6 +532,17 @@ static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } +static void best_mtu(const unsigned short *mtus, unsigned short mtu, + unsigned int *idx, int use_ts) +{ + unsigned short hdr_size = sizeof(struct iphdr) + + sizeof(struct tcphdr) + + (use_ts ? 
12 : 0); + unsigned short data_size = mtu - hdr_size; + + cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx); +} + static int send_connect(struct c4iw_ep *ep) { struct cpl_act_open_req *req; @@ -565,7 +580,8 @@ static int send_connect(struct c4iw_ep *ep) } set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); - cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); + best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps); wscale = compute_wscale(rcv_win); opt0 = (nocong ? NO_CONG(1) : 0) | KEEP_ALIVE(1) | @@ -633,6 +649,13 @@ static int send_connect(struct c4iw_ep *ep) req6->opt2 = cpu_to_be32(opt2); } } else { + u32 isn = (prandom_u32() & ~7UL) - 1; + + opt2 |= T5_OPT_2_VALID; + opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */ + if (peer2peer) + isn += 4; + if (ep->com.remote_addr.ss_family == AF_INET) { t5_req = (struct cpl_t5_act_open_req *) skb_put(skb, wrlen); @@ -649,6 +672,9 @@ static int send_connect(struct c4iw_ep *ep) cxgb4_select_ntuple( ep->com.dev->rdev.lldi.ports[0], ep->l2t))); + t5_req->rsvd = cpu_to_be32(isn); + PDBG("%s snd_isn %u\n", __func__, + be32_to_cpu(t5_req->rsvd)); t5_req->opt2 = cpu_to_be32(opt2); } else { t5_req6 = (struct cpl_t5_act_open_req6 *) @@ -672,6 +698,9 @@ static int send_connect(struct c4iw_ep *ep) cxgb4_select_ntuple( ep->com.dev->rdev.lldi.ports[0], ep->l2t)); + t5_req6->rsvd = cpu_to_be32(isn); + PDBG("%s snd_isn %u\n", __func__, + be32_to_cpu(t5_req6->rsvd)); t5_req6->opt2 = cpu_to_be32(opt2); } } @@ -1640,7 +1669,8 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) htons(F_FW_OFLD_CONNECTION_WR_CPLRXDATAACK); req->tcb.tx_max = (__force __be32) jiffies; req->tcb.rcv_adv = htons(1); - cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); + best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps); wscale = compute_wscale(rcv_win); req->tcb.opt0 = (__force __be64) (TCAM_BYPASS(1) | (nocong ? 
NO_CONG(1) : 0) | @@ -1986,12 +2016,26 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, u64 opt0; u32 opt2; int wscale; + struct cpl_t5_pass_accept_rpl *rpl5 = NULL; PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); BUG_ON(skb_cloned(skb)); - skb_trim(skb, sizeof(*rpl)); + skb_get(skb); - cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); + rpl = cplhdr(skb); + if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { + skb_trim(skb, roundup(sizeof(*rpl5), 16)); + rpl5 = (void *)rpl; + INIT_TP_WR(rpl5, ep->hwtid); + } else { + skb_trim(skb, sizeof(*rpl)); + INIT_TP_WR(rpl, ep->hwtid); + } + OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, + ep->hwtid)); + + best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps && req->tcpopt.tstamp); wscale = compute_wscale(rcv_win); opt0 = (nocong ? NO_CONG(1) : 0) | KEEP_ALIVE(1) | @@ -2023,14 +2067,18 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, opt2 |= CCTRL_ECN(1); } if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { + u32 isn = (prandom_u32() & ~7UL) - 1; opt2 |= T5_OPT_2_VALID; opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE); + opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */ + rpl5 = (void *)rpl; + memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16)); + if (peer2peer) + isn += 4; + rpl5->iss = cpu_to_be32(isn); + PDBG("%s iss %u\n", __func__, be32_to_cpu(rpl5->iss)); } - rpl = cplhdr(skb); - INIT_TP_WR(rpl, ep->hwtid); - OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, - ep->hwtid)); rpl->opt0 = cpu_to_be64(opt0); rpl->opt2 = cpu_to_be32(opt2); set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); @@ -2095,6 +2143,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) int err; u16 peer_mss = ntohs(req->tcpopt.mss); int iptype; + unsigned short hdrs; parent_ep = lookup_stid(t, stid); if (!parent_ep) { @@ -2152,8 +2201,10 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } 
- if (peer_mss && child_ep->mtu > (peer_mss + 40)) - child_ep->mtu = peer_mss + 40; + hdrs = sizeof(struct iphdr) + sizeof(struct tcphdr) + + ((enable_tcp_timestamps && req->tcpopt.tstamp) ? 12 : 0); + if (peer_mss && child_ep->mtu > (peer_mss + hdrs)) + child_ep->mtu = peer_mss + hdrs; state_set(&child_ep->com, CONNECTING); child_ep->com.dev = dev; diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h index 6121ca08fe58..91289a051af9 100644 --- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h +++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h @@ -848,6 +848,7 @@ enum { /* TCP congestion control algorithms */ #define V_CONG_CNTRL(x) ((x) << S_CONG_CNTRL) #define G_CONG_CNTRL(x) (((x) >> S_CONG_CNTRL) & M_CONG_CNTRL) +#define CONG_CNTRL_VALID (1 << 18) #define T5_OPT_2_VALID (1 << 31) #endif /* _T4FW_RI_API_H_ */ -- cgit v1.2.3 From b408ff282dda0ef7a3218dc2e5f1399c665d4c20 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Fri, 6 Jun 2014 21:40:44 +0530 Subject: iw_cxgb4: don't truncate the recv window size Fixed a bug that shows up with recv window sizes that exceed the size of the RCV_BUFSIZ field in opt0 (>= 1024K). If the recv window exceeds this, then we specify the max possible in opt0, and add the rest in via RX_DATA_ACK credits. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: David S.
Miller --- drivers/infiniband/hw/cxgb4/cm.c | 54 +++++++++++++++++++++++++++++++--- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 2 ++ 2 files changed, 52 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index d2e9f72ec9bf..965eaafd5851 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -472,7 +472,7 @@ static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT; flowc->mnemval[5].val = cpu_to_be32(ep->rcv_seq); flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF; - flowc->mnemval[6].val = cpu_to_be32(snd_win); + flowc->mnemval[6].val = cpu_to_be32(ep->snd_win); flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS; flowc->mnemval[7].val = cpu_to_be32(ep->emss); /* Pad WR to 16 byte boundary */ @@ -565,6 +565,7 @@ static int send_connect(struct c4iw_ep *ep) struct sockaddr_in *ra = (struct sockaddr_in *)&ep->com.remote_addr; struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&ep->com.local_addr; struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr; + int win; wrlen = (ep->com.remote_addr.ss_family == AF_INET) ? roundup(sizev4, 16) : @@ -583,6 +584,15 @@ static int send_connect(struct c4iw_ep *ep) best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, enable_tcp_timestamps); wscale = compute_wscale(rcv_win); + + /* + * Specify the largest window that will fit in opt0. The + * remainder will be specified in the rx_data_ack. + */ + win = ep->rcv_win >> 10; + if (win > RCV_BUFSIZ_MASK) + win = RCV_BUFSIZ_MASK; + opt0 = (nocong ? 
NO_CONG(1) : 0) | KEEP_ALIVE(1) | DELACK(1) | @@ -593,7 +603,7 @@ static int send_connect(struct c4iw_ep *ep) SMAC_SEL(ep->smac_idx) | DSCP(ep->tos) | ULP_MODE(ULP_MODE_TCPDDP) | - RCV_BUFSIZ(rcv_win>>10); + RCV_BUFSIZ(win); opt2 = RX_CHANNEL(0) | CCTRL_ECN(enable_ecn) | RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); @@ -1174,6 +1184,14 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits) return 0; } + /* + * If we couldn't specify the entire rcv window at connection setup + * due to the limit in the number of bits in the RCV_BUFSIZ field, + * then add the overage in to the credits returned. + */ + if (ep->rcv_win > RCV_BUFSIZ_MASK * 1024) + credits += ep->rcv_win - RCV_BUFSIZ_MASK * 1024; + req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen); memset(req, 0, wrlen); INIT_TP_WR(req, ep->hwtid); @@ -1647,6 +1665,7 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) unsigned int mtu_idx; int wscale; struct sockaddr_in *sin; + int win; skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); req = (struct fw_ofld_connection_wr *)__skb_put(skb, sizeof(*req)); @@ -1672,6 +1691,15 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, enable_tcp_timestamps); wscale = compute_wscale(rcv_win); + + /* + * Specify the largest window that will fit in opt0. The + * remainder will be specified in the rx_data_ack. + */ + win = ep->rcv_win >> 10; + if (win > RCV_BUFSIZ_MASK) + win = RCV_BUFSIZ_MASK; + req->tcb.opt0 = (__force __be64) (TCAM_BYPASS(1) | (nocong ? 
NO_CONG(1) : 0) | KEEP_ALIVE(1) | @@ -1683,7 +1711,7 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) SMAC_SEL(ep->smac_idx) | DSCP(ep->tos) | ULP_MODE(ULP_MODE_TCPDDP) | - RCV_BUFSIZ(rcv_win >> 10)); + RCV_BUFSIZ(win)); req->tcb.opt2 = (__force __be32) (PACE(1) | TX_QUEUE(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) | RX_CHANNEL(0) | @@ -1720,6 +1748,13 @@ static int is_neg_adv(unsigned int status) status == CPL_ERR_KEEPALV_NEG_ADVICE; } +static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi) +{ + ep->snd_win = snd_win; + ep->rcv_win = rcv_win; + PDBG("%s snd_win %d rcv_win %d\n", __func__, ep->snd_win, ep->rcv_win); +} + #define ACT_OPEN_RETRY_COUNT 2 static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, @@ -1768,6 +1803,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, ep->ctrlq_idx = cxgb4_port_idx(pdev); ep->rss_qid = cdev->rdev.lldi.rxq_ids[ cxgb4_port_idx(pdev) * step]; + set_tcp_window(ep, (struct port_info *)netdev_priv(pdev)); dev_put(pdev); } else { pdev = get_real_dev(n->dev); @@ -1786,6 +1822,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, cdev->rdev.lldi.nchan; ep->rss_qid = cdev->rdev.lldi.rxq_ids[ cxgb4_port_idx(n->dev) * step]; + set_tcp_window(ep, (struct port_info *)netdev_priv(pdev)); if (clear_mpa_v1) { ep->retry_with_mpa_v1 = 0; @@ -2017,6 +2054,7 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, u32 opt2; int wscale; struct cpl_t5_pass_accept_rpl *rpl5 = NULL; + int win; PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); BUG_ON(skb_cloned(skb)); @@ -2037,6 +2075,14 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, enable_tcp_timestamps && req->tcpopt.tstamp); wscale = compute_wscale(rcv_win); + + /* + * Specify the largest window that will fit in opt0. The + * remainder will be specified in the rx_data_ack. 
+ */ + win = ep->rcv_win >> 10; + if (win > RCV_BUFSIZ_MASK) + win = RCV_BUFSIZ_MASK; opt0 = (nocong ? NO_CONG(1) : 0) | KEEP_ALIVE(1) | DELACK(1) | @@ -2047,7 +2093,7 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, SMAC_SEL(ep->smac_idx) | DSCP(ep->tos >> 2) | ULP_MODE(ULP_MODE_TCPDDP) | - RCV_BUFSIZ(rcv_win>>10); + RCV_BUFSIZ(win); opt2 = RX_CHANNEL(0) | RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 7474b490760a..7493dfeb812a 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -805,6 +805,8 @@ struct c4iw_ep { u8 retry_with_mpa_v1; u8 tried_with_mpa_v1; unsigned int retry_count; + int snd_win; + int rcv_win; }; static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id) -- cgit v1.2.3