From 5c59801f7018acba11b12de59017a3fcdcf7421d Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Tue, 22 Jan 2019 09:16:19 +0100 Subject: clk: sunxi-ng: v3s: Fix TCON reset de-assert bit According to the datasheet and the reference code from Allwinner, the bit used to de-assert the TCON reset is bit 4, not bit 3. Fix it in the V3s CCU driver. Signed-off-by: Paul Kocialkowski Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/ccu-sun8i-v3s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c index 621b1cd996db..ac12f261f8ca 100644 --- a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c +++ b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c @@ -542,7 +542,7 @@ static struct ccu_reset_map sun8i_v3s_ccu_resets[] = { [RST_BUS_OHCI0] = { 0x2c0, BIT(29) }, [RST_BUS_VE] = { 0x2c4, BIT(0) }, - [RST_BUS_TCON0] = { 0x2c4, BIT(3) }, + [RST_BUS_TCON0] = { 0x2c4, BIT(4) }, [RST_BUS_CSI] = { 0x2c4, BIT(8) }, [RST_BUS_DE] = { 0x2c4, BIT(12) }, [RST_BUS_DBG] = { 0x2c4, BIT(31) }, -- cgit v1.2.3 From 6db2983cd8064808141ccefd75218f5b4345ffae Mon Sep 17 00:00:00 2001 From: Eugene Loh Date: Thu, 17 Jan 2019 14:46:00 -0800 Subject: kallsyms: Handle too long symbols in kallsyms.c When checking for symbols with excessively long names, account for null terminating character. Fixes: f3462aa952cf ("Kbuild: Handle longer symbols in kallsyms.c") Signed-off-by: Eugene Loh Acked-by: Ard Biesheuvel Signed-off-by: Masahiro Yamada --- scripts/kallsyms.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 77cebad0474e..f75e7bda4889 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -118,8 +118,8 @@ static int read_symbol(FILE *in, struct sym_entry *s) fprintf(stderr, "Read error or end of file.\n"); return -1; } - if (strlen(sym) > KSYM_NAME_LEN) { - fprintf(stderr, "Symbol %s too long for kallsyms (%zu vs %d).\n" + if (strlen(sym) >= KSYM_NAME_LEN) { + fprintf(stderr, "Symbol %s too long for kallsyms (%zu >= %d).\n" "Please increase KSYM_NAME_LEN both in kernel and kallsyms.c\n", sym, strlen(sym), KSYM_NAME_LEN); return -1; -- cgit v1.2.3 From ee0b27a3a4da0b0ed2318aa092f8856896e9450b Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Wed, 23 Jan 2019 00:59:11 +0000 Subject: clk: sunxi: A31: Fix wrong AHB gate number According to the manual the gate clock for MMC3 is at bit 11, and NAND1 is controlled by bit 12. Fix the gate bit definitions in the clock driver. Fixes: c6e6c96d8fa6 ("clk: sunxi-ng: Add A31/A31s clocks") Signed-off-by: Andre Przywara Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/ccu-sun6i-a31.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c index 3b97f60540ad..609970c0b666 100644 --- a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c +++ b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c @@ -264,9 +264,9 @@ static SUNXI_CCU_GATE(ahb1_mmc1_clk, "ahb1-mmc1", "ahb1", static SUNXI_CCU_GATE(ahb1_mmc2_clk, "ahb1-mmc2", "ahb1", 0x060, BIT(10), 0); static SUNXI_CCU_GATE(ahb1_mmc3_clk, "ahb1-mmc3", "ahb1", - 0x060, BIT(12), 0); + 0x060, BIT(11), 0); static SUNXI_CCU_GATE(ahb1_nand1_clk, "ahb1-nand1", "ahb1", - 0x060, BIT(13), 0); + 0x060, BIT(12), 0); static SUNXI_CCU_GATE(ahb1_nand0_clk, "ahb1-nand0", "ahb1", 0x060, BIT(13), 0); static SUNXI_CCU_GATE(ahb1_sdram_clk, "ahb1-sdram", "ahb1", -- cgit v1.2.3 From 09db51241118aeb06e1c8cd393b45879ce099b36 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Mon, 28 Jan 2019 09:35:35 +0100 Subject: esp: Skip TX bytes accounting when sending from a request socket On ESP output, sk_wmem_alloc is incremented for the added padding if a socket is associated to the skb. When replying with TCP SYNACKs over IPsec, the associated sk is a casted request socket, only. Increasing sk_wmem_alloc on a request socket results in a write at an arbitrary struct offset. In the best case, this produces the following WARNING: WARNING: CPU: 1 PID: 0 at lib/refcount.c:102 esp_output_head+0x2e4/0x308 [esp4] refcount_t: addition on 0; use-after-free. CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.0.0-rc3 #2 Hardware name: Marvell Armada 380/385 (Device Tree) [...] [] (esp_output_head [esp4]) from [] (esp_output+0xb8/0x180 [esp4]) [] (esp_output [esp4]) from [] (xfrm_output_resume+0x558/0x664) [] (xfrm_output_resume) from [] (xfrm4_output+0x44/0xc4) [] (xfrm4_output) from [] (tcp_v4_send_synack+0xa8/0xe8) [] (tcp_v4_send_synack) from [] (tcp_conn_request+0x7f4/0x948) [] (tcp_conn_request) from [] (tcp_rcv_state_process+0x2a0/0xe64) [] (tcp_rcv_state_process) from [] (tcp_v4_do_rcv+0xf0/0x1f4) [] (tcp_v4_do_rcv) from [] (tcp_v4_rcv+0xdb8/0xe20) [] (tcp_v4_rcv) from [] (ip_protocol_deliver_rcu+0x2c/0x2dc) [] (ip_protocol_deliver_rcu) from [] (ip_local_deliver_finish+0x48/0x54) [] (ip_local_deliver_finish) from [] (ip_local_deliver+0x54/0xec) [] (ip_local_deliver) from [] (ip_rcv+0x48/0xb8) [] (ip_rcv) from [] (__netif_receive_skb_one_core+0x50/0x6c) [...] The issue triggers only when not using TCP syncookies, as for syncookies no socket is associated. Fixes: cac2661c53f3 ("esp4: Avoid skb_cow_data whenever possible") Fixes: 03e2a30f6a27 ("esp6: Avoid skb_cow_data whenever possible") Signed-off-by: Martin Willi Signed-off-by: Steffen Klassert --- net/ipv4/esp4.c | 2 +- net/ipv6/esp6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 5459f41fc26f..10e809b296ec 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -328,7 +328,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * skb->len += tailen; skb->data_len += tailen; skb->truesize += tailen; - if (sk) + if (sk && sk_fullsock(sk)) refcount_add(tailen, &sk->sk_wmem_alloc); goto out; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 5afe9f83374d..239d4a65ad6e 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -296,7 +296,7 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info skb->len += tailen; skb->data_len += tailen; skb->truesize += tailen; - if (sk) + if (sk && sk_fullsock(sk)) refcount_add(tailen, &sk->sk_wmem_alloc); goto out; -- cgit v1.2.3 From 48396e80fb6526ea5ed267bd84f028bae56d2f9e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 30 Jan 2019 14:05:55 -0800 Subject: RDMA/srp: Rework SCSI device reset handling Since .scsi_done() must only be called after scsi_queue_rq() has finished, make sure that the SRP initiator driver does not call .scsi_done() while scsi_queue_rq() is in progress. Although invoking sg_reset -d while I/O is in progress works fine with kernel v4.20 and before, that is not the case with kernel v5.0-rc1. This patch avoids that the following crash is triggered with kernel v5.0-rc1: BUG: unable to handle kernel NULL pointer dereference at 0000000000000138 CPU: 0 PID: 360 Comm: kworker/0:1H Tainted: G B 5.0.0-rc1-dbg+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 Workqueue: kblockd blk_mq_run_work_fn RIP: 0010:blk_mq_dispatch_rq_list+0x116/0xb10 Call Trace: blk_mq_sched_dispatch_requests+0x2f7/0x300 __blk_mq_run_hw_queue+0xd6/0x180 blk_mq_run_work_fn+0x27/0x30 process_one_work+0x4f1/0xa20 worker_thread+0x67/0x5b0 kthread+0x1cf/0x1f0 ret_from_fork+0x24/0x30 Cc: Fixes: 94a9174c630c ("IB/srp: reduce lock coverage of command completion") Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srp/ib_srp.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 31d91538bbf4..694324b37480 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3032,7 +3032,6 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) { struct srp_target_port *target = host_to_target(scmnd->device->host); struct srp_rdma_ch *ch; - int i, j; u8 status; shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n"); @@ -3044,15 +3043,6 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) if (status) return FAILED; - for (i = 0; i < target->ch_count; i++) { - ch = &target->ch[i]; - for (j = 0; j < target->req_ring_size; ++j) { - struct srp_request *req = &ch->req_ring[j]; - - srp_finish_req(ch, req, scmnd->device, DID_RESET << 16); - } - } - return SUCCESS; } -- cgit v1.2.3 From f75a2804da391571563c4b6b29e7797787332673 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 31 Jan 2019 13:05:49 -0800 Subject: xfrm: destroy xfrm_state synchronously on net exit path xfrm_state_put() moves struct xfrm_state to the GC list and schedules the GC work to clean it up. On net exit call path, xfrm_state_flush() is called to clean up and xfrm_flush_gc() is called to wait for the GC work to complete before exit. However, this doesn't work because one of the ->destructor(), ipcomp_destroy(), schedules the same GC work again inside the GC work. It is hard to wait for such a nested async callback. This is also why syzbot still reports the following warning: WARNING: CPU: 1 PID: 33 at net/ipv6/xfrm6_tunnel.c:351 xfrm6_tunnel_net_exit+0x2cb/0x500 net/ipv6/xfrm6_tunnel.c:351 ... ops_exit_list.isra.0+0xb0/0x160 net/core/net_namespace.c:153 cleanup_net+0x51d/0xb10 net/core/net_namespace.c:551 process_one_work+0xd0c/0x1ce0 kernel/workqueue.c:2153 worker_thread+0x143/0x14a0 kernel/workqueue.c:2296 kthread+0x357/0x430 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352 In fact, it is perfectly fine to bypass GC and destroy xfrm_state synchronously on net exit call path, because it is in process context and doesn't need a work struct to do any blocking work. This patch introduces xfrm_state_put_sync() which simply bypasses GC, and lets its callers to decide whether to use this synchronous version. On net exit path, xfrm_state_fini() and xfrm6_tunnel_net_exit() use it. And, as ipcomp_destroy() itself is blocking, it can use xfrm_state_put_sync() directly too. Also rename xfrm_state_gc_destroy() to ___xfrm_state_destroy() to reflect this change. Fixes: b48c05ab5d32 ("xfrm: Fix warning in xfrm6_tunnel_net_exit.") Reported-and-tested-by: syzbot+e9aebef558e3ed673934@syzkaller.appspotmail.com Cc: Steffen Klassert Signed-off-by: Cong Wang Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 12 +++++++++--- net/ipv6/xfrm6_tunnel.c | 2 +- net/key/af_key.c | 2 +- net/xfrm/xfrm_state.c | 30 +++++++++++++++++++----------- net/xfrm/xfrm_user.c | 2 +- 5 files changed, 31 insertions(+), 17 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 7298a53b9702..85386becbaea 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -853,7 +853,7 @@ static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols) xfrm_pol_put(pols[i]); } -void __xfrm_state_destroy(struct xfrm_state *); +void __xfrm_state_destroy(struct xfrm_state *, bool); static inline void __xfrm_state_put(struct xfrm_state *x) { @@ -863,7 +863,13 @@ static inline void __xfrm_state_put(struct xfrm_state *x) static inline void xfrm_state_put(struct xfrm_state *x) { if (refcount_dec_and_test(&x->refcnt)) - __xfrm_state_destroy(x); + __xfrm_state_destroy(x, false); +} + +static inline void xfrm_state_put_sync(struct xfrm_state *x) +{ + if (refcount_dec_and_test(&x->refcnt)) + __xfrm_state_destroy(x, true); } static inline void xfrm_state_hold(struct xfrm_state *x) @@ -1590,7 +1596,7 @@ struct xfrmk_spdinfo { struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq); int xfrm_state_delete(struct xfrm_state *x); -int xfrm_state_flush(struct net *net, u8 proto, bool task_valid); +int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync); int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid); void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si); void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index f5b4febeaa25..bc65db782bfb 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -344,8 +344,8 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net) struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); unsigned int i; - xfrm_state_flush(net, IPSEC_PROTO_ANY, false); xfrm_flush_gc(); + xfrm_state_flush(net, IPSEC_PROTO_ANY, false, true); for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i])); diff --git a/net/key/af_key.c b/net/key/af_key.c index 655c787f9d54..637030f43b67 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1783,7 +1783,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_m if (proto == 0) return -EINVAL; - err = xfrm_state_flush(net, proto, true); + err = xfrm_state_flush(net, proto, true, false); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { if (err == -ESRCH) /* empty table - go quietly */ diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 23c92891758a..1bb971f46fc6 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -432,7 +432,7 @@ void xfrm_state_free(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_free); -static void xfrm_state_gc_destroy(struct xfrm_state *x) +static void ___xfrm_state_destroy(struct xfrm_state *x) { tasklet_hrtimer_cancel(&x->mtimer); del_timer_sync(&x->rtimer); @@ -474,7 +474,7 @@ static void xfrm_state_gc_task(struct work_struct *work) synchronize_rcu(); hlist_for_each_entry_safe(x, tmp, &gc_list, gclist) - xfrm_state_gc_destroy(x); + ___xfrm_state_destroy(x); } static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me) @@ -598,14 +598,19 @@ struct xfrm_state *xfrm_state_alloc(struct net *net) } EXPORT_SYMBOL(xfrm_state_alloc); -void __xfrm_state_destroy(struct xfrm_state *x) +void __xfrm_state_destroy(struct xfrm_state *x, bool sync) { WARN_ON(x->km.state != XFRM_STATE_DEAD); - spin_lock_bh(&xfrm_state_gc_lock); - hlist_add_head(&x->gclist, &xfrm_state_gc_list); - spin_unlock_bh(&xfrm_state_gc_lock); - schedule_work(&xfrm_state_gc_work); + if (sync) { + synchronize_rcu(); + ___xfrm_state_destroy(x); + } else { + spin_lock_bh(&xfrm_state_gc_lock); + hlist_add_head(&x->gclist, &xfrm_state_gc_list); + spin_unlock_bh(&xfrm_state_gc_lock); + schedule_work(&xfrm_state_gc_work); + } } EXPORT_SYMBOL(__xfrm_state_destroy); @@ -708,7 +713,7 @@ xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool } #endif -int xfrm_state_flush(struct net *net, u8 proto, bool task_valid) +int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync) { int i, err = 0, cnt = 0; @@ -730,7 +735,10 @@ restart: err = xfrm_state_delete(x); xfrm_audit_state_delete(x, err ? 0 : 1, task_valid); - xfrm_state_put(x); + if (sync) + xfrm_state_put_sync(x); + else + xfrm_state_put(x); if (!err) cnt++; @@ -2215,7 +2223,7 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x) if (atomic_read(&t->tunnel_users) == 2) xfrm_state_delete(t); atomic_dec(&t->tunnel_users); - xfrm_state_put(t); + xfrm_state_put_sync(t); x->tunnel = NULL; } } @@ -2375,8 +2383,8 @@ void xfrm_state_fini(struct net *net) unsigned int sz; flush_work(&net->xfrm.state_hash_work); - xfrm_state_flush(net, IPSEC_PROTO_ANY, false); flush_work(&xfrm_state_gc_work); + xfrm_state_flush(net, IPSEC_PROTO_ANY, false, true); WARN_ON(!list_empty(&net->xfrm.state_all)); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index c6d26afcf89d..a131f9ff979e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1932,7 +1932,7 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_usersa_flush *p = nlmsg_data(nlh); int err; - err = xfrm_state_flush(net, p->proto, true); + err = xfrm_state_flush(net, p->proto, true, false); if (err) { if (err == -ESRCH) /* empty table */ return 0; -- cgit v1.2.3 From fa84667b98fd1a191b2465d66b440bda6714b3bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20van=20Dorst?= Date: Wed, 30 Jan 2019 17:10:49 +0100 Subject: gpio: MT7621: use a per instance irq_chip structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the kernel complains: gpio gpiochip1: (1e000600.gpio-bank1): detected irqchip that is shared with multiple gpiochips: please fix the driver. gpio gpiochip2: (1e000600.gpio-bank2): detected irqchip that is shared with multiple gpiochips: please fix the driver. Fixes: 4ba9c3afda41 ("gpio: mt7621: Add a driver for MT7621") Cc: stable@vger.kernel.org Signed-off-by: René van Dorst Cc: linux-gpio@vger.kernel.org Cc: linux-mediatek@lists.infradead.org Tested-by: Greg Ungerer Signed-off-by: Linus Walleij --- drivers/gpio/gpio-mt7621.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpio/gpio-mt7621.c b/drivers/gpio/gpio-mt7621.c index 00e954f22bc9..74401e0adb29 100644 --- a/drivers/gpio/gpio-mt7621.c +++ b/drivers/gpio/gpio-mt7621.c @@ -30,6 +30,7 @@ #define GPIO_REG_EDGE 0xA0 struct mtk_gc { + struct irq_chip irq_chip; struct gpio_chip chip; spinlock_t lock; int bank; @@ -189,13 +190,6 @@ mediatek_gpio_irq_type(struct irq_data *d, unsigned int type) return 0; } -static struct irq_chip mediatek_gpio_irq_chip = { - .irq_unmask = mediatek_gpio_irq_unmask, - .irq_mask = mediatek_gpio_irq_mask, - .irq_mask_ack = mediatek_gpio_irq_mask, - .irq_set_type = mediatek_gpio_irq_type, -}; - static int mediatek_gpio_xlate(struct gpio_chip *chip, const struct of_phandle_args *spec, u32 *flags) @@ -254,6 +248,13 @@ mediatek_gpio_bank_probe(struct device *dev, return ret; } + rg->irq_chip.name = dev_name(dev); + rg->irq_chip.parent_device = dev; + rg->irq_chip.irq_unmask = mediatek_gpio_irq_unmask; + rg->irq_chip.irq_mask = mediatek_gpio_irq_mask; + rg->irq_chip.irq_mask_ack = mediatek_gpio_irq_mask; + rg->irq_chip.irq_set_type = mediatek_gpio_irq_type; + if (mtk->gpio_irq) { /* * Manually request the irq here instead of passing @@ -270,14 +271,14 @@ mediatek_gpio_bank_probe(struct device *dev, return ret; } - ret = gpiochip_irqchip_add(&rg->chip, &mediatek_gpio_irq_chip, + ret = gpiochip_irqchip_add(&rg->chip, &rg->irq_chip, 0, handle_simple_irq, IRQ_TYPE_NONE); if (ret) { dev_err(dev, "failed to add gpiochip_irqchip\n"); return ret; } - gpiochip_set_chained_irqchip(&rg->chip, &mediatek_gpio_irq_chip, + gpiochip_set_chained_irqchip(&rg->chip, &rg->irq_chip, mtk->gpio_irq, NULL); } @@ -310,7 +311,6 @@ mediatek_gpio_probe(struct platform_device *pdev) mtk->gpio_irq = irq_of_parse_and_map(np, 0); mtk->dev = dev; platform_set_drvdata(pdev, mtk); - mediatek_gpio_irq_chip.name = dev_name(dev); for (i = 0; i < MTK_BANK_CNT; i++) { ret = mediatek_gpio_bank_probe(dev, np, i); -- cgit v1.2.3 From a5a08c35d382a5a8da397260c3febb8dff4bdeef Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 31 Jan 2019 09:29:53 -0800 Subject: pinctrl: qcom: qcs404: Correct SDC tile The SDC controls live in the south tile, not the north one. Correct this so that we program the right registers. Cc: stable@vger.kernel.org Fixes: 22eb8301dbc1 ("pinctrl: qcom: Add qcs404 pinctrl driver") Signed-off-by: Bjorn Andersson Reviewed-by: Vinod Koul Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-qcs404.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/qcom/pinctrl-qcs404.c b/drivers/pinctrl/qcom/pinctrl-qcs404.c index 7aae52a09ff0..4ffd56ff809e 100644 --- a/drivers/pinctrl/qcom/pinctrl-qcs404.c +++ b/drivers/pinctrl/qcom/pinctrl-qcs404.c @@ -79,7 +79,7 @@ enum { .intr_cfg_reg = 0, \ .intr_status_reg = 0, \ .intr_target_reg = 0, \ - .tile = NORTH, \ + .tile = SOUTH, \ .mux_bit = -1, \ .pull_bit = pull, \ .drv_bit = drv, \ -- cgit v1.2.3 From b10bd9a256aec504c14a7c9b6fccb6301ecf290a Mon Sep 17 00:00:00 2001 From: Pierre Morel Date: Mon, 11 Feb 2019 10:20:49 +0100 Subject: s390: vsie: Use effective CRYCBD.31 to check CRYCBD validity When facility.76 MSAX3 is present for the guest we must issue a validity interception if the CRYCBD is not valid. The bit CRYCBD.31 is an effective field and tested at each guest level and has for effect to mask the facility.76 It follows that if CRYCBD.31 is clear and AP is not in use we do not have to test the CRYCBD validatity even if facility.76 is present in the host. Fixes: 6ee74098201b ("KVM: s390: vsie: allow CRYCB FORMAT-0") Cc: stable@vger.kernel.org Signed-off-by: Pierre Morel Reported-by: Claudio Imbrenda Acked-by: David Hildenbrand Acked-by: Cornelia Huck Reviewed-by: Christian Borntraeger Message-Id: <1549876849-32680-1-git-send-email-pmorel@linux.ibm.com> Signed-off-by: Christian Borntraeger --- arch/s390/kvm/vsie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index a153257bf7d9..d62fa148558b 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -297,7 +297,7 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) scb_s->crycbd = 0; apie_h = vcpu->arch.sie_block->eca & ECA_APIE; - if (!apie_h && !key_msk) + if (!apie_h && (!key_msk || fmt_o == CRYCB_FORMAT0)) return 0; if (!crycb_addr) -- cgit v1.2.3 From fc2d5cfdcfe2ab76b263d91429caa22451123085 Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Thu, 7 Feb 2019 13:33:21 -0700 Subject: af_key: unconditionally clone on broadcast Attempting to avoid cloning the skb when broadcasting by inflating the refcount with sock_hold/sock_put while under RCU lock is dangerous and violates RCU principles. It leads to subtle race conditions when attempting to free the SKB, as we may reference sockets that have already been freed by the stack. Unable to handle kernel paging request at virtual address 6b6b6b6b6b6c4b [006b6b6b6b6b6c4b] address between user and kernel address ranges Internal error: Oops: 96000004 [#1] PREEMPT SMP task: fffffff78f65b380 task.stack: ffffff8049a88000 pc : sock_rfree+0x38/0x6c lr : skb_release_head_state+0x6c/0xcc Process repro (pid: 7117, stack limit = 0xffffff8049a88000) Call trace: sock_rfree+0x38/0x6c skb_release_head_state+0x6c/0xcc skb_release_all+0x1c/0x38 __kfree_skb+0x1c/0x30 kfree_skb+0xd0/0xf4 pfkey_broadcast+0x14c/0x18c pfkey_sendmsg+0x1d8/0x408 sock_sendmsg+0x44/0x60 ___sys_sendmsg+0x1d0/0x2a8 __sys_sendmsg+0x64/0xb4 SyS_sendmsg+0x34/0x4c el0_svc_naked+0x34/0x38 Kernel panic - not syncing: Fatal exception Suggested-by: Eric Dumazet Signed-off-by: Sean Tranchetti Signed-off-by: Steffen Klassert --- net/key/af_key.c | 40 +++++++++++++++------------------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index 637030f43b67..5651c29cb5bd 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -196,30 +196,22 @@ static int pfkey_release(struct socket *sock) return 0; } -static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2, - gfp_t allocation, struct sock *sk) +static int pfkey_broadcast_one(struct sk_buff *skb, gfp_t allocation, + struct sock *sk) { int err = -ENOBUFS; - sock_hold(sk); - if (*skb2 == NULL) { - if (refcount_read(&skb->users) != 1) { - *skb2 = skb_clone(skb, allocation); - } else { - *skb2 = skb; - refcount_inc(&skb->users); - } - } - if (*skb2 != NULL) { - if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) { - skb_set_owner_r(*skb2, sk); - skb_queue_tail(&sk->sk_receive_queue, *skb2); - sk->sk_data_ready(sk); - *skb2 = NULL; - err = 0; - } + if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) + return err; + + skb = skb_clone(skb, allocation); + + if (skb) { + skb_set_owner_r(skb, sk); + skb_queue_tail(&sk->sk_receive_queue, skb); + sk->sk_data_ready(sk); + err = 0; } - sock_put(sk); return err; } @@ -234,7 +226,6 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, { struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); struct sock *sk; - struct sk_buff *skb2 = NULL; int err = -ESRCH; /* XXX Do we need something like netlink_overrun? I think @@ -253,7 +244,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, * socket. */ if (pfk->promisc) - pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk); + pfkey_broadcast_one(skb, GFP_ATOMIC, sk); /* the exact target will be processed later */ if (sk == one_sk) @@ -268,7 +259,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, continue; } - err2 = pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk); + err2 = pfkey_broadcast_one(skb, GFP_ATOMIC, sk); /* Error is cleared after successful sending to at least one * registered KM */ @@ -278,9 +269,8 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, rcu_read_unlock(); if (one_sk != NULL) - err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk); + err = pfkey_broadcast_one(skb, allocation, one_sk); - kfree_skb(skb2); kfree_skb(skb); return err; } -- cgit v1.2.3 From 323fb7b947b265753de34703dbbf8acc8ea3a4de Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Thu, 7 Feb 2019 18:00:12 +0100 Subject: ASoC: samsung: i2s: Fix prescaler setting for the secondary DAI Make sure i2s->rclk_srcrate is properly initialized also during playback through the secondary DAI. Signed-off-by: Sylwester Nawrocki Acked-by: Krzysztof Kozlowski Signed-off-by: Mark Brown --- sound/soc/samsung/i2s.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c index ce00fe2f6aae..d4bde4834ce5 100644 --- a/sound/soc/samsung/i2s.c +++ b/sound/soc/samsung/i2s.c @@ -604,6 +604,7 @@ static int i2s_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) { struct i2s_dai *i2s = to_info(dai); + struct i2s_dai *other = get_other_dai(i2s); int lrp_shift, sdf_shift, sdf_mask, lrp_rlow, mod_slave; u32 mod, tmp = 0; unsigned long flags; @@ -661,7 +662,8 @@ static int i2s_set_fmt(struct snd_soc_dai *dai, * CLK_I2S_RCLK_SRC clock is not exposed so we ensure any * clock configuration assigned in DT is not overwritten. */ - if (i2s->rclk_srcrate == 0 && i2s->clk_data.clks == NULL) + if (i2s->rclk_srcrate == 0 && i2s->clk_data.clks == NULL && + other->clk_data.clks == NULL) i2s_set_sysclk(dai, SAMSUNG_I2S_RCLKSRC_0, 0, SND_SOC_CLOCK_IN); break; @@ -699,6 +701,7 @@ static int i2s_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, struct snd_soc_dai *dai) { struct i2s_dai *i2s = to_info(dai); + struct i2s_dai *other = get_other_dai(i2s); u32 mod, mask = 0, val = 0; struct clk *rclksrc; unsigned long flags; @@ -784,6 +787,9 @@ static int i2s_hw_params(struct snd_pcm_substream *substream, i2s->frmclk = params_rate(params); rclksrc = i2s->clk_table[CLK_I2S_RCLK_SRC]; + if (!rclksrc || IS_ERR(rclksrc)) + rclksrc = other->clk_table[CLK_I2S_RCLK_SRC]; + if (rclksrc && !IS_ERR(rclksrc)) i2s->rclk_srcrate = clk_get_rate(rclksrc); -- cgit v1.2.3 From 6e9526852fad7eb77b7755114951237312258ec7 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 11 Feb 2019 19:03:37 +0530 Subject: mtd: Use mtd->name when registering nvmem device With this patch, we use the mtd->name instead of concatenating the name with '0'. Fixes: c4dfa25ab307 ("mtd: add support for reading MTD devices via the nvmem API") Signed-off-by: Aneesh Kumar K.V Signed-off-by: Boris Brezillon --- drivers/mtd/mtdcore.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 999b705769a8..3ef01baef9b6 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -507,6 +507,7 @@ static int mtd_nvmem_add(struct mtd_info *mtd) { struct nvmem_config config = {}; + config.id = -1; config.dev = &mtd->dev; config.name = mtd->name; config.owner = THIS_MODULE; -- cgit v1.2.3 From 3e35730dd7540bad2d4e002703996391d9be49a0 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 11 Feb 2019 19:03:38 +0530 Subject: mtd: powernv_flash: Fix device registration error This change helps me to get multiple mtd device registered. Without this I get sysfs: cannot create duplicate filename '/bus/nvmem/devices/flash0' CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.0.0-rc2-00557-g1ef20ef21f22 #13 Call Trace: [c0000000b38e3220] [c000000000b58fe4] dump_stack+0xe8/0x164 (unreliable) [c0000000b38e3270] [c0000000004cf074] sysfs_warn_dup+0x84/0xb0 [c0000000b38e32f0] [c0000000004cf6c4] sysfs_do_create_link_sd.isra.0+0x114/0x150 [c0000000b38e3340] [c000000000726a84] bus_add_device+0x94/0x1e0 [c0000000b38e33c0] [c0000000007218f0] device_add+0x4d0/0x830 [c0000000b38e3480] [c0000000009d54a8] nvmem_register.part.2+0x1c8/0xb30 [c0000000b38e3560] [c000000000834530] mtd_nvmem_add+0x90/0x120 [c0000000b38e3650] [c000000000835bc8] add_mtd_device+0x198/0x4e0 [c0000000b38e36f0] [c00000000083619c] mtd_device_parse_register+0x11c/0x280 [c0000000b38e3780] [c000000000840830] powernv_flash_probe+0x180/0x250 [c0000000b38e3820] [c00000000072c120] platform_drv_probe+0x60/0xf0 [c0000000b38e38a0] [c0000000007283c8] really_probe+0x138/0x4d0 [c0000000b38e3930] [c000000000728acc] driver_probe_device+0x13c/0x1b0 [c0000000b38e39b0] [c000000000728c7c] __driver_attach+0x13c/0x1c0 [c0000000b38e3a30] [c000000000725130] bus_for_each_dev+0xa0/0x120 [c0000000b38e3a90] [c000000000727b2c] driver_attach+0x2c/0x40 [c0000000b38e3ab0] [c0000000007270f8] bus_add_driver+0x228/0x360 [c0000000b38e3b40] [c00000000072a2e0] driver_register+0x90/0x1a0 [c0000000b38e3bb0] [c00000000072c020] __platform_driver_register+0x50/0x70 [c0000000b38e3bd0] [c00000000105c984] powernv_flash_driver_init+0x24/0x38 [c0000000b38e3bf0] [c000000000010904] do_one_initcall+0x84/0x464 [c0000000b38e3cd0] [c000000001004548] kernel_init_freeable+0x530/0x634 [c0000000b38e3db0] [c000000000011154] kernel_init+0x1c/0x168 [c0000000b38e3e20] [c00000000000bed4] ret_from_kernel_thread+0x5c/0x68 mtd mtd1: Failed to register NVMEM device With the change we now have root@(none):/sys/bus/nvmem/devices# ls -al total 0 drwxr-xr-x 2 root root 0 Feb 6 20:49 . drwxr-xr-x 4 root root 0 Feb 6 20:49 .. lrwxrwxrwx 1 root root 0 Feb 6 20:49 flash@0 -> ../../../devices/platform/ibm,opal:flash@0/mtd/mtd0/flash@0 lrwxrwxrwx 1 root root 0 Feb 6 20:49 flash@1 -> ../../../devices/platform/ibm,opal:flash@1/mtd/mtd1/flash@1 Fixes: 1cbb4a1c433a ("mtd: powernv: Add powernv flash MTD abstraction driver") Signed-off-by: Aneesh Kumar K.V Signed-off-by: Boris Brezillon --- drivers/mtd/devices/powernv_flash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/devices/powernv_flash.c b/drivers/mtd/devices/powernv_flash.c index 22f753e555ac..83f88b8b5d9f 100644 --- a/drivers/mtd/devices/powernv_flash.c +++ b/drivers/mtd/devices/powernv_flash.c @@ -212,7 +212,7 @@ static int powernv_flash_set_driver_info(struct device *dev, * Going to have to check what details I need to set and how to * get them */ - mtd->name = devm_kasprintf(dev, GFP_KERNEL, "%pOFn", dev->of_node); + mtd->name = devm_kasprintf(dev, GFP_KERNEL, "%pOFP", dev->of_node); mtd->type = MTD_NORFLASH; mtd->flags = MTD_WRITEABLE; mtd->size = size; -- cgit v1.2.3 From 207a369e3c085799e7836221f64e7a7329985fb6 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 4 Feb 2019 18:10:55 +0900 Subject: sh: fix build error for invisible CONFIG_BUILTIN_DTB_SOURCE I fixed a similar build error in commit 1b1e4ee86e00 ("sh: fix build error for empty CONFIG_BUILTIN_DTB_SOURCE"), but it came back again. Since commit 37c8a5fafa3b ("kbuild: consolidate Devicetree dtb build rules"), the combination of CONFIG_OF_EARLY_FLATTREE=y and CONFIG_USE_BUILTIN_DTB=n results in the following build error: make[1]: *** No rule to make target 'arch/sh/boot/dts/.dtb.o', needed by 'arch/sh/boot/dts/built-in.a'. Stop. Prior to that commit, there was only one path to descend into arch/sh/boot/dts/, and arch/sh/Makefile correctly guards it with CONFIG_USE_BUILTIN_DTB: core-$(CONFIG_USE_BUILTIN_DTB) += arch/sh/boot/dts/ Now, there is another path to descend there from the top Makefile when CONFIG_OF_EARLY_FLATTREE=y. If CONFIG_USE_BUILTIN_DTB is disabled, CONFIG_BUILTIN_DTB_SOURCE is invisible instead of defined as "". Add obj-$(CONFIG_USE_BUILTIN_DTB) guard to avoid the attempt to build the non-existing file. Fixes: 37c8a5fafa3b ("kbuild: consolidate Devicetree dtb build rules") Reported-by: kbuild test robot Signed-off-by: Masahiro Yamada --- arch/sh/boot/dts/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sh/boot/dts/Makefile b/arch/sh/boot/dts/Makefile index 01d0f7fb14cc..2563d1e532e2 100644 --- a/arch/sh/boot/dts/Makefile +++ b/arch/sh/boot/dts/Makefile @@ -1,3 +1,3 @@ ifneq ($(CONFIG_BUILTIN_DTB_SOURCE),"") -obj-y += $(patsubst "%",%,$(CONFIG_BUILTIN_DTB_SOURCE)).dtb.o +obj-$(CONFIG_USE_BUILTIN_DTB) += $(patsubst "%",%,$(CONFIG_BUILTIN_DTB_SOURCE)).dtb.o endif -- cgit v1.2.3 From 7f665b1c3283aae5b61843136d0a8ee808ba3199 Mon Sep 17 00:00:00 2001 From: Jeremy Soller Date: Wed, 13 Feb 2019 10:56:19 -0700 Subject: ALSA: hda/realtek - Headset microphone and internal speaker support for System76 oryp5 On the System76 Oryx Pro (oryp5), there is a headset microphone input attached to 0x19 that does not have a jack detect. In order to get it working, the pin configuration needs to be set correctly, and the ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC fixup needs to be applied. This is similar to the MIC_NO_PRESENCE fixups for some Dell laptops, except we have a separate microphone jack that is already configured correctly. Since the ALC1220 does not have a fixup similar to ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC, I have exposed the fixup from the ALC269 in a way that it can be accessed from the alc1220_fixup_system76_oryp5 function. In addition, the alc1220_fixup_clevo_p950 needs to be applied to gain speaker output. Signed-off-by: Jeremy Soller Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6df758adff84..3ce318a3086d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1855,6 +1855,8 @@ enum { ALC887_FIXUP_BASS_CHMAP, ALC1220_FIXUP_GB_DUAL_CODECS, ALC1220_FIXUP_CLEVO_P950, + ALC1220_FIXUP_SYSTEM76_ORYP5, + ALC1220_FIXUP_SYSTEM76_ORYP5_PINS, }; static void alc889_fixup_coef(struct hda_codec *codec, @@ -2056,6 +2058,17 @@ static void alc1220_fixup_clevo_p950(struct hda_codec *codec, snd_hda_override_conn_list(codec, 0x1b, 1, conn1); } +static void alc_fixup_headset_mode_no_hp_mic(struct hda_codec *codec, + const struct hda_fixup *fix, int action); + +static void alc1220_fixup_system76_oryp5(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + alc1220_fixup_clevo_p950(codec, fix, action); + alc_fixup_headset_mode_no_hp_mic(codec, fix, action); +} + static const struct hda_fixup alc882_fixups[] = { [ALC882_FIXUP_ABIT_AW9D_MAX] = { .type = HDA_FIXUP_PINS, @@ -2300,6 +2313,19 @@ static const struct hda_fixup alc882_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc1220_fixup_clevo_p950, }, + [ALC1220_FIXUP_SYSTEM76_ORYP5] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc1220_fixup_system76_oryp5, + }, + [ALC1220_FIXUP_SYSTEM76_ORYP5_PINS] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + {} + }, + .chained = true, + .chain_id = ALC1220_FIXUP_SYSTEM76_ORYP5, + }, }; static const struct snd_pci_quirk alc882_fixup_tbl[] = { @@ -2376,6 +2402,8 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x9501, "Clevo P950HR", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x95e1, "Clevo P95xER", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x95e2, "Clevo P950ER", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0x96e1, "System76 Oryx Pro (oryp5)", ALC1220_FIXUP_SYSTEM76_ORYP5_PINS), + SND_PCI_QUIRK(0x1558, 0x97e1, "System76 Oryx Pro (oryp5)", ALC1220_FIXUP_SYSTEM76_ORYP5_PINS), SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD), SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD), SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Y530", ALC882_FIXUP_LENOVO_Y530), -- cgit v1.2.3 From c8c6ee611926685a7d753409e0a6e48b9e1b8748 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Thu, 14 Feb 2019 11:41:33 +0800 Subject: ALSA: hda/realtek: Disable PC beep in passthrough on alc285 It is reported that there's a constant background "hum/whitenoise" in the headset on the Lenovo X1 machines with the codec alc285, and it is confirmed that if we run the command below, the noise will stop. sudo hda-verb /dev/snd/hwC0D0 0x1d SET_PIN_WIDGET_CONTROL 0x0 Then I consulted this issue with Kailang, he told me the pin 0x1d on this codec is used for PC beep in, the noise probably comes from this pin and we can also disable the PC beep in passthrough, then the PC beep in will not affect other sound playback. Fixes: c4cfcf6f4297 ("ALSA: hda/realtek - fix the pop noise on headphone for lenovo laptops") Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1660581 Cc: Signed-off-by: Kailang Yang Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3ce318a3086d..1ffa36e987b4 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5660,6 +5660,7 @@ enum { ALC294_FIXUP_ASUS_SPK, ALC225_FIXUP_HEADSET_JACK, ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE, + ALC285_FIXUP_LENOVO_PC_BEEP_IN_NOISE, }; static const struct hda_fixup alc269_fixups[] = { @@ -6615,6 +6616,17 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC }, + [ALC285_FIXUP_LENOVO_PC_BEEP_IN_NOISE] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + /* Disable PCBEEP-IN passthrough */ + { 0x20, AC_VERB_SET_COEF_INDEX, 0x36 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x57d7 }, + { } + }, + .chained = true, + .chain_id = ALC285_FIXUP_LENOVO_HEADPHONE_NOISE + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -7300,7 +7312,7 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60130}, {0x19, 0x03a11020}, {0x21, 0x0321101f}), - SND_HDA_PIN_QUIRK(0x10ec0285, 0x17aa, "Lenovo", ALC285_FIXUP_LENOVO_HEADPHONE_NOISE, + SND_HDA_PIN_QUIRK(0x10ec0285, 0x17aa, "Lenovo", ALC285_FIXUP_LENOVO_PC_BEEP_IN_NOISE, {0x12, 0x90a60130}, {0x14, 0x90170110}, {0x19, 0x04a11040}, -- cgit v1.2.3 From af14b2c98adb85e9517390bb88309338b9075350 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Thu, 14 Feb 2019 00:06:18 +0100 Subject: gpio: pxa: avoid attempting to set pin direction via pinctrl on MMP2 Similarly to PXA3xx, pinctrl-single can't set pin direction on MMP2 either. See also: commit 9dabfdd84bdfa ("gpio: pxa: disable pinctrl calls for PXA3xx") Cc: stable@vger.kernel.org Fixes: a770d946371e ("gpio: pxa: add pin control gpio direction and request") Signed-off-by: Lubomir Rintel Acked-by: Pavel Machek Signed-off-by: Linus Walleij --- drivers/gpio/gpio-pxa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c index e9600b556f39..bcc6be4a5cb2 100644 --- a/drivers/gpio/gpio-pxa.c +++ b/drivers/gpio/gpio-pxa.c @@ -245,6 +245,7 @@ static bool pxa_gpio_has_pinctrl(void) { switch (gpio_type) { case PXA3XX_GPIO: + case MMP2_GPIO: return false; default: -- cgit v1.2.3 From fc4144e7815b7747b6aba140d7a91da45ee9dd8c Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Thu, 14 Feb 2019 17:40:53 +0530 Subject: cxgb4: Export sge_host_page_size to ulds Export the sge_host_page_size field to ULDs via cxgb4_lld_info, so that iw_cxgb4 can make use of this in calculating the correct qp/cq mask. Fixes: 2391b0030e24 ("cxgb4: Remove SGE_HOST_PAGE_SIZE dependency on page size") Signed-off-by: Raju Rangoju Signed-off-by: Jason Gunthorpe --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 1 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index c041f44324db..b3654598a2d5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -660,6 +660,7 @@ static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld) lld->cclk_ps = 1000000000 / adap->params.vpd.cclk; lld->udb_density = 1 << adap->params.sge.eq_qpp; lld->ucq_density = 1 << adap->params.sge.iq_qpp; + lld->sge_host_page_size = 1 << (adap->params.sge.hps + 10); lld->filt_mode = adap->params.tp.vlan_pri_map; /* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */ for (i = 0; i < NCHAN; i++) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 5fa9a2d5fc4b..21da34a4ca24 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -336,6 +336,7 @@ struct cxgb4_lld_info { unsigned int cclk_ps; /* Core clock period in psec */ unsigned short udb_density; /* # of user DB/page */ unsigned short ucq_density; /* # of user CQs/page */ + unsigned int sge_host_page_size; /* SGE host page size */ unsigned short filt_mode; /* filter optional components */ unsigned short tx_modq[NCHAN]; /* maps each tx channel to a */ /* scheduler queue */ -- cgit v1.2.3 From f09ef134a7ca3f0d2ce485a757f5b79809ebb803 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Thu, 14 Feb 2019 17:40:54 +0530 Subject: iw_cxgb4: cq/qp mask depends on bar2 pages in a host page Adjust the cq/qp mask based on the number of bar2 pages in a host page. For user-mode rdma, the granularity of the BAR2 memory mapped to a user rdma process during queue allocation must be based on the host page size. The lld attributes udb_density and ucq_density are used to figure out how many sge contexts are in a bar2 page. So the rdev->qpmask and rdev->cqmask in iw_cxgb4 need to now be adjusted based on how many sge bar2 pages are in a host page. Otherwise the device fails to work on non 4k page size systems. Fixes: 2391b0030e24 ("cxgb4: Remove SGE_HOST_PAGE_SIZE dependency on page size") Signed-off-by: Raju Rangoju Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/device.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index c13c0ba30f63..d499cd61c0e8 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -783,6 +783,7 @@ void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev, static int c4iw_rdev_open(struct c4iw_rdev *rdev) { int err; + unsigned int factor; c4iw_init_dev_ucontext(rdev, &rdev->uctx); @@ -806,8 +807,18 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) return -EINVAL; } - rdev->qpmask = rdev->lldi.udb_density - 1; - rdev->cqmask = rdev->lldi.ucq_density - 1; + /* This implementation requires a sge_host_page_size <= PAGE_SIZE. */ + if (rdev->lldi.sge_host_page_size > PAGE_SIZE) { + pr_err("%s: unsupported sge host page size %u\n", + pci_name(rdev->lldi.pdev), + rdev->lldi.sge_host_page_size); + return -EINVAL; + } + + factor = PAGE_SIZE / rdev->lldi.sge_host_page_size; + rdev->qpmask = (rdev->lldi.udb_density * factor) - 1; + rdev->cqmask = (rdev->lldi.ucq_density * factor) - 1; + pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u srq size %u\n", pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start, rdev->lldi.vr->stag.size, c4iw_num_stags(rdev), -- cgit v1.2.3 From a08bf91ce28ed3ae7b6fef35d843fef8dc8c2cd9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 14 Feb 2019 16:20:01 +0000 Subject: KEYS: allow reaching the keys quotas exactly If the sysctl 'kernel.keys.maxkeys' is set to some number n, then actually users can only add up to 'n - 1' keys. Likewise for 'kernel.keys.maxbytes' and the root_* versions of these sysctls. But these sysctls are apparently supposed to be *maximums*, as per their names and all documentation I could find -- the keyrings(7) man page, Documentation/security/keys/core.rst, and all the mentions of EDQUOT meaning that the key quota was *exceeded* (as opposed to reached). Thus, fix the code to allow reaching the quotas exactly. Fixes: 0b77f5bfb45c ("keys: make the keyring quotas controllable through /proc/sys") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: David Howells Signed-off-by: James Morris --- security/keys/key.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/keys/key.c b/security/keys/key.c index 44a80d6741a1..0ec9322af4f9 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -265,8 +265,8 @@ struct key *key_alloc(struct key_type *type, const char *desc, spin_lock(&user->lock); if (!(flags & KEY_ALLOC_QUOTA_OVERRUN)) { - if (user->qnkeys + 1 >= maxkeys || - user->qnbytes + quotalen >= maxbytes || + if (user->qnkeys + 1 > maxkeys || + user->qnbytes + quotalen > maxbytes || user->qnbytes + quotalen < user->qnbytes) goto no_quota; } -- cgit v1.2.3 From bb2ba2d75a2d673e76ddaf13a9bd30d6a8b1bb08 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 14 Feb 2019 16:20:15 +0000 Subject: assoc_array: Fix shortcut creation Fix the creation of shortcuts for which the length of the index key value is an exact multiple of the machine word size. The problem is that the code that blanks off the unused bits of the shortcut value malfunctions if the number of bits in the last word equals machine word size. This is due to the "<<" operator being given a shift of zero in this case, and so the mask that should be all zeros is all ones instead. This causes the subsequent masking operation to clear everything rather than clearing nothing. Ordinarily, the presence of the hash at the beginning of the tree index key makes the issue very hard to test for, but in this case, it was encountered due to a development mistake that caused the hash output to be either 0 (keyring) or 1 (non-keyring) only. This made it susceptible to the keyctl/unlink/valid test in the keyutils package. The fix is simply to skip the blanking if the shift would be 0. For example, an index key that is 64 bits long would produce a 0 shift and thus a 'blank' of all 1s. This would then be inverted and AND'd onto the index_key, incorrectly clearing the entire last word. Fixes: 3cb989501c26 ("Add a generic associative array implementation.") Signed-off-by: David Howells Signed-off-by: James Morris --- lib/assoc_array.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/assoc_array.c b/lib/assoc_array.c index c6659cb37033..59875eb278ea 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -768,9 +768,11 @@ all_leaves_cluster_together: new_s0->index_key[i] = ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE); - blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK); - pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank); - new_s0->index_key[keylen - 1] &= ~blank; + if (level & ASSOC_ARRAY_KEY_CHUNK_MASK) { + blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK); + pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank); + new_s0->index_key[keylen - 1] &= ~blank; + } /* This now reduces to a node splitting exercise for which we'll need * to regenerate the disparity table. -- cgit v1.2.3 From 822ad64d7e46a8e2c8b8a796738d7b657cbb146d Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 14 Feb 2019 16:20:25 +0000 Subject: keys: Fix dependency loop between construction record and auth key In the request_key() upcall mechanism there's a dependency loop by which if a key type driver overrides the ->request_key hook and the userspace side manages to lose the authorisation key, the auth key and the internal construction record (struct key_construction) can keep each other pinned. Fix this by the following changes: (1) Killing off the construction record and using the auth key instead. (2) Including the operation name in the auth key payload and making the payload available outside of security/keys/. (3) The ->request_key hook is given the authkey instead of the cons record and operation name. Changes (2) and (3) allow the auth key to naturally be cleaned up if the keyring it is in is destroyed or cleared or the auth key is unlinked. Fixes: 7ee02a316600 ("keys: Fix dependency loop between construction record and auth key") Signed-off-by: David Howells Signed-off-by: James Morris --- fs/nfs/nfs4idmap.c | 31 +++++++++------- include/keys/request_key_auth-type.h | 36 ++++++++++++++++++ include/linux/key-type.h | 22 +++-------- security/keys/internal.h | 13 +------ security/keys/keyctl.c | 1 + security/keys/process_keys.c | 1 + security/keys/request_key.c | 72 +++++++++++++++--------------------- security/keys/request_key_auth.c | 16 ++++---- 8 files changed, 100 insertions(+), 92 deletions(-) create mode 100644 include/keys/request_key_auth-type.h diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 3f23b6840547..bf34ddaa2ad7 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include "internal.h" @@ -59,7 +60,7 @@ static struct key_type key_type_id_resolver_legacy; struct idmap_legacy_upcalldata { struct rpc_pipe_msg pipe_msg; struct idmap_msg idmap_msg; - struct key_construction *key_cons; + struct key *authkey; struct idmap *idmap; }; @@ -384,7 +385,7 @@ static const match_table_t nfs_idmap_tokens = { { Opt_find_err, NULL } }; -static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *); +static int nfs_idmap_legacy_upcall(struct key *, void *); static ssize_t idmap_pipe_downcall(struct file *, const char __user *, size_t); static void idmap_release_pipe(struct inode *); @@ -549,11 +550,12 @@ nfs_idmap_prepare_pipe_upcall(struct idmap *idmap, static void nfs_idmap_complete_pipe_upcall_locked(struct idmap *idmap, int ret) { - struct key_construction *cons = idmap->idmap_upcall_data->key_cons; + struct key *authkey = idmap->idmap_upcall_data->authkey; kfree(idmap->idmap_upcall_data); idmap->idmap_upcall_data = NULL; - complete_request_key(cons, ret); + complete_request_key(authkey, ret); + key_put(authkey); } static void @@ -563,15 +565,14 @@ nfs_idmap_abort_pipe_upcall(struct idmap *idmap, int ret) nfs_idmap_complete_pipe_upcall_locked(idmap, ret); } -static int nfs_idmap_legacy_upcall(struct key_construction *cons, - const char *op, - void *aux) +static int nfs_idmap_legacy_upcall(struct key *authkey, void *aux) { struct idmap_legacy_upcalldata *data; + struct request_key_auth *rka = get_request_key_auth(authkey); struct rpc_pipe_msg *msg; struct idmap_msg *im; struct idmap *idmap = (struct idmap *)aux; - struct key *key = cons->key; + struct key *key = rka->target_key; int ret = -ENOKEY; if (!aux) @@ -586,7 +587,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, msg = &data->pipe_msg; im = &data->idmap_msg; data->idmap = idmap; - data->key_cons = cons; + data->authkey = key_get(authkey); ret = nfs_idmap_prepare_message(key->description, idmap, im, msg); if (ret < 0) @@ -604,7 +605,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, out2: kfree(data); out1: - complete_request_key(cons, ret); + complete_request_key(authkey, ret); return ret; } @@ -651,9 +652,10 @@ out: static ssize_t idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { + struct request_key_auth *rka; struct rpc_inode *rpci = RPC_I(file_inode(filp)); struct idmap *idmap = (struct idmap *)rpci->private; - struct key_construction *cons; + struct key *authkey; struct idmap_msg im; size_t namelen_in; int ret = -ENOKEY; @@ -665,7 +667,8 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (idmap->idmap_upcall_data == NULL) goto out_noupcall; - cons = idmap->idmap_upcall_data->key_cons; + authkey = idmap->idmap_upcall_data->authkey; + rka = get_request_key_auth(authkey); if (mlen != sizeof(im)) { ret = -ENOSPC; @@ -690,9 +693,9 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) ret = nfs_idmap_read_and_verify_message(&im, &idmap->idmap_upcall_data->idmap_msg, - cons->key, cons->authkey); + rka->target_key, authkey); if (ret >= 0) { - key_set_timeout(cons->key, nfs_idmap_cache_timeout); + key_set_timeout(rka->target_key, nfs_idmap_cache_timeout); ret = mlen; } diff --git a/include/keys/request_key_auth-type.h b/include/keys/request_key_auth-type.h new file mode 100644 index 000000000000..a726dd3f1dc6 --- /dev/null +++ b/include/keys/request_key_auth-type.h @@ -0,0 +1,36 @@ +/* request_key authorisation token key type + * + * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _KEYS_REQUEST_KEY_AUTH_TYPE_H +#define _KEYS_REQUEST_KEY_AUTH_TYPE_H + +#include + +/* + * Authorisation record for request_key(). + */ +struct request_key_auth { + struct key *target_key; + struct key *dest_keyring; + const struct cred *cred; + void *callout_info; + size_t callout_len; + pid_t pid; + char op[8]; +} __randomize_layout; + +static inline struct request_key_auth *get_request_key_auth(const struct key *key) +{ + return key->payload.data[0]; +} + + +#endif /* _KEYS_REQUEST_KEY_AUTH_TYPE_H */ diff --git a/include/linux/key-type.h b/include/linux/key-type.h index bc9af551fc83..e49d1de0614e 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -20,15 +20,6 @@ struct kernel_pkey_query; struct kernel_pkey_params; -/* - * key under-construction record - * - passed to the request_key actor if supplied - */ -struct key_construction { - struct key *key; /* key being constructed */ - struct key *authkey;/* authorisation for key being constructed */ -}; - /* * Pre-parsed payload, used by key add, update and instantiate. * @@ -50,8 +41,7 @@ struct key_preparsed_payload { time64_t expiry; /* Expiry time of key */ } __randomize_layout; -typedef int (*request_key_actor_t)(struct key_construction *key, - const char *op, void *aux); +typedef int (*request_key_actor_t)(struct key *auth_key, void *aux); /* * Preparsed matching criterion. @@ -181,20 +171,20 @@ extern int key_instantiate_and_link(struct key *key, const void *data, size_t datalen, struct key *keyring, - struct key *instkey); + struct key *authkey); extern int key_reject_and_link(struct key *key, unsigned timeout, unsigned error, struct key *keyring, - struct key *instkey); -extern void complete_request_key(struct key_construction *cons, int error); + struct key *authkey); +extern void complete_request_key(struct key *authkey, int error); static inline int key_negate_and_link(struct key *key, unsigned timeout, struct key *keyring, - struct key *instkey) + struct key *authkey) { - return key_reject_and_link(key, timeout, ENOKEY, keyring, instkey); + return key_reject_and_link(key, timeout, ENOKEY, keyring, authkey); } extern int generic_key_instantiate(struct key *key, struct key_preparsed_payload *prep); diff --git a/security/keys/internal.h b/security/keys/internal.h index 479909b858c7..8f533c81aa8d 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -186,20 +186,9 @@ static inline int key_permission(const key_ref_t key_ref, unsigned perm) return key_task_permission(key_ref, current_cred(), perm); } -/* - * Authorisation record for request_key(). - */ -struct request_key_auth { - struct key *target_key; - struct key *dest_keyring; - const struct cred *cred; - void *callout_info; - size_t callout_len; - pid_t pid; -} __randomize_layout; - extern struct key_type key_type_request_key_auth; extern struct key *request_key_auth_new(struct key *target, + const char *op, const void *callout_info, size_t callout_len, struct key *dest_keyring); diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index e8093d025966..7bbe03593e58 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "internal.h" #define KEY_MAX_DESC_SIZE 4096 diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 02c77e928f68..0e0b9ccad2f8 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "internal.h" /* Session keyring create vs join semaphore */ diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 301f0e300dbd..3f56a312dd35 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -18,31 +18,30 @@ #include #include #include "internal.h" +#include #define key_negative_timeout 60 /* default timeout on a negative key's existence */ /** * complete_request_key - Complete the construction of a key. - * @cons: The key construction record. + * @auth_key: The authorisation key. * @error: The success or failute of the construction. * * Complete the attempt to construct a key. The key will be negated * if an error is indicated. The authorisation key will be revoked * unconditionally. */ -void complete_request_key(struct key_construction *cons, int error) +void complete_request_key(struct key *authkey, int error) { - kenter("{%d,%d},%d", cons->key->serial, cons->authkey->serial, error); + struct request_key_auth *rka = get_request_key_auth(authkey); + struct key *key = rka->target_key; + + kenter("%d{%d},%d", authkey->serial, key->serial, error); if (error < 0) - key_negate_and_link(cons->key, key_negative_timeout, NULL, - cons->authkey); + key_negate_and_link(key, key_negative_timeout, NULL, authkey); else - key_revoke(cons->authkey); - - key_put(cons->key); - key_put(cons->authkey); - kfree(cons); + key_revoke(authkey); } EXPORT_SYMBOL(complete_request_key); @@ -91,21 +90,19 @@ static int call_usermodehelper_keys(const char *path, char **argv, char **envp, * Request userspace finish the construction of a key * - execute "/sbin/request-key " */ -static int call_sbin_request_key(struct key_construction *cons, - const char *op, - void *aux) +static int call_sbin_request_key(struct key *authkey, void *aux) { static char const request_key[] = "/sbin/request-key"; + struct request_key_auth *rka = get_request_key_auth(authkey); const struct cred *cred = current_cred(); key_serial_t prkey, sskey; - struct key *key = cons->key, *authkey = cons->authkey, *keyring, - *session; + struct key *key = rka->target_key, *keyring, *session; char *argv[9], *envp[3], uid_str[12], gid_str[12]; char key_str[12], keyring_str[3][12]; char desc[20]; int ret, i; - kenter("{%d},{%d},%s", key->serial, authkey->serial, op); + kenter("{%d},{%d},%s", key->serial, authkey->serial, rka->op); ret = install_user_keyrings(); if (ret < 0) @@ -163,7 +160,7 @@ static int call_sbin_request_key(struct key_construction *cons, /* set up the argument list */ i = 0; argv[i++] = (char *)request_key; - argv[i++] = (char *) op; + argv[i++] = (char *)rka->op; argv[i++] = key_str; argv[i++] = uid_str; argv[i++] = gid_str; @@ -191,7 +188,7 @@ error_link: key_put(keyring); error_alloc: - complete_request_key(cons, ret); + complete_request_key(authkey, ret); kleave(" = %d", ret); return ret; } @@ -205,42 +202,31 @@ static int construct_key(struct key *key, const void *callout_info, size_t callout_len, void *aux, struct key *dest_keyring) { - struct key_construction *cons; request_key_actor_t actor; struct key *authkey; int ret; kenter("%d,%p,%zu,%p", key->serial, callout_info, callout_len, aux); - cons = kmalloc(sizeof(*cons), GFP_KERNEL); - if (!cons) - return -ENOMEM; - /* allocate an authorisation key */ - authkey = request_key_auth_new(key, callout_info, callout_len, + authkey = request_key_auth_new(key, "create", callout_info, callout_len, dest_keyring); - if (IS_ERR(authkey)) { - kfree(cons); - ret = PTR_ERR(authkey); - authkey = NULL; - } else { - cons->authkey = key_get(authkey); - cons->key = key_get(key); + if (IS_ERR(authkey)) + return PTR_ERR(authkey); - /* make the call */ - actor = call_sbin_request_key; - if (key->type->request_key) - actor = key->type->request_key; + /* Make the call */ + actor = call_sbin_request_key; + if (key->type->request_key) + actor = key->type->request_key; - ret = actor(cons, "create", aux); + ret = actor(authkey, aux); - /* check that the actor called complete_request_key() prior to - * returning an error */ - WARN_ON(ret < 0 && - !test_bit(KEY_FLAG_REVOKED, &authkey->flags)); - key_put(authkey); - } + /* check that the actor called complete_request_key() prior to + * returning an error */ + WARN_ON(ret < 0 && + !test_bit(KEY_FLAG_REVOKED, &authkey->flags)); + key_put(authkey); kleave(" = %d", ret); return ret; } @@ -275,7 +261,7 @@ static int construct_get_dest_keyring(struct key **_dest_keyring) if (cred->request_key_auth) { authkey = cred->request_key_auth; down_read(&authkey->sem); - rka = authkey->payload.data[0]; + rka = get_request_key_auth(authkey); if (!test_bit(KEY_FLAG_REVOKED, &authkey->flags)) dest_keyring = diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 87ea2f54dedc..afc304e8b61e 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -17,7 +17,7 @@ #include #include #include "internal.h" -#include +#include static int request_key_auth_preparse(struct key_preparsed_payload *); static void request_key_auth_free_preparse(struct key_preparsed_payload *); @@ -68,7 +68,7 @@ static int request_key_auth_instantiate(struct key *key, static void request_key_auth_describe(const struct key *key, struct seq_file *m) { - struct request_key_auth *rka = key->payload.data[0]; + struct request_key_auth *rka = get_request_key_auth(key); seq_puts(m, "key:"); seq_puts(m, key->description); @@ -83,7 +83,7 @@ static void request_key_auth_describe(const struct key *key, static long request_key_auth_read(const struct key *key, char __user *buffer, size_t buflen) { - struct request_key_auth *rka = key->payload.data[0]; + struct request_key_auth *rka = get_request_key_auth(key); size_t datalen; long ret; @@ -109,7 +109,7 @@ static long request_key_auth_read(const struct key *key, */ static void request_key_auth_revoke(struct key *key) { - struct request_key_auth *rka = key->payload.data[0]; + struct request_key_auth *rka = get_request_key_auth(key); kenter("{%d}", key->serial); @@ -136,7 +136,7 @@ static void free_request_key_auth(struct request_key_auth *rka) */ static void request_key_auth_destroy(struct key *key) { - struct request_key_auth *rka = key->payload.data[0]; + struct request_key_auth *rka = get_request_key_auth(key); kenter("{%d}", key->serial); @@ -147,8 +147,9 @@ static void request_key_auth_destroy(struct key *key) * Create an authorisation token for /sbin/request-key or whoever to gain * access to the caller's security data. */ -struct key *request_key_auth_new(struct key *target, const void *callout_info, - size_t callout_len, struct key *dest_keyring) +struct key *request_key_auth_new(struct key *target, const char *op, + const void *callout_info, size_t callout_len, + struct key *dest_keyring) { struct request_key_auth *rka, *irka; const struct cred *cred = current->cred; @@ -166,6 +167,7 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, if (!rka->callout_info) goto error_free_rka; rka->callout_len = callout_len; + strlcpy(rka->op, op, sizeof(rka->op)); /* see if the calling process is already servicing the key request of * another process */ -- cgit v1.2.3 From 7c1857bdbdf1e4c541e45eab477ee23ed4333ea4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 14 Feb 2019 16:20:37 +0000 Subject: keys: Timestamp new keys Set the timestamp on new keys rather than leaving it unset. Fixes: 31d5a79d7f3d ("KEYS: Do LRU discard in full keyrings") Signed-off-by: David Howells Signed-off-by: James Morris --- security/keys/key.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/keys/key.c b/security/keys/key.c index 0ec9322af4f9..696f1c092c50 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -297,6 +297,7 @@ struct key *key_alloc(struct key_type *type, const char *desc, key->gid = gid; key->perm = perm; key->restrict_link = restrict_link; + key->last_used_at = ktime_get_real_seconds(); if (!(flags & KEY_ALLOC_NOT_IN_QUOTA)) key->flags |= 1 << KEY_FLAG_IN_QUOTA; -- cgit v1.2.3 From 79edd00dc6a96644d76b4a1cb97d94d49e026768 Mon Sep 17 00:00:00 2001 From: Anoob Soman Date: Wed, 13 Feb 2019 13:21:39 +0800 Subject: scsi: libiscsi: Fix race between iscsi_xmit_task and iscsi_complete_task When a target sends Check Condition, whilst initiator is busy xmiting re-queued data, could lead to race between iscsi_complete_task() and iscsi_xmit_task() and eventually crashing with the following kernel backtrace. [3326150.987523] ALERT: BUG: unable to handle kernel NULL pointer dereference at 0000000000000078 [3326150.987549] ALERT: IP: [] iscsi_xmit_task+0x2d/0xc0 [libiscsi] [3326150.987571] WARN: PGD 569c8067 PUD 569c9067 PMD 0 [3326150.987582] WARN: Oops: 0002 [#1] SMP [3326150.987593] WARN: Modules linked in: tun nfsv3 nfs fscache dm_round_robin [3326150.987762] WARN: CPU: 2 PID: 8399 Comm: kworker/u32:1 Tainted: G O 4.4.0+2 #1 [3326150.987774] WARN: Hardware name: Dell Inc. PowerEdge R720/0W7JN5, BIOS 2.5.4 01/22/2016 [3326150.987790] WARN: Workqueue: iscsi_q_13 iscsi_xmitworker [libiscsi] [3326150.987799] WARN: task: ffff8801d50f3800 ti: ffff8801f5458000 task.ti: ffff8801f5458000 [3326150.987810] WARN: RIP: e030:[] [] iscsi_xmit_task+0x2d/0xc0 [libiscsi] [3326150.987825] WARN: RSP: e02b:ffff8801f545bdb0 EFLAGS: 00010246 [3326150.987831] WARN: RAX: 00000000ffffffc3 RBX: ffff880282d2ab20 RCX: ffff88026b6ac480 [3326150.987842] WARN: RDX: 0000000000000000 RSI: 00000000fffffe01 RDI: ffff880282d2ab20 [3326150.987852] WARN: RBP: ffff8801f545bdc8 R08: 0000000000000000 R09: 0000000000000008 [3326150.987862] WARN: R10: 0000000000000000 R11: 000000000000fe88 R12: 0000000000000000 [3326150.987872] WARN: R13: ffff880282d2abe8 R14: ffff880282d2abd8 R15: ffff880282d2ac08 [3326150.987890] WARN: FS: 00007f5a866b4840(0000) GS:ffff88028a640000(0000) knlGS:0000000000000000 [3326150.987900] WARN: CS: e033 DS: 0000 ES: 0000 CR0: 0000000080050033 [3326150.987907] WARN: CR2: 0000000000000078 CR3: 0000000070244000 CR4: 0000000000042660 [3326150.987918] WARN: Stack: [3326150.987924] WARN: ffff880282d2ad58 ffff880282d2ab20 ffff880282d2abe8 ffff8801f545be18 [3326150.987938] WARN: ffffffffa05cea90 ffff880282d2abf8 ffff88026b59cc80 ffff88026b59cc00 [3326150.987951] WARN: ffff88022acf32c0 ffff880289491800 ffff880255a80800 0000000000000400 [3326150.987964] WARN: Call Trace: [3326150.987975] WARN: [] iscsi_xmitworker+0x2f0/0x360 [libiscsi] [3326150.987988] WARN: [] process_one_work+0x1fc/0x3b0 [3326150.987997] WARN: [] worker_thread+0x2a5/0x470 [3326150.988006] WARN: [] ? __schedule+0x648/0x870 [3326150.988015] WARN: [] ? rescuer_thread+0x300/0x300 [3326150.988023] WARN: [] kthread+0xd5/0xe0 [3326150.988031] WARN: [] ? kthread_stop+0x110/0x110 [3326150.988040] WARN: [] ret_from_fork+0x3f/0x70 [3326150.988048] WARN: [] ? kthread_stop+0x110/0x110 [3326150.988127] ALERT: RIP [] iscsi_xmit_task+0x2d/0xc0 [libiscsi] [3326150.988138] WARN: RSP [3326150.988144] WARN: CR2: 0000000000000078 [3326151.020366] WARN: ---[ end trace 1c60974d4678d81b ]--- Commit 6f8830f5bbab ("scsi: libiscsi: add lock around task lists to fix list corruption regression") introduced "taskqueuelock" to fix list corruption during the race, but this wasn't enough. Re-setting of conn->task to NULL, could race with iscsi_xmit_task(). iscsi_complete_task() { .... if (conn->task == task) conn->task = NULL; } conn->task in iscsi_xmit_task() could be NULL and so will be task. __iscsi_get_task(task) will crash (NullPtr de-ref), trying to access refcount. iscsi_xmit_task() { struct iscsi_task *task = conn->task; __iscsi_get_task(task); } This commit will take extra conn->session->back_lock in iscsi_xmit_task() to ensure iscsi_xmit_task() waits for iscsi_complete_task(), if iscsi_complete_task() wins the race. If iscsi_xmit_task() wins the race, iscsi_xmit_task() increments task->refcount (__iscsi_get_task) ensuring iscsi_complete_task() will not iscsi_free_task(). Signed-off-by: Anoob Soman Signed-off-by: Bob Liu Acked-by: Lee Duncan Signed-off-by: Martin K. Petersen --- drivers/scsi/libiscsi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index b8d325ce8754..120fc520f27a 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1459,7 +1459,13 @@ static int iscsi_xmit_task(struct iscsi_conn *conn) if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) return -ENODATA; + spin_lock_bh(&conn->session->back_lock); + if (conn->task == NULL) { + spin_unlock_bh(&conn->session->back_lock); + return -ENODATA; + } __iscsi_get_task(task); + spin_unlock_bh(&conn->session->back_lock); spin_unlock_bh(&conn->session->frwd_lock); rc = conn->session->tt->xmit_task(task); spin_lock_bh(&conn->session->frwd_lock); -- cgit v1.2.3 From 515ce60613128be7a176a8b82b20c7624f3b440d Mon Sep 17 00:00:00 2001 From: Masato Suzuki Date: Thu, 14 Feb 2019 15:01:18 +0900 Subject: scsi: sd_zbc: Fix sd_zbc_report_zones() buffer allocation The function sd_zbc_do_report_zones() issues a REPORT ZONES command with a buffer size calculated based on the number of zones requested by the caller. This value should however not exceed the capabilities of the hardware maximum command size, that is, should not exceed the max_hw_sectors limit of the device. This problem leads to failures of report zones commands when re-validating disks with some SAS HBAs. Fix this by limiting a report zone command buffer size to the minimum of the device max_hw_sectors and calculated value based on the requested number of zones. This does not change the semantic of the report_zones file operation as report zones can always return less zone reports than requested. Short reports are handled using a loop execution of the report_zones file operation in the function blk_report_zones(). [Damien] Before patch 'e76239a3748c ("block: add a report_zones method")', report zones buffer allocation was limited to max_sectors when allocated in blk_report_zones(). This however does not consider the actual format of the device reply which is interface dependent. Limiting the allocation based on the size of the expected reply format rather than the size of the array of generic sturct blkzone passed by blk_report_zones() makes more sense. Fixes: e76239a3748c ("block: add a report_zones method") Cc: stable@vger.kernel.org Signed-off-by: Masato Suzuki Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/sd_zbc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index fff86940388b..a340af797a85 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -142,10 +142,12 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, return -EOPNOTSUPP; /* - * Get a reply buffer for the number of requested zones plus a header. - * For ATA, buffers must be aligned to 512B. + * Get a reply buffer for the number of requested zones plus a header, + * without exceeding the device maximum command size. For ATA disks, + * buffers must be aligned to 512B. */ - buflen = roundup((nrz + 1) * 64, 512); + buflen = min(queue_max_hw_sectors(disk->queue) << 9, + roundup((nrz + 1) * 64, 512)); buf = kmalloc(buflen, gfp_mask); if (!buf) return -ENOMEM; -- cgit v1.2.3 From ffeafdd2bf0b280d67ec1a47ea6287910d271f3f Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 15 Feb 2019 00:37:57 +0800 Subject: scsi: libsas: Fix rphy phy_identifier for PHYs with end devices attached The sysfs phy_identifier attribute for a sas_end_device comes from the rphy phy_identifier value. Currently this is not being set for rphys with an end device attached, so we see incorrect symlinks from systemd disk/by-path: root@localhost:~# ls -l /dev/disk/by-path/ total 0 lrwxrwxrwx 1 root root 9 Feb 13 12:26 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy0-lun-0 -> ../../sdb lrwxrwxrwx 1 root root 10 Feb 13 12:26 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy0-lun-0-part1 -> ../../sdb1 lrwxrwxrwx 1 root root 10 Feb 13 12:26 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy0-lun-0-part2 -> ../../sdb2 lrwxrwxrwx 1 root root 10 Feb 13 12:26 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy0-lun-0-part3 -> ../../sdc3 Indeed, each sas_end_device phy_identifier value is 0: root@localhost:/# more sys/class/sas_device/end_device-0\:0\:2/phy_identifier 0 root@localhost:/# more sys/class/sas_device/end_device-0\:0\:10/phy_identifier 0 This patch fixes the discovery code to set the phy_identifier. With this, we now get proper symlinks: root@localhost:~# ls -l /dev/disk/by-path/ total 0 lrwxrwxrwx 1 root root 9 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy10-lun-0 -> ../../sdg lrwxrwxrwx 1 root root 9 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy11-lun-0 -> ../../sdh lrwxrwxrwx 1 root root 9 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy2-lun-0 -> ../../sda lrwxrwxrwx 1 root root 10 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy2-lun-0-part1 -> ../../sda1 lrwxrwxrwx 1 root root 9 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy3-lun-0 -> ../../sdb lrwxrwxrwx 1 root root 10 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy3-lun-0-part1 -> ../../sdb1 lrwxrwxrwx 1 root root 10 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy3-lun-0-part2 -> ../../sdb2 lrwxrwxrwx 1 root root 9 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy4-lun-0 -> ../../sdc lrwxrwxrwx 1 root root 10 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy4-lun-0-part1 -> ../../sdc1 lrwxrwxrwx 1 root root 10 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy4-lun-0-part2 -> ../../sdc2 lrwxrwxrwx 1 root root 10 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy4-lun-0-part3 -> ../../sdc3 lrwxrwxrwx 1 root root 9 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy5-lun-0 -> ../../sdd lrwxrwxrwx 1 root root 9 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy7-lun-0 -> ../../sde lrwxrwxrwx 1 root root 10 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy7-lun-0-part1 -> ../../sde1 lrwxrwxrwx 1 root root 10 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy7-lun-0-part2 -> ../../sde2 lrwxrwxrwx 1 root root 10 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy7-lun-0-part3 -> ../../sde3 lrwxrwxrwx 1 root root 9 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy8-lun-0 -> ../../sdf lrwxrwxrwx 1 root root 10 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy8-lun-0-part1 -> ../../sdf1 lrwxrwxrwx 1 root root 10 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy8-lun-0-part2 -> ../../sdf2 lrwxrwxrwx 1 root root 10 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy8-lun-0-part3 -> ../../sdf3 Fixes: 2908d778ab3e ("[SCSI] aic94xx: new driver") Reported-by: dann frazier Signed-off-by: John Garry Reviewed-by: Jason Yan Tested-by: dann frazier Signed-off-by: Martin K. Petersen --- drivers/scsi/libsas/sas_expander.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 17eb4185f29d..f21c93bbb35c 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -828,6 +828,7 @@ static struct domain_device *sas_ex_discover_end_dev( rphy = sas_end_device_alloc(phy->port); if (!rphy) goto out_free; + rphy->identify.phy_identifier = phy_id; child->rphy = rphy; get_device(&rphy->dev); @@ -854,6 +855,7 @@ static struct domain_device *sas_ex_discover_end_dev( child->rphy = rphy; get_device(&rphy->dev); + rphy->identify.phy_identifier = phy_id; sas_fill_in_rphy(child, rphy); list_add_tail(&child->disco_list_node, &parent->port->disco_list); -- cgit v1.2.3 From 4a067cf823d9d8e50d41cfb618011c0d4a969c72 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Thu, 14 Feb 2019 22:57:41 +0100 Subject: scsi: core: reset host byte in DID_NEXUS_FAILURE case Up to 4.12, __scsi_error_from_host_byte() would reset the host byte to DID_OK for various cases including DID_NEXUS_FAILURE. Commit 2a842acab109 ("block: introduce new block status code type") replaced this function with scsi_result_to_blk_status() and removed the host-byte resetting code for the DID_NEXUS_FAILURE case. As the line set_host_byte(cmd, DID_OK) was preserved for the other cases, I suppose this was an editing mistake. The fact that the host byte remains set after 4.13 is causing problems with the sg_persist tool, which now returns success rather then exit status 24 when a RESERVATION CONFLICT error is encountered. Fixes: 2a842acab109 "block: introduce new block status code type" Signed-off-by: Martin Wilck Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 6d65ac584eba..f8d51c3d5582 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -655,6 +655,7 @@ static blk_status_t scsi_result_to_blk_status(struct scsi_cmnd *cmd, int result) set_host_byte(cmd, DID_OK); return BLK_STS_TARGET; case DID_NEXUS_FAILURE: + set_host_byte(cmd, DID_OK); return BLK_STS_NEXUS; case DID_ALLOC_FAILURE: set_host_byte(cmd, DID_OK); -- cgit v1.2.3 From c17abcfa93bf0be5e48bb011607d237ac2bfc839 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 9 Feb 2019 02:01:01 +0100 Subject: pinctrl: meson: meson8b: fix the sdxc_a data 1..3 pins Fix the mismatch between the "sdxc_d13_1_a" pin group definition from meson8b_cbus_groups and the entry in sdxc_a_groups ("sdxc_d0_13_1_a"). This makes it possible to use "sdxc_d13_1_a" in device-tree files to route the MMC data 1..3 pins to GPIOX_1..3. Fixes: 0fefcb6876d0d6 ("pinctrl: Add support for Meson8b") Signed-off-by: Martin Blumenstingl Signed-off-by: Linus Walleij --- drivers/pinctrl/meson/pinctrl-meson8b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson8b.c b/drivers/pinctrl/meson/pinctrl-meson8b.c index c69ca95b1ad5..0f140a802137 100644 --- a/drivers/pinctrl/meson/pinctrl-meson8b.c +++ b/drivers/pinctrl/meson/pinctrl-meson8b.c @@ -693,7 +693,7 @@ static const char * const sd_a_groups[] = { static const char * const sdxc_a_groups[] = { "sdxc_d0_0_a", "sdxc_d13_0_a", "sdxc_d47_a", "sdxc_clk_a", - "sdxc_cmd_a", "sdxc_d0_1_a", "sdxc_d0_13_1_a" + "sdxc_cmd_a", "sdxc_d0_1_a", "sdxc_d13_1_a" }; static const char * const pcm_a_groups[] = { -- cgit v1.2.3 From 0358affb5cd8bbd685a6ab163a36dd28a818da73 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 12 Feb 2019 15:41:01 +0100 Subject: Documentation: change linux-4.x references to 5.x As linux-5.0.x is coming up soon, the documentation should match, in particular the README.rst file, so change all 4.x references accordingly. There was a mix of lowercase and uppercase X here, which I changed to using lowercase consistently. Signed-off-by: Arnd Bergmann Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/README.rst | 32 +++--- Documentation/process/applying-patches.rst | 117 +++++++++++---------- .../translations/it_IT/admin-guide/README.rst | 2 +- 3 files changed, 78 insertions(+), 73 deletions(-) diff --git a/Documentation/admin-guide/README.rst b/Documentation/admin-guide/README.rst index 0797eec76be1..47e577264198 100644 --- a/Documentation/admin-guide/README.rst +++ b/Documentation/admin-guide/README.rst @@ -1,9 +1,9 @@ .. _readme: -Linux kernel release 4.x +Linux kernel release 5.x ============================================= -These are the release notes for Linux version 4. Read them carefully, +These are the release notes for Linux version 5. Read them carefully, as they tell you what this is all about, explain how to install the kernel, and what to do if something goes wrong. @@ -63,7 +63,7 @@ Installing the kernel source directory where you have permissions (e.g. your home directory) and unpack it:: - xz -cd linux-4.X.tar.xz | tar xvf - + xz -cd linux-5.x.tar.xz | tar xvf - Replace "X" with the version number of the latest kernel. @@ -72,26 +72,26 @@ Installing the kernel source files. They should match the library, and not get messed up by whatever the kernel-du-jour happens to be. - - You can also upgrade between 4.x releases by patching. Patches are + - You can also upgrade between 5.x releases by patching. Patches are distributed in the xz format. To install by patching, get all the newer patch files, enter the top level directory of the kernel source - (linux-4.X) and execute:: + (linux-5.x) and execute:: - xz -cd ../patch-4.x.xz | patch -p1 + xz -cd ../patch-5.x.xz | patch -p1 - Replace "x" for all versions bigger than the version "X" of your current + Replace "x" for all versions bigger than the version "x" of your current source tree, **in_order**, and you should be ok. You may want to remove the backup files (some-file-name~ or some-file-name.orig), and make sure that there are no failed patches (some-file-name# or some-file-name.rej). If there are, either you or I have made a mistake. - Unlike patches for the 4.x kernels, patches for the 4.x.y kernels + Unlike patches for the 5.x kernels, patches for the 5.x.y kernels (also known as the -stable kernels) are not incremental but instead apply - directly to the base 4.x kernel. For example, if your base kernel is 4.0 - and you want to apply the 4.0.3 patch, you must not first apply the 4.0.1 - and 4.0.2 patches. Similarly, if you are running kernel version 4.0.2 and - want to jump to 4.0.3, you must first reverse the 4.0.2 patch (that is, - patch -R) **before** applying the 4.0.3 patch. You can read more on this in + directly to the base 5.x kernel. For example, if your base kernel is 5.0 + and you want to apply the 5.0.3 patch, you must not first apply the 5.0.1 + and 5.0.2 patches. Similarly, if you are running kernel version 5.0.2 and + want to jump to 5.0.3, you must first reverse the 5.0.2 patch (that is, + patch -R) **before** applying the 5.0.3 patch. You can read more on this in :ref:`Documentation/process/applying-patches.rst `. Alternatively, the script patch-kernel can be used to automate this @@ -114,7 +114,7 @@ Installing the kernel source Software requirements --------------------- - Compiling and running the 4.x kernels requires up-to-date + Compiling and running the 5.x kernels requires up-to-date versions of various software packages. Consult :ref:`Documentation/process/changes.rst ` for the minimum version numbers required and how to get updates for these packages. Beware that using @@ -132,12 +132,12 @@ Build directory for the kernel place for the output files (including .config). Example:: - kernel source code: /usr/src/linux-4.X + kernel source code: /usr/src/linux-5.x build directory: /home/name/build/kernel To configure and build the kernel, use:: - cd /usr/src/linux-4.X + cd /usr/src/linux-5.x make O=/home/name/build/kernel menuconfig make O=/home/name/build/kernel sudo make O=/home/name/build/kernel modules_install install diff --git a/Documentation/process/applying-patches.rst b/Documentation/process/applying-patches.rst index dc2ddc345044..fbb9297e6360 100644 --- a/Documentation/process/applying-patches.rst +++ b/Documentation/process/applying-patches.rst @@ -216,14 +216,14 @@ You can use the ``interdiff`` program (http://cyberelk.net/tim/patchutils/) to generate a patch representing the differences between two patches and then apply the result. -This will let you move from something like 4.7.2 to 4.7.3 in a single +This will let you move from something like 5.7.2 to 5.7.3 in a single step. The -z flag to interdiff will even let you feed it patches in gzip or bzip2 compressed form directly without the use of zcat or bzcat or manual decompression. -Here's how you'd go from 4.7.2 to 4.7.3 in a single step:: +Here's how you'd go from 5.7.2 to 5.7.3 in a single step:: - interdiff -z ../patch-4.7.2.gz ../patch-4.7.3.gz | patch -p1 + interdiff -z ../patch-5.7.2.gz ../patch-5.7.3.gz | patch -p1 Although interdiff may save you a step or two you are generally advised to do the additional steps since interdiff can get things wrong in some cases. @@ -245,62 +245,67 @@ The patches are available at http://kernel.org/ Most recent patches are linked from the front page, but they also have specific homes. -The 4.x.y (-stable) and 4.x patches live at +The 5.x.y (-stable) and 5.x patches live at - https://www.kernel.org/pub/linux/kernel/v4.x/ + https://www.kernel.org/pub/linux/kernel/v5.x/ -The -rc patches live at +The -rc patches are not stored on the webserver but are generated on +demand from git tags such as - https://www.kernel.org/pub/linux/kernel/v4.x/testing/ + https://git.kernel.org/torvalds/p/v5.1-rc1/v5.0 +The stable -rc patches live at -The 4.x kernels + https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/ + + +The 5.x kernels =============== These are the base stable releases released by Linus. The highest numbered release is the most recent. If regressions or other serious flaws are found, then a -stable fix patch -will be released (see below) on top of this base. Once a new 4.x base +will be released (see below) on top of this base. Once a new 5.x base kernel is released, a patch is made available that is a delta between the -previous 4.x kernel and the new one. +previous 5.x kernel and the new one. -To apply a patch moving from 4.6 to 4.7, you'd do the following (note -that such patches do **NOT** apply on top of 4.x.y kernels but on top of the -base 4.x kernel -- if you need to move from 4.x.y to 4.x+1 you need to -first revert the 4.x.y patch). +To apply a patch moving from 5.6 to 5.7, you'd do the following (note +that such patches do **NOT** apply on top of 5.x.y kernels but on top of the +base 5.x kernel -- if you need to move from 5.x.y to 5.x+1 you need to +first revert the 5.x.y patch). Here are some examples:: - # moving from 4.6 to 4.7 + # moving from 5.6 to 5.7 - $ cd ~/linux-4.6 # change to kernel source dir - $ patch -p1 < ../patch-4.7 # apply the 4.7 patch + $ cd ~/linux-5.6 # change to kernel source dir + $ patch -p1 < ../patch-5.7 # apply the 5.7 patch $ cd .. - $ mv linux-4.6 linux-4.7 # rename source dir + $ mv linux-5.6 linux-5.7 # rename source dir - # moving from 4.6.1 to 4.7 + # moving from 5.6.1 to 5.7 - $ cd ~/linux-4.6.1 # change to kernel source dir - $ patch -p1 -R < ../patch-4.6.1 # revert the 4.6.1 patch - # source dir is now 4.6 - $ patch -p1 < ../patch-4.7 # apply new 4.7 patch + $ cd ~/linux-5.6.1 # change to kernel source dir + $ patch -p1 -R < ../patch-5.6.1 # revert the 5.6.1 patch + # source dir is now 5.6 + $ patch -p1 < ../patch-5.7 # apply new 5.7 patch $ cd .. - $ mv linux-4.6.1 linux-4.7 # rename source dir + $ mv linux-5.6.1 linux-5.7 # rename source dir -The 4.x.y kernels +The 5.x.y kernels ================= Kernels with 3-digit versions are -stable kernels. They contain small(ish) critical fixes for security problems or significant regressions discovered -in a given 4.x kernel. +in a given 5.x kernel. This is the recommended branch for users who want the most recent stable kernel and are not interested in helping test development/experimental versions. -If no 4.x.y kernel is available, then the highest numbered 4.x kernel is +If no 5.x.y kernel is available, then the highest numbered 5.x kernel is the current stable kernel. .. note:: @@ -308,23 +313,23 @@ the current stable kernel. The -stable team usually do make incremental patches available as well as patches against the latest mainline release, but I only cover the non-incremental ones below. The incremental ones can be found at - https://www.kernel.org/pub/linux/kernel/v4.x/incr/ + https://www.kernel.org/pub/linux/kernel/v5.x/incr/ -These patches are not incremental, meaning that for example the 4.7.3 -patch does not apply on top of the 4.7.2 kernel source, but rather on top -of the base 4.7 kernel source. +These patches are not incremental, meaning that for example the 5.7.3 +patch does not apply on top of the 5.7.2 kernel source, but rather on top +of the base 5.7 kernel source. -So, in order to apply the 4.7.3 patch to your existing 4.7.2 kernel -source you have to first back out the 4.7.2 patch (so you are left with a -base 4.7 kernel source) and then apply the new 4.7.3 patch. +So, in order to apply the 5.7.3 patch to your existing 5.7.2 kernel +source you have to first back out the 5.7.2 patch (so you are left with a +base 5.7 kernel source) and then apply the new 5.7.3 patch. Here's a small example:: - $ cd ~/linux-4.7.2 # change to the kernel source dir - $ patch -p1 -R < ../patch-4.7.2 # revert the 4.7.2 patch - $ patch -p1 < ../patch-4.7.3 # apply the new 4.7.3 patch + $ cd ~/linux-5.7.2 # change to the kernel source dir + $ patch -p1 -R < ../patch-5.7.2 # revert the 5.7.2 patch + $ patch -p1 < ../patch-5.7.3 # apply the new 5.7.3 patch $ cd .. - $ mv linux-4.7.2 linux-4.7.3 # rename the kernel source dir + $ mv linux-5.7.2 linux-5.7.3 # rename the kernel source dir The -rc kernels =============== @@ -343,38 +348,38 @@ This is a good branch to run for people who want to help out testing development kernels but do not want to run some of the really experimental stuff (such people should see the sections about -next and -mm kernels below). -The -rc patches are not incremental, they apply to a base 4.x kernel, just -like the 4.x.y patches described above. The kernel version before the -rcN +The -rc patches are not incremental, they apply to a base 5.x kernel, just +like the 5.x.y patches described above. The kernel version before the -rcN suffix denotes the version of the kernel that this -rc kernel will eventually turn into. -So, 4.8-rc5 means that this is the fifth release candidate for the 4.8 -kernel and the patch should be applied on top of the 4.7 kernel source. +So, 5.8-rc5 means that this is the fifth release candidate for the 5.8 +kernel and the patch should be applied on top of the 5.7 kernel source. Here are 3 examples of how to apply these patches:: - # first an example of moving from 4.7 to 4.8-rc3 + # first an example of moving from 5.7 to 5.8-rc3 - $ cd ~/linux-4.7 # change to the 4.7 source dir - $ patch -p1 < ../patch-4.8-rc3 # apply the 4.8-rc3 patch + $ cd ~/linux-5.7 # change to the 5.7 source dir + $ patch -p1 < ../patch-5.8-rc3 # apply the 5.8-rc3 patch $ cd .. - $ mv linux-4.7 linux-4.8-rc3 # rename the source dir + $ mv linux-5.7 linux-5.8-rc3 # rename the source dir - # now let's move from 4.8-rc3 to 4.8-rc5 + # now let's move from 5.8-rc3 to 5.8-rc5 - $ cd ~/linux-4.8-rc3 # change to the 4.8-rc3 dir - $ patch -p1 -R < ../patch-4.8-rc3 # revert the 4.8-rc3 patch - $ patch -p1 < ../patch-4.8-rc5 # apply the new 4.8-rc5 patch + $ cd ~/linux-5.8-rc3 # change to the 5.8-rc3 dir + $ patch -p1 -R < ../patch-5.8-rc3 # revert the 5.8-rc3 patch + $ patch -p1 < ../patch-5.8-rc5 # apply the new 5.8-rc5 patch $ cd .. - $ mv linux-4.8-rc3 linux-4.8-rc5 # rename the source dir + $ mv linux-5.8-rc3 linux-5.8-rc5 # rename the source dir - # finally let's try and move from 4.7.3 to 4.8-rc5 + # finally let's try and move from 5.7.3 to 5.8-rc5 - $ cd ~/linux-4.7.3 # change to the kernel source dir - $ patch -p1 -R < ../patch-4.7.3 # revert the 4.7.3 patch - $ patch -p1 < ../patch-4.8-rc5 # apply new 4.8-rc5 patch + $ cd ~/linux-5.7.3 # change to the kernel source dir + $ patch -p1 -R < ../patch-5.7.3 # revert the 5.7.3 patch + $ patch -p1 < ../patch-5.8-rc5 # apply new 5.8-rc5 patch $ cd .. - $ mv linux-4.7.3 linux-4.8-rc5 # rename the kernel source dir + $ mv linux-5.7.3 linux-5.8-rc5 # rename the kernel source dir The -mm patches and the linux-next tree diff --git a/Documentation/translations/it_IT/admin-guide/README.rst b/Documentation/translations/it_IT/admin-guide/README.rst index 80f5ffc94a9e..b37166817842 100644 --- a/Documentation/translations/it_IT/admin-guide/README.rst +++ b/Documentation/translations/it_IT/admin-guide/README.rst @@ -4,7 +4,7 @@ .. _it_readme: -Rilascio del kernel Linux 4.x +Rilascio del kernel Linux 5.x =================================================== .. warning:: -- cgit v1.2.3 From 660899ddf06ae8bb5bbbd0a19418b739375430c5 Mon Sep 17 00:00:00 2001 From: Tobias Brunner Date: Mon, 18 Feb 2019 10:49:39 +0100 Subject: xfrm: Fix inbound traffic via XFRM interfaces across network namespaces After moving an XFRM interface to another namespace it stays associated with the original namespace (net in `struct xfrm_if` and the list keyed with `xfrmi_net_id`), allowing processes in the new namespace to use SAs/policies that were created in the original namespace. For instance, this allows a keying daemon in one namespace to establish IPsec SAs for other namespaces without processes there having access to the keys or IKE credentials. This worked fine for outbound traffic, however, for inbound traffic the lookup for the interfaces and the policies used the incorrect namespace (the one the XFRM interface was moved to). Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") Signed-off-by: Tobias Brunner Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface.c | 4 ++-- net/xfrm/xfrm_policy.c | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 6be8c7df15bb..dbb3c1945b5c 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -76,10 +76,10 @@ static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb) int ifindex; struct xfrm_if *xi; - if (!skb->dev) + if (!secpath_exists(skb) || !skb->dev) return NULL; - xfrmn = net_generic(dev_net(skb->dev), xfrmi_net_id); + xfrmn = net_generic(xs_net(xfrm_input_state(skb)), xfrmi_net_id); ifindex = skb->dev->ifindex; for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) { diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ba0a4048c846..8d1a898d0ba5 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3314,8 +3314,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (ifcb) { xi = ifcb->decode_session(skb); - if (xi) + if (xi) { if_id = xi->p.if_id; + net = xi->net; + } } rcu_read_unlock(); -- cgit v1.2.3 From f54dada8274643e3ff4436df0ea124aeedc43cae Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 15 Feb 2019 16:34:27 +0000 Subject: arm64: fix SSBS sanitization In valid_user_regs() we treat SSBS as a RES0 bit, and consequently it is unexpectedly cleared when we restore a sigframe or fiddle with GPRs via ptrace. This patch fixes valid_user_regs() to account for this, updating the function to refer to the latest ARM ARM (ARM DDI 0487D.a). For AArch32 tasks, SSBS appears in bit 23 of SPSR_EL1, matching its position in the AArch32-native PSR format, and we don't need to translate it as we have to for DIT. There are no other bit assignments that we need to account for today. As the recent documentation describes the DIT bit, we can drop our comment regarding DIT. While removing SSBS from the RES0 masks, existing inconsistent whitespace is corrected. Fixes: d71be2b6c0e19180 ("arm64: cpufeature: Detect SSBS and advertise to userspace") Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Suzuki K Poulose Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 9dce33b0e260..ddaea0fd2fa4 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1702,19 +1702,20 @@ void syscall_trace_exit(struct pt_regs *regs) } /* - * SPSR_ELx bits which are always architecturally RES0 per ARM DDI 0487C.a - * We also take into account DIT (bit 24), which is not yet documented, and - * treat PAN and UAO as RES0 bits, as they are meaningless at EL0, and may be - * allocated an EL0 meaning in future. + * SPSR_ELx bits which are always architecturally RES0 per ARM DDI 0487D.a. + * We permit userspace to set SSBS (AArch64 bit 12, AArch32 bit 23) which is + * not described in ARM DDI 0487D.a. + * We treat PAN and UAO as RES0 bits, as they are meaningless at EL0, and may + * be allocated an EL0 meaning in future. * Userspace cannot use these until they have an architectural meaning. * Note that this follows the SPSR_ELx format, not the AArch32 PSR format. * We also reserve IL for the kernel; SS is handled dynamically. */ #define SPSR_EL1_AARCH64_RES0_BITS \ - (GENMASK_ULL(63,32) | GENMASK_ULL(27, 25) | GENMASK_ULL(23, 22) | \ - GENMASK_ULL(20, 10) | GENMASK_ULL(5, 5)) + (GENMASK_ULL(63, 32) | GENMASK_ULL(27, 25) | GENMASK_ULL(23, 22) | \ + GENMASK_ULL(20, 13) | GENMASK_ULL(11, 10) | GENMASK_ULL(5, 5)) #define SPSR_EL1_AARCH32_RES0_BITS \ - (GENMASK_ULL(63,32) | GENMASK_ULL(23, 22) | GENMASK_ULL(20,20)) + (GENMASK_ULL(63, 32) | GENMASK_ULL(22, 22) | GENMASK_ULL(20, 20)) static int valid_compat_regs(struct user_pt_regs *regs) { -- cgit v1.2.3 From 0738c8b5915c7eaf1e6007b441008e8f3b460443 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 14 Feb 2019 18:39:59 -0700 Subject: arm64/neon: Disable -Wincompatible-pointer-types when building with Clang After commit cc9f8349cb33 ("arm64: crypto: add NEON accelerated XOR implementation"), Clang builds for arm64 started failing with the following error message. arch/arm64/lib/xor-neon.c:58:28: error: incompatible pointer types assigning to 'const unsigned long *' from 'uint64_t *' (aka 'unsigned long long *') [-Werror,-Wincompatible-pointer-types] v3 = veorq_u64(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6)); ^~~~~~~~ /usr/lib/llvm-9/lib/clang/9.0.0/include/arm_neon.h:7538:47: note: expanded from macro 'vld1q_u64' __ret = (uint64x2_t) __builtin_neon_vld1q_v(__p0, 51); \ ^~~~ There has been quite a bit of debate and triage that has gone into figuring out what the proper fix is, viewable at the link below, which is still ongoing. Ard suggested disabling this warning with Clang with a pragma so no neon code will have this type of error. While this is not at all an ideal solution, this build error is the only thing preventing KernelCI from having successful arm64 defconfig and allmodconfig builds on linux-next. Getting continuous integration running is more important so new warnings/errors or boot failures can be caught and fixed quickly. Link: https://github.com/ClangBuiltLinux/linux/issues/283 Suggested-by: Ard Biesheuvel Acked-by: Ard Biesheuvel Signed-off-by: Nathan Chancellor Signed-off-by: Will Deacon --- arch/arm64/include/asm/neon-intrinsics.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/neon-intrinsics.h b/arch/arm64/include/asm/neon-intrinsics.h index 2ba6c6b9541f..71abfc7612b2 100644 --- a/arch/arm64/include/asm/neon-intrinsics.h +++ b/arch/arm64/include/asm/neon-intrinsics.h @@ -36,4 +36,8 @@ #include #endif +#ifdef CONFIG_CC_IS_CLANG +#pragma clang diagnostic ignored "-Wincompatible-pointer-types" +#endif + #endif /* __ASM_NEON_INTRINSICS_H */ -- cgit v1.2.3 From 0fd3fd0a9bb0b02b6435bb7070e9f7b82a23f068 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 5 Feb 2019 20:30:27 +0100 Subject: libceph: handle an empty authorize reply The authorize reply can be empty, for example when the ticket used to build the authorizer is too old and TAG_BADAUTHORIZER is returned from the service. Calling ->verify_authorizer_reply() results in an attempt to decrypt and validate (somewhat) random data in au->buf (most likely the signature block from calc_signature()), which fails and ends up in con_fault_finish() with !con->auth_retry. The ticket isn't invalidated and the connection is retried again and again until a new ticket is obtained from the monitor: libceph: osd2 192.168.122.1:6809 bad authorize reply libceph: osd2 192.168.122.1:6809 bad authorize reply libceph: osd2 192.168.122.1:6809 bad authorize reply libceph: osd2 192.168.122.1:6809 bad authorize reply Let TAG_BADAUTHORIZER handler kick in and increment con->auth_retry. Cc: stable@vger.kernel.org Fixes: 5c056fdc5b47 ("libceph: verify authorize reply on connect") Link: https://tracker.ceph.com/issues/20164 Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/messenger.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 3661cdd927f1..7e71b0df1fbc 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2058,6 +2058,8 @@ static int process_connect(struct ceph_connection *con) dout("process_connect on %p tag %d\n", con, (int)con->in_tag); if (con->auth) { + int len = le32_to_cpu(con->in_reply.authorizer_len); + /* * Any connection that defines ->get_authorizer() * should also define ->add_authorizer_challenge() and @@ -2067,8 +2069,7 @@ static int process_connect(struct ceph_connection *con) */ if (con->in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) { ret = con->ops->add_authorizer_challenge( - con, con->auth->authorizer_reply_buf, - le32_to_cpu(con->in_reply.authorizer_len)); + con, con->auth->authorizer_reply_buf, len); if (ret < 0) return ret; @@ -2078,10 +2079,12 @@ static int process_connect(struct ceph_connection *con) return 0; } - ret = con->ops->verify_authorizer_reply(con); - if (ret < 0) { - con->error_msg = "bad authorize reply"; - return ret; + if (len) { + ret = con->ops->verify_authorizer_reply(con); + if (ret < 0) { + con->error_msg = "bad authorize reply"; + return ret; + } } } -- cgit v1.2.3 From 04242ff3ac0abbaa4362f97781dac268e6c3541a Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 11 Feb 2019 15:18:52 +0800 Subject: ceph: avoid repeatedly adding inode to mdsc->snap_flush_list Otherwise, mdsc->snap_flush_list may get corrupted. Cc: stable@vger.kernel.org Signed-off-by: "Yan, Zheng" Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/snap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 041c27ea8de1..f74193da0e09 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -616,7 +616,8 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, capsnap->size); spin_lock(&mdsc->snap_flush_lock); - list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list); + if (list_empty(&ci->i_snap_flush_item)) + list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list); spin_unlock(&mdsc->snap_flush_lock); return 1; /* caller may want to ceph_flush_snaps */ } -- cgit v1.2.3 From 304017d31df36fb61eb2ed3ebf65fb6870b3c731 Mon Sep 17 00:00:00 2001 From: Bard liao Date: Sun, 17 Feb 2019 21:23:47 +0800 Subject: ASoC: topology: free created components in tplg load error Topology resources are no longer needed if any element failed to load. Signed-off-by: Bard liao Signed-off-by: Mark Brown --- sound/soc/soc-topology.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index fc79ec6927e3..731b963b6995 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -2487,6 +2487,7 @@ int snd_soc_tplg_component_load(struct snd_soc_component *comp, struct snd_soc_tplg_ops *ops, const struct firmware *fw, u32 id) { struct soc_tplg tplg; + int ret; /* setup parsing context */ memset(&tplg, 0, sizeof(tplg)); @@ -2500,7 +2501,12 @@ int snd_soc_tplg_component_load(struct snd_soc_component *comp, tplg.bytes_ext_ops = ops->bytes_ext_ops; tplg.bytes_ext_ops_count = ops->bytes_ext_ops_count; - return soc_tplg_load(&tplg); + ret = soc_tplg_load(&tplg); + /* free the created components if fail to load topology */ + if (ret) + snd_soc_tplg_component_remove(comp, SND_SOC_TPLG_INDEX_ALL); + + return ret; } EXPORT_SYMBOL_GPL(snd_soc_tplg_component_load); -- cgit v1.2.3 From 19dd0777773ab17b4d97f7105e836867c0cdecb4 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 15 Feb 2019 15:31:29 +0900 Subject: ASoC: simple-card: fixup refcount_t underflow commit da215354eb55c ("ASoC: simple-card: merge simple-scu-card") merged simple-card and simple-scu-card. Then it had refcount underflow bug. This patch fixup it. We will get below error without this patch. OF: ERROR: Bad of_node_put() on /sound CPU: 3 PID: 237 Comm: kworker/3:1 Not tainted 5.0.0-rc6+ #1514 Hardware name: Renesas H3ULCB Kingfisher board based on r8a7795 ES2.0+ (DT) Workqueue: events deferred_probe_work_func Call trace: dump_backtrace+0x0/0x150 show_stack+0x24/0x30 dump_stack+0xb0/0xec of_node_release+0xd0/0xd8 kobject_put+0x74/0xe8 of_node_put+0x24/0x30 __of_get_next_child+0x50/0x70 of_get_next_child+0x40/0x68 asoc_simple_card_probe+0x604/0x730 platform_drv_probe+0x58/0xa8 ... Reported-by: Vicente Bergas Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/generic/simple-card.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c index 37e001cf9cd1..3fe34417ec89 100644 --- a/sound/soc/generic/simple-card.c +++ b/sound/soc/generic/simple-card.c @@ -462,7 +462,7 @@ static int asoc_simple_card_parse_of(struct simple_card_data *priv) conf_idx = 0; node = of_get_child_by_name(top, PREFIX "dai-link"); if (!node) { - node = dev->of_node; + node = of_node_get(top); loop = 0; } -- cgit v1.2.3 From df1a2cb7c74b3d3abc8d8c2d690f82c8ebc3490a Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 12 Feb 2019 15:42:38 -0800 Subject: bpf/test_run: fix unkillable BPF_PROG_TEST_RUN Syzbot found out that running BPF_PROG_TEST_RUN with repeat=0xffffffff makes process unkillable. The problem is that when CONFIG_PREEMPT is enabled, we never see need_resched() return true. This is due to the fact that preempt_enable() (which we do in bpf_test_run_one on each iteration) now handles resched if it's needed. Let's disable preemption for the whole run, not per test. In this case we can properly see whether resched is needed. Let's also properly return -EINTR to the userspace in case of a signal interrupt. See recent discussion: http://lore.kernel.org/netdev/CAH3MdRWHr4N8jei8jxDppXjmw-Nw=puNDLbu1dQOFQHxfU2onA@mail.gmail.com I'll follow up with the same fix bpf_prog_test_run_flow_dissector in bpf-next. Reported-by: syzbot Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- net/bpf/test_run.c | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index fa2644d276ef..e31e1b20f7f4 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -13,27 +13,13 @@ #include #include -static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx, - struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) -{ - u32 ret; - - preempt_disable(); - rcu_read_lock(); - bpf_cgroup_storage_set(storage); - ret = BPF_PROG_RUN(prog, ctx); - rcu_read_unlock(); - preempt_enable(); - - return ret; -} - -static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *ret, - u32 *time) +static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, + u32 *retval, u32 *time) { struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { 0 }; enum bpf_cgroup_storage_type stype; u64 time_start, time_spent = 0; + int ret = 0; u32 i; for_each_cgroup_storage_type(stype) { @@ -48,25 +34,42 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *ret, if (!repeat) repeat = 1; + + rcu_read_lock(); + preempt_disable(); time_start = ktime_get_ns(); for (i = 0; i < repeat; i++) { - *ret = bpf_test_run_one(prog, ctx, storage); + bpf_cgroup_storage_set(storage); + *retval = BPF_PROG_RUN(prog, ctx); + + if (signal_pending(current)) { + ret = -EINTR; + break; + } + if (need_resched()) { - if (signal_pending(current)) - break; time_spent += ktime_get_ns() - time_start; + preempt_enable(); + rcu_read_unlock(); + cond_resched(); + + rcu_read_lock(); + preempt_disable(); time_start = ktime_get_ns(); } } time_spent += ktime_get_ns() - time_start; + preempt_enable(); + rcu_read_unlock(); + do_div(time_spent, repeat); *time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent; for_each_cgroup_storage_type(stype) bpf_cgroup_storage_free(storage[stype]); - return 0; + return ret; } static int bpf_test_finish(const union bpf_attr *kattr, -- cgit v1.2.3 From 8f5b27347e88b171c755562f0090ce40e514fc00 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 18 Feb 2019 16:58:01 +1100 Subject: powerpc/powernv/sriov: Register IOMMU groups for VFs The compound IOMMU group rework moved iommu_register_group() together in pnv_pci_ioda_setup_iommu_api() (which is a part of ppc_md.pcibios_fixup). As the result, pnv_ioda_setup_bus_iommu_group() does not create groups any more, it only adds devices to groups. This works fine for boot time devices. However IOMMU groups for SRIOV's VFs were added by pnv_ioda_setup_bus_iommu_group() so this got broken: pnv_tce_iommu_bus_notifier() expects a group to be registered for VF and it is not. This adds missing group registration and adds a NULL pointer check into the bus notifier so we won't crash if there is no group, although it is not expected to happen now because of the change above. Example oops seen prior to this patch: $ echo 1 > /sys/bus/pci/devices/0000\:01\:00.0/sriov_numvfs Unable to handle kernel paging request for data at address 0x00000030 Faulting instruction address: 0xc0000000004a6018 Oops: Kernel access of bad area, sig: 11 [#1] LE SMP NR_CPUS=2048 NUMA PowerNV CPU: 46 PID: 7006 Comm: bash Not tainted 4.15-ish NIP: c0000000004a6018 LR: c0000000004a6014 CTR: 0000000000000000 REGS: c000008fc876b400 TRAP: 0300 Not tainted (4.15-ish) MSR: 900000000280b033 CFAR: c000000000d0be20 DAR: 0000000000000030 DSISR: 40000000 SOFTE: 1 ... NIP sysfs_do_create_link_sd.isra.0+0x68/0x150 LR sysfs_do_create_link_sd.isra.0+0x64/0x150 Call Trace: pci_dev_type+0x0/0x30 (unreliable) iommu_group_add_device+0x8c/0x600 iommu_add_device+0xe8/0x180 pnv_tce_iommu_bus_notifier+0xb0/0xf0 notifier_call_chain+0x9c/0x110 blocking_notifier_call_chain+0x64/0xa0 device_add+0x524/0x7d0 pci_device_add+0x248/0x450 pci_iov_add_virtfn+0x294/0x3e0 pci_enable_sriov+0x43c/0x580 mlx5_core_sriov_configure+0x15c/0x2f0 [mlx5_core] sriov_numvfs_store+0x180/0x240 dev_attr_store+0x3c/0x60 sysfs_kf_write+0x64/0x90 kernfs_fop_write+0x1ac/0x240 __vfs_write+0x3c/0x70 vfs_write+0xd8/0x220 SyS_write+0x6c/0x110 system_call+0x58/0x6c Fixes: 0bd971676e68 ("powerpc/powernv/npu: Add compound IOMMU groups") Signed-off-by: Alexey Kardashevskiy Reported-by: Santwana Samantray Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 2 ++ arch/powerpc/platforms/powernv/pci.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 7db3119f8a5b..145373f0e5dc 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1593,6 +1593,8 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) pnv_pci_ioda2_setup_dma_pe(phb, pe); #ifdef CONFIG_IOMMU_API + iommu_register_group(&pe->table_group, + pe->phb->hose->global_number, pe->pe_number); pnv_ioda_setup_bus_iommu_group(pe, &pe->table_group, NULL); #endif } diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 45fb70b4bfa7..ef9448a907c6 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -1147,6 +1147,8 @@ static int pnv_tce_iommu_bus_notifier(struct notifier_block *nb, return 0; pe = &phb->ioda.pe_array[pdn->pe_number]; + if (!pe->table_group.group) + return 0; iommu_add_device(&pe->table_group, dev); return 0; case BUS_NOTIFY_DEL_DEVICE: -- cgit v1.2.3 From 8cbd468bdeb5ed3acac2d7a9f7494d5b77e46297 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sat, 16 Feb 2019 11:31:48 -0500 Subject: cpufreq: scmi: Fix use-after-free in scmi_cpufreq_exit() This issue was detected with the help of Coccinelle. So change the order of function calls to fix it. Fixes: 1690d8bb91e37 (cpufreq: scpi/scmi: Fix freeing of dynamic OPPs) Signed-off-by: Yangtao Li Acked-by: Viresh Kumar Acked-by: Sudeep Holla Cc: 4.20+ # 4.20+ Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/scmi-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index 242c3370544e..9ed46d188cb5 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -187,8 +187,8 @@ static int scmi_cpufreq_exit(struct cpufreq_policy *policy) cpufreq_cooling_unregister(priv->cdev); dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); - kfree(priv); dev_pm_opp_remove_all_dynamic(priv->cpu_dev); + kfree(priv); return 0; } -- cgit v1.2.3 From 6fc979179c98d2591784937d5618edc3e5cd31c1 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 15 Feb 2019 16:30:42 +0100 Subject: ARM: dts: armada-xp: fix Armada XP boards NAND description Commit 3b79919946cd2cf4dac47842afc9a893acec4ed7 ("ARM: dts: armada-370-xp: update NAND node with new bindings") updated some Marvell Armada DT description to use the new NAND controller bindings, but did it incorrectly for a number of boards: armada-xp-gp, armada-xp-db and armada-xp-lenovo-ix4-300d. Due to this, the NAND is no longer detected on those platforms. This commit fixes that by properly using the new NAND DT binding. This commit was runtime-tested on Armada XP GP, the two other platforms are only compile-tested. Fixes: 3b79919946cd2 ("ARM: dts: armada-370-xp: update NAND node with new bindings") Cc: Miquel Raynal Signed-off-by: Thomas Petazzoni Signed-off-by: Gregory CLEMENT --- arch/arm/boot/dts/armada-xp-db.dts | 46 ++++++------- arch/arm/boot/dts/armada-xp-gp.dts | 13 ++-- arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts | 85 +++++++++++++------------ 3 files changed, 76 insertions(+), 68 deletions(-) diff --git a/arch/arm/boot/dts/armada-xp-db.dts b/arch/arm/boot/dts/armada-xp-db.dts index f3ac7483afed..5d04dc68cf57 100644 --- a/arch/arm/boot/dts/armada-xp-db.dts +++ b/arch/arm/boot/dts/armada-xp-db.dts @@ -144,30 +144,32 @@ status = "okay"; }; - nand@d0000 { + nand-controller@d0000 { status = "okay"; - label = "pxa3xx_nand-0"; - num-cs = <1>; - marvell,nand-keep-config; - nand-on-flash-bbt; - - partitions { - compatible = "fixed-partitions"; - #address-cells = <1>; - #size-cells = <1>; - - partition@0 { - label = "U-Boot"; - reg = <0 0x800000>; - }; - partition@800000 { - label = "Linux"; - reg = <0x800000 0x800000>; - }; - partition@1000000 { - label = "Filesystem"; - reg = <0x1000000 0x3f000000>; + nand@0 { + reg = <0>; + label = "pxa3xx_nand-0"; + nand-rb = <0>; + nand-on-flash-bbt; + + partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; + + partition@0 { + label = "U-Boot"; + reg = <0 0x800000>; + }; + partition@800000 { + label = "Linux"; + reg = <0x800000 0x800000>; + }; + partition@1000000 { + label = "Filesystem"; + reg = <0x1000000 0x3f000000>; + }; }; }; }; diff --git a/arch/arm/boot/dts/armada-xp-gp.dts b/arch/arm/boot/dts/armada-xp-gp.dts index 1139e9469a83..b4cca507cf13 100644 --- a/arch/arm/boot/dts/armada-xp-gp.dts +++ b/arch/arm/boot/dts/armada-xp-gp.dts @@ -160,12 +160,15 @@ status = "okay"; }; - nand@d0000 { + nand-controller@d0000 { status = "okay"; - label = "pxa3xx_nand-0"; - num-cs = <1>; - marvell,nand-keep-config; - nand-on-flash-bbt; + + nand@0 { + reg = <0>; + label = "pxa3xx_nand-0"; + nand-rb = <0>; + nand-on-flash-bbt; + }; }; }; diff --git a/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts b/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts index bbbb38888bb8..87dcb502f72d 100644 --- a/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts +++ b/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts @@ -81,49 +81,52 @@ }; - nand@d0000 { + nand-controller@d0000 { status = "okay"; - label = "pxa3xx_nand-0"; - num-cs = <1>; - marvell,nand-keep-config; - nand-on-flash-bbt; - - partitions { - compatible = "fixed-partitions"; - #address-cells = <1>; - #size-cells = <1>; - - partition@0 { - label = "u-boot"; - reg = <0x00000000 0x000e0000>; - read-only; - }; - - partition@e0000 { - label = "u-boot-env"; - reg = <0x000e0000 0x00020000>; - read-only; - }; - - partition@100000 { - label = "u-boot-env2"; - reg = <0x00100000 0x00020000>; - read-only; - }; - - partition@120000 { - label = "zImage"; - reg = <0x00120000 0x00400000>; - }; - - partition@520000 { - label = "initrd"; - reg = <0x00520000 0x00400000>; - }; - partition@e00000 { - label = "boot"; - reg = <0x00e00000 0x3f200000>; + nand@0 { + reg = <0>; + label = "pxa3xx_nand-0"; + nand-rb = <0>; + nand-on-flash-bbt; + + partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; + + partition@0 { + label = "u-boot"; + reg = <0x00000000 0x000e0000>; + read-only; + }; + + partition@e0000 { + label = "u-boot-env"; + reg = <0x000e0000 0x00020000>; + read-only; + }; + + partition@100000 { + label = "u-boot-env2"; + reg = <0x00100000 0x00020000>; + read-only; + }; + + partition@120000 { + label = "zImage"; + reg = <0x00120000 0x00400000>; + }; + + partition@520000 { + label = "initrd"; + reg = <0x00520000 0x00400000>; + }; + + partition@e00000 { + label = "boot"; + reg = <0x00e00000 0x3f200000>; + }; }; }; }; -- cgit v1.2.3 From bdd22a41d55bb0068c8685e28839ed9492e96aba Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Sun, 17 Feb 2019 20:21:40 +0200 Subject: arm64: dts: clearfog-gt-8k: fix SGMII PHY reset signal The PHY reset signal goes to mpp43 on CP0. Fixes: babc5544c293 ("arm64: dts: clearfog-gt-8k: 1G eth PHY reset signal") Reported-by: Denis Odintsov Signed-off-by: Baruch Siach Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts b/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts index 5b4a9609e31f..2468762283a5 100644 --- a/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts +++ b/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts @@ -351,7 +351,7 @@ reg = <0>; pinctrl-names = "default"; pinctrl-0 = <&cp0_copper_eth_phy_reset>; - reset-gpios = <&cp1_gpio1 11 GPIO_ACTIVE_LOW>; + reset-gpios = <&cp0_gpio2 11 GPIO_ACTIVE_LOW>; reset-assert-us = <10000>; }; -- cgit v1.2.3 From 759c962d3c9bb1a60e3b4b780daa66ee6d4be13a Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 19 Feb 2019 08:46:32 -0800 Subject: ARM: dts: am335x-evmsk: Fix PHY mode for ethernet The PHY must add both tx and rx delay and not only on the tx clock. The board uses AR8031_AL1A PHY where the rx delay is enabled by default, the tx dealy is disabled. The reason why rgmii-txid worked because the rx delay was not disabled by the driver so essentially we ended up with rgmii-id PHY mode. Signed-off-by: Peter Ujfalusi Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-evmsk.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts index 172c0224e7f6..b128998097ce 100644 --- a/arch/arm/boot/dts/am335x-evmsk.dts +++ b/arch/arm/boot/dts/am335x-evmsk.dts @@ -651,13 +651,13 @@ &cpsw_emac0 { phy-handle = <ðphy0>; - phy-mode = "rgmii-txid"; + phy-mode = "rgmii-id"; dual_emac_res_vlan = <1>; }; &cpsw_emac1 { phy-handle = <ðphy1>; - phy-mode = "rgmii-txid"; + phy-mode = "rgmii-id"; dual_emac_res_vlan = <2>; }; -- cgit v1.2.3 From 37685f6a63eeca2135d1f704e7638409a071b1f6 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 19 Feb 2019 08:46:33 -0800 Subject: ARM: dts: am335x-evm: Fix PHY mode for ethernet The PHY must add both tx and rx delay and not only on the tx clock. The board uses AR8031_AL1A PHY where the rx delay is enabled by default, the tx dealy is disabled. The reason why rgmii-txid worked because the rx delay was not disabled by the driver so essentially we ended up with rgmii-id PHY mode. Signed-off-by: Peter Ujfalusi Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-evm.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/am335x-evm.dts b/arch/arm/boot/dts/am335x-evm.dts index b67f5fee1469..dce5be5df97b 100644 --- a/arch/arm/boot/dts/am335x-evm.dts +++ b/arch/arm/boot/dts/am335x-evm.dts @@ -729,7 +729,7 @@ &cpsw_emac0 { phy-handle = <ðphy0>; - phy-mode = "rgmii-txid"; + phy-mode = "rgmii-id"; }; &tscadc { -- cgit v1.2.3 From 450d007d199e632a1a4c4b91302deacd7d56815f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 14 Feb 2019 23:46:19 +0100 Subject: gpu: drm: radeon: Set DPM_FLAG_NEVER_SKIP when enabling PM-runtime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On HP ProBook 4540s, if PM-runtime is enabled in the radeon driver and the direct-complete optimization is used for the radeon device during system-wide suspend, the system doesn't resume. Preventing direct-complete from being used with the radeon device by setting the DPM_FLAG_NEVER_SKIP driver flag for it makes the problem go away, which indicates that direct-complete is not safe for the radeon driver in general and should not be used with it (at least for now). This fixes a regression introduced by commit c62ec4610c40 ("PM / core: Fix direct_complete handling for devices with no callbacks") which allowed direct-complete to be applied to devices without PM callbacks (again) which in turn unlocked direct-complete for radeon on HP ProBook 4540s. Fixes: c62ec4610c40 ("PM / core: Fix direct_complete handling for devices with no callbacks") Link: https://bugzilla.kernel.org/show_bug.cgi?id=201519 Reported-by: Ярослав Семченко Tested-by: Ярослав Семченко Signed-off-by: Rafael J. Wysocki Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_kms.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index dec1e081f529..6a8fb6fd183c 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -172,6 +172,7 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) } if (radeon_is_px(dev)) { + dev_pm_set_driver_flags(dev->dev, DPM_FLAG_NEVER_SKIP); pm_runtime_use_autosuspend(dev->dev); pm_runtime_set_autosuspend_delay(dev->dev, 5000); pm_runtime_set_active(dev->dev); -- cgit v1.2.3 From d33158530660bc89be3cc870a2152e4e9a76cac7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 18 Feb 2019 17:11:38 -0500 Subject: drm/amdgpu: Set DPM_FLAG_NEVER_SKIP when enabling PM-runtime Based on a similar patch from Rafael for radeon. When using ATPX to control dGPU power, the state is not retained across suspend and resume cycles by default. This can probably be loosened for Hybrid Graphics (_PR3) laptops where I think the state is properly retained. Fixes: c62ec4610c40 ("PM / core: Fix direct_complete handling for devices with no callbacks") Cc: Rafael J. Wysocki Acked-by: Rafael J. Wysocki Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index bc62bf41b7e9..5dc349173e4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -212,6 +212,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) } if (amdgpu_device_is_px(dev)) { + dev_pm_set_driver_flags(dev->dev, DPM_FLAG_NEVER_SKIP); pm_runtime_use_autosuspend(dev->dev); pm_runtime_set_autosuspend_delay(dev->dev, 5000); pm_runtime_set_active(dev->dev); -- cgit v1.2.3 From 9db97d8aa8f8a518c421196a504dbfc942ef8d40 Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Fri, 15 Feb 2019 11:05:04 -0500 Subject: drm/amdgpu: Update sdma golden setting for vega20 According to hardware engineer, WRITE_BURST_LENGTH [9:8] in register SDMA0_CHICKEN_BITS need to change to 3 for better performance Signed-off-by: shaoyunl Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 6811a5d05b27..aa2f71cc1eba 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -128,7 +128,7 @@ static const struct soc15_reg_golden golden_settings_sdma0_4_2_init[] = { static const struct soc15_reg_golden golden_settings_sdma0_4_2[] = { - SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), @@ -158,7 +158,7 @@ static const struct soc15_reg_golden golden_settings_sdma0_4_2[] = }; static const struct soc15_reg_golden golden_settings_sdma1_4_2[] = { - SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), -- cgit v1.2.3 From d2f0b53bda3193874f3905bc839888f895d1c0cf Mon Sep 17 00:00:00 2001 From: "Leo (Hanghong) Ma" Date: Thu, 24 Jan 2019 15:07:52 -0500 Subject: drm/amd/display: Fix MST reboot/poweroff sequence [Why] drm_dp_mst_topology_mgr_suspend() is added into the new reboot sequence, which disables the UP request at the beginning. Therefore sideband messages are blocked. [How] Finish MST sideband message transaction before UP request is suppressed. Signed-off-by: Leo (Hanghong) Ma Reviewed-by: Roman Li Acked-by: Leo Li Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0b392bfca284..5296b8f3e0ab 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -786,12 +786,13 @@ static int dm_suspend(void *handle) struct amdgpu_display_manager *dm = &adev->dm; int ret = 0; + WARN_ON(adev->dm.cached_state); + adev->dm.cached_state = drm_atomic_helper_suspend(adev->ddev); + s3_handle_mst(adev->ddev, true); amdgpu_dm_irq_suspend(adev); - WARN_ON(adev->dm.cached_state); - adev->dm.cached_state = drm_atomic_helper_suspend(adev->ddev); dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3); -- cgit v1.2.3 From 8852ae9a82498207c15262b6294d14aea1796966 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Mon, 28 Jan 2019 10:59:34 -0500 Subject: drm/amd/display: Raise dispclk value for dce11 [Why] The visual corruption due to low display clock value. Observed on Carrizo 4K@60Hz. [How] There was earlier patch for dce_update_clocks: Adding +15% workaround also to to dce11_update_clocks Signed-off-by: Roman Li Reviewed-by: Nicholas Kazlauskas Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c index 19801bdba0d2..7a72ee46f14b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c @@ -662,6 +662,11 @@ static void dce11_update_clocks(struct clk_mgr *clk_mgr, { struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr); struct dm_pp_power_level_change_request level_change_req; + int patched_disp_clk = context->bw.dce.dispclk_khz; + + /*TODO: W/A for dal3 linux, investigate why this works */ + if (!clk_mgr_dce->dfs_bypass_active) + patched_disp_clk = patched_disp_clk * 115 / 100; level_change_req.power_level = dce_get_required_clocks_state(clk_mgr, context); /* get max clock state from PPLIB */ @@ -671,9 +676,9 @@ static void dce11_update_clocks(struct clk_mgr *clk_mgr, clk_mgr_dce->cur_min_clks_state = level_change_req.power_level; } - if (should_set_clock(safe_to_lower, context->bw.dce.dispclk_khz, clk_mgr->clks.dispclk_khz)) { - context->bw.dce.dispclk_khz = dce_set_clock(clk_mgr, context->bw.dce.dispclk_khz); - clk_mgr->clks.dispclk_khz = context->bw.dce.dispclk_khz; + if (should_set_clock(safe_to_lower, patched_disp_clk, clk_mgr->clks.dispclk_khz)) { + context->bw.dce.dispclk_khz = dce_set_clock(clk_mgr, patched_disp_clk); + clk_mgr->clks.dispclk_khz = patched_disp_clk; } dce11_pplib_apply_display_requirements(clk_mgr->ctx->dc, context); } -- cgit v1.2.3 From d179b88deb3bf6fed4991a31fd6f0f2cad21fab5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 15 Feb 2019 12:30:19 +0000 Subject: drm/i915/fbdev: Actually configure untiled displays If we skipped all the connectors that were not part of a tile, we would leave conn_seq=0 and conn_configured=0, convincing ourselves that we had stagnated in our configuration attempts. Avoid this situation by starting conn_seq=ALL_CONNECTORS, and repeating until we find no more connectors to configure. Fixes: 754a76591b12 ("drm/i915/fbdev: Stop repeating tile configuration on stagnation") Reported-by: Maarten Lankhorst Signed-off-by: Chris Wilson Cc: Maarten Lankhorst Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20190215123019.32283-1-chris@chris-wilson.co.uk Cc: # v3.19+ (cherry picked from commit d9b308b1f8a1acc0c3279f443d4fe0f9f663252e) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_fbdev.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 7f365ac0b549..4ee16b264dbe 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -336,8 +336,8 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, bool *enabled, int width, int height) { struct drm_i915_private *dev_priv = to_i915(fb_helper->dev); - unsigned long conn_configured, conn_seq, mask; unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG); + unsigned long conn_configured, conn_seq; int i, j; bool *save_enabled; bool fallback = true, ret = true; @@ -355,10 +355,9 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, drm_modeset_backoff(&ctx); memcpy(save_enabled, enabled, count); - mask = GENMASK(count - 1, 0); + conn_seq = GENMASK(count - 1, 0); conn_configured = 0; retry: - conn_seq = conn_configured; for (i = 0; i < count; i++) { struct drm_fb_helper_connector *fb_conn; struct drm_connector *connector; @@ -371,7 +370,8 @@ retry: if (conn_configured & BIT(i)) continue; - if (conn_seq == 0 && !connector->has_tile) + /* First pass, only consider tiled connectors */ + if (conn_seq == GENMASK(count - 1, 0) && !connector->has_tile) continue; if (connector->status == connector_status_connected) @@ -475,8 +475,10 @@ retry: conn_configured |= BIT(i); } - if ((conn_configured & mask) != mask && conn_configured != conn_seq) + if (conn_configured != conn_seq) { /* repeat until no more are found */ + conn_seq = conn_configured; goto retry; + } /* * If the BIOS didn't enable everything it could, fall back to have the -- cgit v1.2.3 From 74698f6971f25d045301139413578865fc2bd8f9 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Wed, 20 Feb 2019 11:43:05 +0000 Subject: arm64: Relax GIC version check during early boot Updates to the GIC architecture allow ID_AA64PFR0_EL1.GIC to have values other than 0 or 1. At the moment, Linux is quite strict in the way it handles this field at early boot stage (cpufeature is fine) and will refuse to use the system register CPU interface if it doesn't find the value 1. Fixes: 021f653791ad17e03f98aaa7fb933816ae16f161 ("irqchip: gic-v3: Initial support for GICv3") Reported-by: Chase Conklin Reviewed-by: Marc Zyngier Signed-off-by: Vladimir Murzin Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 15d79a8e5e5e..eecf7927dab0 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -539,8 +539,7 @@ set_hcr: /* GICv3 system register access */ mrs x0, id_aa64pfr0_el1 ubfx x0, x0, #24, #4 - cmp x0, #1 - b.ne 3f + cbz x0, 3f mrs_s x0, SYS_ICC_SRE_EL2 orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 -- cgit v1.2.3 From 94d9b9337d09bdd27735005b3251d97ab29f7273 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 11 Feb 2019 12:09:19 +0100 Subject: ARM: tegra: Restore DT ABI on Tegra124 Chromebooks Commit 482997699ef0 ("ARM: tegra: Fix unit_address_vs_reg DTC warnings for /memory") inadventently broke device tree ABI by adding a unit- address to the "/memory" node because the device tree compiler flagged the missing unit-address as a warning. Tegra124 Chromebooks (a.k.a. Nyan) use a bootloader that relies on the full name of the memory node in device tree being exactly "/memory". It can be argued whether this was a good decision or not, and some other bootloaders (such as U-Boot) do accept a unit-address in the name of the node, but the device tree is an ABI and we can't break existing setups just because the device tree compiler considers it bad practice to omit the unit-address nowadays. This partially reverts the offending commit and restores device tree ABI compatibility. Fixes: 482997699ef0 ("ARM: tegra: Fix unit_address_vs_reg DTC warnings for /memory") Reported-by: Tristan Bastian Signed-off-by: Thierry Reding Tested-by: Tristan Bastian Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/tegra124-nyan.dtsi | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/tegra124-nyan.dtsi b/arch/arm/boot/dts/tegra124-nyan.dtsi index d5f11d6d987e..bc85b6a166c7 100644 --- a/arch/arm/boot/dts/tegra124-nyan.dtsi +++ b/arch/arm/boot/dts/tegra124-nyan.dtsi @@ -13,10 +13,25 @@ stdout-path = "serial0:115200n8"; }; - memory@80000000 { + /* + * Note that recent version of the device tree compiler (starting with + * version 1.4.2) warn about this node containing a reg property, but + * missing a unit-address. However, the bootloader on these Chromebook + * devices relies on the full name of this node to be exactly /memory. + * Adding the unit-address causes the bootloader to create a /memory + * node and write the memory bank configuration to that node, which in + * turn leads the kernel to believe that the device has 2 GiB of + * memory instead of the amount detected by the bootloader. + * + * The name of this node is effectively ABI and must not be changed. + */ + memory { + device_type = "memory"; reg = <0x0 0x80000000 0x0 0x80000000>; }; + /delete-node/ memory@80000000; + host1x@50000000 { hdmi@54280000 { status = "okay"; -- cgit v1.2.3 From 9c2054a5cf415a9dc32c91ffde78399955deb571 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 20 Feb 2019 10:32:52 +0000 Subject: net: dsa: fix unintended change of bridge interface STP state When a DSA port is added to a bridge and brought up, the resulting STP state programmed into the hardware depends on the order that these operations are performed. However, the Linux bridge code believes that the port is in disabled mode. If the DSA port is first added to a bridge and then brought up, it will be in blocking mode. If it is brought up and then added to the bridge, it will be in disabled mode. This difference is caused by DSA always setting the STP mode in dsa_port_enable() whether or not this port is part of a bridge. Since bridge always sets the STP state when the port is added, brought up or taken down, it is unnecessary for us to manipulate the STP state. Apparently, this code was copied from Rocker, and the very next day a similar fix for Rocker was merged but was not propagated to DSA. See e47172ab7e41 ("rocker: put port in FORWADING state after leaving bridge") Fixes: b73adef67765 ("net: dsa: integrate with SWITCHDEV for HW bridging") Signed-off-by: Russell King Reviewed-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/port.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/dsa/port.c b/net/dsa/port.c index 2d7e01b23572..2a2a878b5ce3 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -69,7 +69,6 @@ static void dsa_port_set_state_now(struct dsa_port *dp, u8 state) int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy) { - u8 stp_state = dp->bridge_dev ? BR_STATE_BLOCKING : BR_STATE_FORWARDING; struct dsa_switch *ds = dp->ds; int port = dp->index; int err; @@ -80,7 +79,8 @@ int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy) return err; } - dsa_port_set_state_now(dp, stp_state); + if (!dp->bridge_dev) + dsa_port_set_state_now(dp, BR_STATE_FORWARDING); return 0; } @@ -90,7 +90,8 @@ void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy) struct dsa_switch *ds = dp->ds; int port = dp->index; - dsa_port_set_state_now(dp, BR_STATE_DISABLED); + if (!dp->bridge_dev) + dsa_port_set_state_now(dp, BR_STATE_DISABLED); if (ds->ops->port_disable) ds->ops->port_disable(ds, port, phy); -- cgit v1.2.3 From 1b328a2e095a009518ebac05e937cc0fc242fede Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 19 Feb 2019 17:51:14 +0100 Subject: clk: at91: fix at91sam9x5 peripheral clock number nck() looks at the last id in an array and unfortunately, at91sam9x35_periphck has a sentinel, hence the id is 0 and the calculated number of peripheral clocks is 1 instead of a maximum of 31. Fixes: 1eabdc2f9dd8 ("clk: at91: add at91sam9x5 PMCs driver") Signed-off-by: Alexandre Belloni Acked-by: Nicolas Ferre Cc: # v4.20+ Signed-off-by: Stephen Boyd --- drivers/clk/at91/at91sam9x5.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/clk/at91/at91sam9x5.c b/drivers/clk/at91/at91sam9x5.c index 2fe225a697df..d37e7ed9eb90 100644 --- a/drivers/clk/at91/at91sam9x5.c +++ b/drivers/clk/at91/at91sam9x5.c @@ -144,8 +144,7 @@ static void __init at91sam9x5_pmc_setup(struct device_node *np, return; at91sam9x5_pmc = pmc_data_allocate(PMC_MAIN + 1, - nck(at91sam9x5_systemck), - nck(at91sam9x35_periphck), 0); + nck(at91sam9x5_systemck), 31, 0); if (!at91sam9x5_pmc) return; -- cgit v1.2.3 From 65a91e2e597dea62a798a8b771edc44859037e7f Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 8 Feb 2019 15:40:59 +0100 Subject: clk: at91: fix masterck name The master clock is actually named masterck earlier in the driver. Having "mck" in the parent list means that it can never be selected. Fixes: 1eabdc2f9dd8 ("clk: at91: add at91sam9x5 PMCs driver") Fixes: a2038077de9a ("clk: at91: add sama5d2 PMC driver") Fixes: 084b696bb509 ("clk: at91: add sama5d4 pmc driver") Signed-off-by: Alexandre Belloni Acked-by: Nicolas Ferre Cc: # v4.20+ Signed-off-by: Stephen Boyd --- drivers/clk/at91/at91sam9x5.c | 2 +- drivers/clk/at91/sama5d2.c | 4 ++-- drivers/clk/at91/sama5d4.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/clk/at91/at91sam9x5.c b/drivers/clk/at91/at91sam9x5.c index d37e7ed9eb90..3487e03d4bc6 100644 --- a/drivers/clk/at91/at91sam9x5.c +++ b/drivers/clk/at91/at91sam9x5.c @@ -209,7 +209,7 @@ static void __init at91sam9x5_pmc_setup(struct device_node *np, parent_names[1] = "mainck"; parent_names[2] = "plladivck"; parent_names[3] = "utmick"; - parent_names[4] = "mck"; + parent_names[4] = "masterck"; for (i = 0; i < 2; i++) { char name[6]; diff --git a/drivers/clk/at91/sama5d2.c b/drivers/clk/at91/sama5d2.c index d69ad96fe988..cd0ef7274fdb 100644 --- a/drivers/clk/at91/sama5d2.c +++ b/drivers/clk/at91/sama5d2.c @@ -240,7 +240,7 @@ static void __init sama5d2_pmc_setup(struct device_node *np) parent_names[1] = "mainck"; parent_names[2] = "plladivck"; parent_names[3] = "utmick"; - parent_names[4] = "mck"; + parent_names[4] = "masterck"; for (i = 0; i < 3; i++) { char name[6]; @@ -291,7 +291,7 @@ static void __init sama5d2_pmc_setup(struct device_node *np) parent_names[1] = "mainck"; parent_names[2] = "plladivck"; parent_names[3] = "utmick"; - parent_names[4] = "mck"; + parent_names[4] = "masterck"; parent_names[5] = "audiopll_pmcck"; for (i = 0; i < ARRAY_SIZE(sama5d2_gck); i++) { hw = at91_clk_register_generated(regmap, &pmc_pcr_lock, diff --git a/drivers/clk/at91/sama5d4.c b/drivers/clk/at91/sama5d4.c index e358be7f6c8d..b645a9d59cdb 100644 --- a/drivers/clk/at91/sama5d4.c +++ b/drivers/clk/at91/sama5d4.c @@ -207,7 +207,7 @@ static void __init sama5d4_pmc_setup(struct device_node *np) parent_names[1] = "mainck"; parent_names[2] = "plladivck"; parent_names[3] = "utmick"; - parent_names[4] = "mck"; + parent_names[4] = "masterck"; for (i = 0; i < 3; i++) { char name[6]; -- cgit v1.2.3 From 0921c41e19028314830b33daa681e46b46477c5e Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 1 Feb 2019 09:36:59 -0500 Subject: drm/amd/display: Fix negative cursor pos programming [Why] If the cursor pos passed from DM is less than the plane_state->dst_rect top left corner then the unsigned cursor pos wraps around to a large positive number since cursor pos is a u32. There was an attempt to guard against this in hubp1_cursor_set_position by checking the src_x_offset and src_y_offset and offseting the cursor hotspot within hubp1_cursor_set_position. However, the cursor position itself is still being programmed incorrectly as a large value. This manifests itself visually as the cursor disappearing or containing strange artifacts near the middle of the screen on raven. [How] Don't subtract the destination rect top left corner from the pos but add it to the hotspot instead. This happens before the pos gets passed into hubp1_cursor_set_position. This achieves the same result but avoids the subtraction wrap around. With this fix the original cursor programming logic can be used again. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Charlene Liu Acked-by: Leo Li Acked-by: Murton Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 58a12ddf12f3..41883c981789 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2658,8 +2658,8 @@ static void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) .mirror = pipe_ctx->plane_state->horizontal_mirror }; - pos_cpy.x -= pipe_ctx->plane_state->dst_rect.x; - pos_cpy.y -= pipe_ctx->plane_state->dst_rect.y; + pos_cpy.x_hotspot += pipe_ctx->plane_state->dst_rect.x; + pos_cpy.y_hotspot += pipe_ctx->plane_state->dst_rect.y; if (pipe_ctx->plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE) -- cgit v1.2.3 From 9f7ddbea2bb826a2147309f735726a8b09950944 Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha Date: Tue, 5 Feb 2019 13:55:20 -0500 Subject: drm/amd/display: fix optimize_bandwidth func pointer for dce80 [Why] optimize_bandwidth was using dce100_prepare_bandwidth this is incorrect [How] change it to dce100_optimize_bandwidth Signed-off-by: Bhawanpreet Lakha Reviewed-by: Charlene Liu Acked-by: Leo Li Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.h | 4 ++++ drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.h index acd418515346..a6b80fdaa666 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.h @@ -37,6 +37,10 @@ void dce100_prepare_bandwidth( struct dc *dc, struct dc_state *context); +void dce100_optimize_bandwidth( + struct dc *dc, + struct dc_state *context); + bool dce100_enable_display_power_gating(struct dc *dc, uint8_t controller_id, struct dc_bios *dcb, enum pipe_gating_control power_gating); diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c index a60a90e68d91..c4543178ba20 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c @@ -77,6 +77,6 @@ void dce80_hw_sequencer_construct(struct dc *dc) dc->hwss.enable_display_power_gating = dce100_enable_display_power_gating; dc->hwss.pipe_control_lock = dce_pipe_control_lock; dc->hwss.prepare_bandwidth = dce100_prepare_bandwidth; - dc->hwss.optimize_bandwidth = dce100_prepare_bandwidth; + dc->hwss.optimize_bandwidth = dce100_optimize_bandwidth; } -- cgit v1.2.3 From 4ece61a22be5ab5d49cc5fc20a19a0afa24a019d Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha Date: Tue, 5 Feb 2019 14:03:52 -0500 Subject: drm/amd/display: set clocks to 0 on suspend on dce80 [Why] When a dce80 asic was suspended, the clocks were not set to 0. Upon resume, the new clock was compared to the existing clock, they were found to be the same, and so the clock was not set. This resulted in a blackscreen. [How] In atomic commit, check to see if there are any active pipes. If no, set clocks to 0 Signed-off-by: Bhawanpreet Lakha Reviewed-by: Nicholas Kazlauskas Acked-by: Leo Li Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c index cdd1d6b7b9f2..4e9ea50141bd 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c @@ -790,9 +790,22 @@ bool dce80_validate_bandwidth( struct dc *dc, struct dc_state *context) { - /* TODO implement when needed but for now hardcode max value*/ - context->bw.dce.dispclk_khz = 681000; - context->bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ; + int i; + bool at_least_one_pipe = false; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (context->res_ctx.pipe_ctx[i].stream) + at_least_one_pipe = true; + } + + if (at_least_one_pipe) { + /* TODO implement when needed but for now hardcode max value*/ + context->bw.dce.dispclk_khz = 681000; + context->bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ; + } else { + context->bw.dce.dispclk_khz = 0; + context->bw.dce.yclk_khz = 0; + } return true; } -- cgit v1.2.3 From a213c2c7e235cfc0e0a161a558f7fdf2fb3a624a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 20 Feb 2019 15:16:06 +0100 Subject: drm/amdgpu: disable bulk moves for now MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The changes to fix those are two invasive for backporting. Just disable the feature in 4.20 and 5.0. Acked-by: Alex Deucher Signed-off-by: Christian König Cc: [4.20+] Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 7c108e687683..698bcb8ce61d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -638,12 +638,14 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, struct ttm_bo_global *glob = adev->mman.bdev.glob; struct amdgpu_vm_bo_base *bo_base; +#if 0 if (vm->bulk_moveable) { spin_lock(&glob->lru_lock); ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move); spin_unlock(&glob->lru_lock); return; } +#endif memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move)); -- cgit v1.2.3 From a8fef9ba58c9966ddb1fec916d8d8137c9d8bc89 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 15 Feb 2019 13:55:47 +0000 Subject: net: marvell: mvneta: fix DMA debug warning Booting 4.20 on SolidRun Clearfog issues this warning with DMA API debug enabled: WARNING: CPU: 0 PID: 555 at kernel/dma/debug.c:1230 check_sync+0x514/0x5bc mvneta f1070000.ethernet: DMA-API: device driver tries to sync DMA memory it has not allocated [device address=0x000000002dd7dc00] [size=240 bytes] Modules linked in: ahci mv88e6xxx dsa_core xhci_plat_hcd xhci_hcd devlink armada_thermal marvell_cesa des_generic ehci_orion phy_armada38x_comphy mcp3021 spi_orion evbug sfp mdio_i2c ip_tables x_tables CPU: 0 PID: 555 Comm: bridge-network- Not tainted 4.20.0+ #291 Hardware name: Marvell Armada 380/385 (Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x9c/0xd4) [] (dump_stack) from [] (__warn+0xf8/0x124) [] (__warn) from [] (warn_slowpath_fmt+0x38/0x48) [] (warn_slowpath_fmt) from [] (check_sync+0x514/0x5bc) [] (check_sync) from [] (debug_dma_sync_single_range_for_cpu+0x6c/0x74) [] (debug_dma_sync_single_range_for_cpu) from [] (mvneta_poll+0x298/0xf58) [] (mvneta_poll) from [] (net_rx_action+0x128/0x424) [] (net_rx_action) from [] (__do_softirq+0xf0/0x540) [] (__do_softirq) from [] (irq_exit+0x124/0x144) [] (irq_exit) from [] (__handle_domain_irq+0x58/0xb0) [] (__handle_domain_irq) from [] (gic_handle_irq+0x48/0x98) [] (gic_handle_irq) from [] (__irq_svc+0x70/0x98) ... This appears to be caused by mvneta_rx_hwbm() calling dma_sync_single_range_for_cpu() with the wrong struct device pointer, as the buffer manager device pointer is used to map and unmap the buffer. Fix this. Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 9d4568eb2297..8433fb9c3eee 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2146,7 +2146,7 @@ err_drop_frame: if (unlikely(!skb)) goto err_drop_frame_ret_pool; - dma_sync_single_range_for_cpu(dev->dev.parent, + dma_sync_single_range_for_cpu(&pp->bm_priv->pdev->dev, rx_desc->buf_phys_addr, MVNETA_MH_SIZE + NET_SKB_PAD, rx_bytes, -- cgit v1.2.3 From ae3b564179bfd06f32d051b9e5d72ce4b2a07c37 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 15 Feb 2019 20:09:35 +0000 Subject: missing barriers in some of unix_sock ->addr and ->path accesses Several u->addr and u->path users are not holding any locks in common with unix_bind(). unix_state_lock() is useless for those purposes. u->addr is assign-once and *(u->addr) is fully set up by the time we set u->addr (all under unix_table_lock). u->path is also set in the same critical area, also before setting u->addr, and any unix_sock with ->path filled will have non-NULL ->addr. So setting ->addr with smp_store_release() is all we need for those "lockless" users - just have them fetch ->addr with smp_load_acquire() and don't even bother looking at ->path if they see NULL ->addr. Users of ->addr and ->path fall into several classes now: 1) ones that do smp_load_acquire(u->addr) and access *(u->addr) and u->path only if smp_load_acquire() has returned non-NULL. 2) places holding unix_table_lock. These are guaranteed that *(u->addr) is seen fully initialized. If unix_sock is in one of the "bound" chains, so's ->path. 3) unix_sock_destructor() using ->addr is safe. All places that set u->addr are guaranteed to have seen all stores *(u->addr) while holding a reference to u and unix_sock_destructor() is called when (atomic) refcount hits zero. 4) unix_release_sock() using ->path is safe. unix_bind() is serialized wrt unix_release() (normally - by struct file refcount), and for the instances that had ->path set by unix_bind() unix_release_sock() comes from unix_release(), so they are fine. Instances that had it set in unix_stream_connect() either end up attached to a socket (in unix_accept()), in which case the call chain to unix_release_sock() and serialization are the same as in the previous case, or they never get accept'ed and unix_release_sock() is called when the listener is shut down and its queue gets purged. In that case the listener's queue lock provides the barriers needed - unix_stream_connect() shoves our unix_sock into listener's queue under that lock right after having set ->path and eventual unix_release_sock() caller picks them from that queue under the same lock right before calling unix_release_sock(). 5) unix_find_other() use of ->path is pointless, but safe - it happens with successful lookup by (abstract) name, so ->path.dentry is guaranteed to be NULL there. earlier-variant-reviewed-by: "Paul E. McKenney" Signed-off-by: Al Viro Signed-off-by: David S. Miller --- net/unix/af_unix.c | 57 ++++++++++++++++++++++++++++++---------------------- net/unix/diag.c | 3 ++- security/lsm_audit.c | 10 +++++---- 3 files changed, 41 insertions(+), 29 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 74d1eed7cbd4..a95d479caeea 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -890,7 +890,7 @@ retry: addr->hash ^= sk->sk_type; __unix_remove_socket(sk); - u->addr = addr; + smp_store_release(&u->addr, addr); __unix_insert_socket(&unix_socket_table[addr->hash], sk); spin_unlock(&unix_table_lock); err = 0; @@ -1060,7 +1060,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = 0; __unix_remove_socket(sk); - u->addr = addr; + smp_store_release(&u->addr, addr); __unix_insert_socket(list, sk); out_unlock: @@ -1331,15 +1331,29 @@ restart: RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); otheru = unix_sk(other); - /* copy address information from listening to new sock*/ - if (otheru->addr) { - refcount_inc(&otheru->addr->refcnt); - newu->addr = otheru->addr; - } + /* copy address information from listening to new sock + * + * The contents of *(otheru->addr) and otheru->path + * are seen fully set up here, since we have found + * otheru in hash under unix_table_lock. Insertion + * into the hash chain we'd found it in had been done + * in an earlier critical area protected by unix_table_lock, + * the same one where we'd set *(otheru->addr) contents, + * as well as otheru->path and otheru->addr itself. + * + * Using smp_store_release() here to set newu->addr + * is enough to make those stores, as well as stores + * to newu->path visible to anyone who gets newu->addr + * by smp_load_acquire(). IOW, the same warranties + * as for unix_sock instances bound in unix_bind() or + * in unix_autobind(). + */ if (otheru->path.dentry) { path_get(&otheru->path); newu->path = otheru->path; } + refcount_inc(&otheru->addr->refcnt); + smp_store_release(&newu->addr, otheru->addr); /* Set credentials */ copy_peercred(sk, other); @@ -1453,7 +1467,7 @@ out: static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct sock *sk = sock->sk; - struct unix_sock *u; + struct unix_address *addr; DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr); int err = 0; @@ -1468,19 +1482,15 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) sock_hold(sk); } - u = unix_sk(sk); - unix_state_lock(sk); - if (!u->addr) { + addr = smp_load_acquire(&unix_sk(sk)->addr); + if (!addr) { sunaddr->sun_family = AF_UNIX; sunaddr->sun_path[0] = 0; err = sizeof(short); } else { - struct unix_address *addr = u->addr; - err = addr->len; memcpy(sunaddr, addr->name, addr->len); } - unix_state_unlock(sk); sock_put(sk); out: return err; @@ -2073,11 +2083,11 @@ static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, static void unix_copy_addr(struct msghdr *msg, struct sock *sk) { - struct unix_sock *u = unix_sk(sk); + struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); - if (u->addr) { - msg->msg_namelen = u->addr->len; - memcpy(msg->msg_name, u->addr->name, u->addr->len); + if (addr) { + msg->msg_namelen = addr->len; + memcpy(msg->msg_name, addr->name, addr->len); } } @@ -2581,15 +2591,14 @@ static int unix_open_file(struct sock *sk) if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; - unix_state_lock(sk); + if (!smp_load_acquire(&unix_sk(sk)->addr)) + return -ENOENT; + path = unix_sk(sk)->path; - if (!path.dentry) { - unix_state_unlock(sk); + if (!path.dentry) return -ENOENT; - } path_get(&path); - unix_state_unlock(sk); fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) @@ -2830,7 +2839,7 @@ static int unix_seq_show(struct seq_file *seq, void *v) (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING), sock_i_ino(s)); - if (u->addr) { + if (u->addr) { // under unix_table_lock here int i, len; seq_putc(seq, ' '); diff --git a/net/unix/diag.c b/net/unix/diag.c index 384c84e83462..3183d9b8ab33 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -10,7 +10,8 @@ static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb) { - struct unix_address *addr = unix_sk(sk)->addr; + /* might or might not have unix_table_lock */ + struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); if (!addr) return 0; diff --git a/security/lsm_audit.c b/security/lsm_audit.c index f84001019356..33028c098ef3 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -321,6 +321,7 @@ static void dump_common_audit_data(struct audit_buffer *ab, if (a->u.net->sk) { struct sock *sk = a->u.net->sk; struct unix_sock *u; + struct unix_address *addr; int len = 0; char *p = NULL; @@ -351,14 +352,15 @@ static void dump_common_audit_data(struct audit_buffer *ab, #endif case AF_UNIX: u = unix_sk(sk); + addr = smp_load_acquire(&u->addr); + if (!addr) + break; if (u->path.dentry) { audit_log_d_path(ab, " path=", &u->path); break; } - if (!u->addr) - break; - len = u->addr->len-sizeof(short); - p = &u->addr->name->sun_path[0]; + len = addr->len-sizeof(short); + p = &addr->name->sun_path[0]; audit_log_format(ab, " path="); if (*p) audit_log_untrustedstring(ab, p); -- cgit v1.2.3 From 74fb44863084275b952f21ec6a024af0e2e75cb8 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Thu, 21 Feb 2019 08:59:02 +0100 Subject: PM-runtime: Fix deadlock when canceling hrtimer When rpm_resume() desactivates the autosuspend timer, it should only try to cancel hrtimer but not wait for the handler to finish, because both rpm_resume() and pm_suspend_timer_fn() take the power.lock. A deadlock is possible as follows: CPU0 CPU1 rpm_resume() spin_lock_irqsave pm_suspend_timer_fn() spin_lock_irqsave pm_runtime_deactivate_timer() hrtimer_cancel() It is sufficient to call hrtimer_try_to_cancel() from pm_runtime_deactivate_timer(), because dev->power.timer_expires reset to 0 by it, so use that function instead of hrtimer_cancel(). Fixes: 8234f6734c5d ("PM-runtime: Switch autosuspend over to using hrtimers") Reported-by: Sunzhaosheng Sun(Zhaosheng) Signed-off-by: Vincent Guittot [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 0ea2139c50d8..ccd296dbb95c 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -95,7 +95,7 @@ static void __update_runtime_status(struct device *dev, enum rpm_status status) static void pm_runtime_deactivate_timer(struct device *dev) { if (dev->power.timer_expires > 0) { - hrtimer_cancel(&dev->power.suspend_timer); + hrtimer_try_to_cancel(&dev->power.suspend_timer); dev->power.timer_expires = 0; } } -- cgit v1.2.3 From 11fe9262ed226c127f67ca4bd85977b22589b68a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Thu, 21 Feb 2019 13:07:38 +0100 Subject: Revert "xsk: simplify AF_XDP socket teardown" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit e2ce3674883ecba2605370404208c9d4a07ae1c3. It turns out that the sock destructor xsk_destruct was needed after all. The cleanup simplification broke the skb transmit cleanup path, due to that the umem was prematurely destroyed. The umem cannot be destroyed until all outstanding skbs are freed, which means that we cannot remove the umem until the sk_destruct has been called. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- net/xdp/xsk.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 45f3b528dc09..85e4fe4f18cc 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -366,7 +366,6 @@ static int xsk_release(struct socket *sock) xskq_destroy(xs->rx); xskq_destroy(xs->tx); - xdp_put_umem(xs->umem); sock_orphan(sk); sock->sk = NULL; @@ -718,6 +717,18 @@ static const struct proto_ops xsk_proto_ops = { .sendpage = sock_no_sendpage, }; +static void xsk_destruct(struct sock *sk) +{ + struct xdp_sock *xs = xdp_sk(sk); + + if (!sock_flag(sk, SOCK_DEAD)) + return; + + xdp_put_umem(xs->umem); + + sk_refcnt_debug_dec(sk); +} + static int xsk_create(struct net *net, struct socket *sock, int protocol, int kern) { @@ -744,6 +755,9 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol, sk->sk_family = PF_XDP; + sk->sk_destruct = xsk_destruct; + sk_refcnt_debug_inc(sk); + sock_set_flag(sk, SOCK_RCU_FREE); xs = xdp_sk(sk); -- cgit v1.2.3 From a841c673f1352f607fd3ba85de6c9c49ff2c1e12 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 20 Feb 2019 22:18:52 -0800 Subject: revert "initramfs: cleanup incomplete rootfs" Revert ff1522bb7d9845 ("initramfs: cleanup incomplete rootfs"). Andy reports : This breaks my setup where I have U-boot provided more size of initramfs : than needed. This allows a bit of flexibility to increase or decrease : initramfs compressed image without taking care of bootloader. The proper : solution is to do this if we sure that we didn't get enough memory, : otherwise I can't consider the error fatal to clean up rootfs. Fixes: ff1522bb7d9845 ("initramfs: cleanup incomplete rootfs") Reported-by: Andy Shevchenko Tested-by: Andy Shevchenko Cc: David Engraf Cc: Dominik Brodowski Cc: Greg Kroah-Hartman Cc: Philippe Ombredanne Cc: Arnd Bergmann Cc: Luc Van Oostenryck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/initramfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/init/initramfs.c b/init/initramfs.c index 7cea802d00ef..fca899622937 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -550,6 +550,7 @@ skip: initrd_end = 0; } +#ifdef CONFIG_BLK_DEV_RAM #define BUF_SIZE 1024 static void __init clean_rootfs(void) { @@ -596,6 +597,7 @@ static void __init clean_rootfs(void) ksys_close(fd); kfree(buf); } +#endif static int __init populate_rootfs(void) { @@ -638,10 +640,8 @@ static int __init populate_rootfs(void) printk(KERN_INFO "Unpacking initramfs...\n"); err = unpack_to_rootfs((char *)initrd_start, initrd_end - initrd_start); - if (err) { + if (err) printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err); - clean_rootfs(); - } free_initrd(); #endif } -- cgit v1.2.3 From 050c17f239fd53adb55aa768d4f41bc76c0fe045 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Wed, 20 Feb 2019 22:18:58 -0800 Subject: numa: change get_mempolicy() to use nr_node_ids instead of MAX_NUMNODES The system call, get_mempolicy() [1], passes an unsigned long *nodemask pointer and an unsigned long maxnode argument which specifies the length of the user's nodemask array in bits (which is rounded up). The manual page says that if the maxnode value is too small, get_mempolicy will return EINVAL but there is no system call to return this minimum value. To determine this value, some programs search /proc//status for a line starting with "Mems_allowed:" and use the number of digits in the mask to determine the minimum value. A recent change to the way this line is formatted [2] causes these programs to compute a value less than MAX_NUMNODES so get_mempolicy() returns EINVAL. Change get_mempolicy(), the older compat version of get_mempolicy(), and the copy_nodes_to_user() function to use nr_node_ids instead of MAX_NUMNODES, thus preserving the defacto method of computing the minimum size for the nodemask array and the maxnode argument. [1] http://man7.org/linux/man-pages/man2/get_mempolicy.2.html [2] https://lore.kernel.org/lkml/1545405631-6808-1-git-send-email-longman@redhat.com Link: http://lkml.kernel.org/r/20190211180245.22295-1-rcampbell@nvidia.com Fixes: 4fb8e5b89bcbbbb ("include/linux/nodemask.h: use nr_node_ids (not MAX_NUMNODES) in __nodemask_pr_numnodes()") Signed-off-by: Ralph Campbell Suggested-by: Alexander Duyck Cc: Waiman Long Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index d4496d9d34f5..ee2bce59d2bf 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1314,7 +1314,7 @@ static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode, nodemask_t *nodes) { unsigned long copy = ALIGN(maxnode-1, 64) / 8; - const int nbytes = BITS_TO_LONGS(MAX_NUMNODES) * sizeof(long); + unsigned int nbytes = BITS_TO_LONGS(nr_node_ids) * sizeof(long); if (copy > nbytes) { if (copy > PAGE_SIZE) @@ -1491,7 +1491,7 @@ static int kernel_get_mempolicy(int __user *policy, int uninitialized_var(pval); nodemask_t nodes; - if (nmask != NULL && maxnode < MAX_NUMNODES) + if (nmask != NULL && maxnode < nr_node_ids) return -EINVAL; err = do_get_mempolicy(&pval, &nodes, addr, flags); @@ -1527,7 +1527,7 @@ COMPAT_SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, unsigned long nr_bits, alloc_size; DECLARE_BITMAP(bm, MAX_NUMNODES); - nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES); + nr_bits = min_t(unsigned long, maxnode-1, nr_node_ids); alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8; if (nmask) -- cgit v1.2.3 From e1db95befb3e9e3476629afec6e0f5d0707b9825 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 20 Feb 2019 22:19:01 -0800 Subject: kasan: fix assigning tags twice When an object is kmalloc()'ed, two hooks are called: kasan_slab_alloc() and kasan_kmalloc(). Right now we assign a tag twice, once in each of the hooks. Fix it by assigning a tag only in the former hook. Link: http://lkml.kernel.org/r/ce8c6431da735aa7ec051fd6497153df690eb021.1549921721.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Christoph Lameter Cc: David Rientjes Cc: Dmitry Vyukov Cc: Evgeniy Stepanov Cc: Joonsoo Kim Cc: Kostya Serebryany Cc: Pekka Enberg Cc: Qian Cai Cc: Vincenzo Frascino Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/common.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 73c9cbfdedf4..09b534fbba17 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -361,10 +361,15 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object) * get different tags. */ static u8 assign_tag(struct kmem_cache *cache, const void *object, - bool init, bool krealloc) + bool init, bool keep_tag) { - /* Reuse the same tag for krealloc'ed objects. */ - if (krealloc) + /* + * 1. When an object is kmalloc()'ed, two hooks are called: + * kasan_slab_alloc() and kasan_kmalloc(). We assign the + * tag only in the first one. + * 2. We reuse the same tag for krealloc'ed objects. + */ + if (keep_tag) return get_tag(object); /* @@ -405,12 +410,6 @@ void * __must_check kasan_init_slab_obj(struct kmem_cache *cache, return (void *)object; } -void * __must_check kasan_slab_alloc(struct kmem_cache *cache, void *object, - gfp_t flags) -{ - return kasan_kmalloc(cache, object, cache->object_size, flags); -} - static inline bool shadow_invalid(u8 tag, s8 shadow_byte) { if (IS_ENABLED(CONFIG_KASAN_GENERIC)) @@ -467,7 +466,7 @@ bool kasan_slab_free(struct kmem_cache *cache, void *object, unsigned long ip) } static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object, - size_t size, gfp_t flags, bool krealloc) + size_t size, gfp_t flags, bool keep_tag) { unsigned long redzone_start; unsigned long redzone_end; @@ -485,7 +484,7 @@ static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object, KASAN_SHADOW_SCALE_SIZE); if (IS_ENABLED(CONFIG_KASAN_SW_TAGS)) - tag = assign_tag(cache, object, false, krealloc); + tag = assign_tag(cache, object, false, keep_tag); /* Tag is ignored in set_tag without CONFIG_KASAN_SW_TAGS */ kasan_unpoison_shadow(set_tag(object, tag), size); @@ -498,10 +497,16 @@ static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object, return set_tag(object, tag); } +void * __must_check kasan_slab_alloc(struct kmem_cache *cache, void *object, + gfp_t flags) +{ + return __kasan_kmalloc(cache, object, cache->object_size, flags, false); +} + void * __must_check kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size, gfp_t flags) { - return __kasan_kmalloc(cache, object, size, flags, false); + return __kasan_kmalloc(cache, object, size, flags, true); } EXPORT_SYMBOL(kasan_kmalloc); -- cgit v1.2.3 From 53128245b43daad600d9fe72940206570e064112 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 20 Feb 2019 22:19:11 -0800 Subject: kasan, kmemleak: pass tagged pointers to kmemleak Right now we call kmemleak hooks before assigning tags to pointers in KASAN hooks. As a result, when an objects gets allocated, kmemleak sees a differently tagged pointer, compared to the one it sees when the object gets freed. Fix it by calling KASAN hooks before kmemleak's ones. Link: http://lkml.kernel.org/r/cd825aa4897b0fc37d3316838993881daccbe9f5.1549921721.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reported-by: Qian Cai Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Christoph Lameter Cc: David Rientjes Cc: Dmitry Vyukov Cc: Evgeniy Stepanov Cc: Joonsoo Kim Cc: Kostya Serebryany Cc: Pekka Enberg Cc: Vincenzo Frascino Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.h | 6 ++---- mm/slab_common.c | 2 +- mm/slub.c | 3 ++- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/mm/slab.h b/mm/slab.h index 4190c24ef0e9..638ea1b25d39 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -437,11 +437,9 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, flags &= gfp_allowed_mask; for (i = 0; i < size; i++) { - void *object = p[i]; - - kmemleak_alloc_recursive(object, s->object_size, 1, + p[i] = kasan_slab_alloc(s, p[i], flags); + kmemleak_alloc_recursive(p[i], s->object_size, 1, s->flags, flags); - p[i] = kasan_slab_alloc(s, object, flags); } if (memcg_kmem_enabled()) diff --git a/mm/slab_common.c b/mm/slab_common.c index 81732d05e74a..fe524c8d0246 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1228,8 +1228,8 @@ void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) flags |= __GFP_COMP; page = alloc_pages(flags, order); ret = page ? page_address(page) : NULL; - kmemleak_alloc(ret, size, 1, flags); ret = kasan_kmalloc_large(ret, size, flags); + kmemleak_alloc(ret, size, 1, flags); return ret; } EXPORT_SYMBOL(kmalloc_order); diff --git a/mm/slub.c b/mm/slub.c index 1e3d0ec4e200..4a3d7686902f 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1374,8 +1374,9 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, */ static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) { + ptr = kasan_kmalloc_large(ptr, size, flags); kmemleak_alloc(ptr, size, 1, flags); - return kasan_kmalloc_large(ptr, size, flags); + return ptr; } static __always_inline void kfree_hook(void *x) -- cgit v1.2.3 From a2f775751d964e638818487544fa8320180d106e Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 20 Feb 2019 22:19:16 -0800 Subject: kmemleak: account for tagged pointers when calculating pointer range kmemleak keeps two global variables, min_addr and max_addr, which store the range of valid (encountered by kmemleak) pointer values, which it later uses to speed up pointer lookup when scanning blocks. With tagged pointers this range will get bigger than it needs to be. This patch makes kmemleak untag pointers before saving them to min_addr and max_addr and when performing a lookup. Link: http://lkml.kernel.org/r/16e887d442986ab87fe87a755815ad92fa431a5f.1550066133.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Tested-by: Qian Cai Acked-by: Catalin Marinas Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Christoph Lameter Cc: David Rientjes Cc: Dmitry Vyukov Cc: Evgeniy Stepanov Cc: Joonsoo Kim Cc: Kostya Serebryany Cc: Pekka Enberg Cc: Vincenzo Frascino Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kmemleak.c | 10 +++++++--- mm/slab.h | 1 + mm/slab_common.c | 1 + mm/slub.c | 1 + 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index f9d9dc250428..707fa5579f66 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -574,6 +574,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size, unsigned long flags; struct kmemleak_object *object, *parent; struct rb_node **link, *rb_parent; + unsigned long untagged_ptr; object = kmem_cache_alloc(object_cache, gfp_kmemleak_mask(gfp)); if (!object) { @@ -619,8 +620,9 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size, write_lock_irqsave(&kmemleak_lock, flags); - min_addr = min(min_addr, ptr); - max_addr = max(max_addr, ptr + size); + untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr); + min_addr = min(min_addr, untagged_ptr); + max_addr = max(max_addr, untagged_ptr + size); link = &object_tree_root.rb_node; rb_parent = NULL; while (*link) { @@ -1333,6 +1335,7 @@ static void scan_block(void *_start, void *_end, unsigned long *start = PTR_ALIGN(_start, BYTES_PER_POINTER); unsigned long *end = _end - (BYTES_PER_POINTER - 1); unsigned long flags; + unsigned long untagged_ptr; read_lock_irqsave(&kmemleak_lock, flags); for (ptr = start; ptr < end; ptr++) { @@ -1347,7 +1350,8 @@ static void scan_block(void *_start, void *_end, pointer = *ptr; kasan_enable_current(); - if (pointer < min_addr || pointer >= max_addr) + untagged_ptr = (unsigned long)kasan_reset_tag((void *)pointer); + if (untagged_ptr < min_addr || untagged_ptr >= max_addr) continue; /* diff --git a/mm/slab.h b/mm/slab.h index 638ea1b25d39..384105318779 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -438,6 +438,7 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, flags &= gfp_allowed_mask; for (i = 0; i < size; i++) { p[i] = kasan_slab_alloc(s, p[i], flags); + /* As p[i] might get tagged, call kmemleak hook after KASAN. */ kmemleak_alloc_recursive(p[i], s->object_size, 1, s->flags, flags); } diff --git a/mm/slab_common.c b/mm/slab_common.c index fe524c8d0246..f9d89c1b5977 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1229,6 +1229,7 @@ void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) page = alloc_pages(flags, order); ret = page ? page_address(page) : NULL; ret = kasan_kmalloc_large(ret, size, flags); + /* As ret might get tagged, call kmemleak hook after KASAN. */ kmemleak_alloc(ret, size, 1, flags); return ret; } diff --git a/mm/slub.c b/mm/slub.c index 4a3d7686902f..f5a451c49190 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1375,6 +1375,7 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) { ptr = kasan_kmalloc_large(ptr, size, flags); + /* As ptr might get tagged, call kmemleak hook after KASAN. */ kmemleak_alloc(ptr, size, 1, flags); return ptr; } -- cgit v1.2.3 From a71012242837fe5e67d8c999cfc357174ed5dba0 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 20 Feb 2019 22:19:23 -0800 Subject: kasan, slub: move kasan_poison_slab hook before page_address With tag based KASAN page_address() looks at the page flags to see whether the resulting pointer needs to have a tag set. Since we don't want to set a tag when page_address() is called on SLAB pages, we call page_kasan_tag_reset() in kasan_poison_slab(). However in allocate_slab() page_address() is called before kasan_poison_slab(). Fix it by changing the order. [andreyknvl@google.com: fix compilation error when CONFIG_SLUB_DEBUG=n] Link: http://lkml.kernel.org/r/ac27cc0bbaeb414ed77bcd6671a877cf3546d56e.1550066133.git.andreyknvl@google.com Link: http://lkml.kernel.org/r/cd895d627465a3f1c712647072d17f10883be2a1.1549921721.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Christoph Lameter Cc: David Rientjes Cc: Dmitry Vyukov Cc: Evgeniy Stepanov Cc: Joonsoo Kim Cc: Kostya Serebryany Cc: Pekka Enberg Cc: Qian Cai Cc: Vincenzo Frascino Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index f5a451c49190..a7e7c7f719f9 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1075,6 +1075,16 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page, init_tracking(s, object); } +static void setup_page_debug(struct kmem_cache *s, void *addr, int order) +{ + if (!(s->flags & SLAB_POISON)) + return; + + metadata_access_enable(); + memset(addr, POISON_INUSE, PAGE_SIZE << order); + metadata_access_disable(); +} + static inline int alloc_consistency_checks(struct kmem_cache *s, struct page *page, void *object, unsigned long addr) @@ -1330,6 +1340,8 @@ slab_flags_t kmem_cache_flags(unsigned int object_size, #else /* !CONFIG_SLUB_DEBUG */ static inline void setup_object_debug(struct kmem_cache *s, struct page *page, void *object) {} +static inline void setup_page_debug(struct kmem_cache *s, + void *addr, int order) {} static inline int alloc_debug_processing(struct kmem_cache *s, struct page *page, void *object, unsigned long addr) { return 0; } @@ -1643,12 +1655,11 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) if (page_is_pfmemalloc(page)) SetPageSlabPfmemalloc(page); - start = page_address(page); + kasan_poison_slab(page); - if (unlikely(s->flags & SLAB_POISON)) - memset(start, POISON_INUSE, PAGE_SIZE << order); + start = page_address(page); - kasan_poison_slab(page); + setup_page_debug(s, start, order); shuffle = shuffle_freelist(s, page); -- cgit v1.2.3 From 18e506610238eda2b0c5a19a123d3d6ec0ab2de6 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 20 Feb 2019 22:19:28 -0800 Subject: kasan, slub: fix conflicts with CONFIG_SLAB_FREELIST_HARDENED CONFIG_SLAB_FREELIST_HARDENED hashes freelist pointer with the address of the object where the pointer gets stored. With tag based KASAN we don't account for that when building freelist, as we call set_freepointer() with the first argument untagged. This patch changes the code to properly propagate tags throughout the loop. Link: http://lkml.kernel.org/r/3df171559c52201376f246bf7ce3184fe21c1dc7.1549921721.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reported-by: Qian Cai Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Catalin Marinas Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Vincenzo Frascino Cc: Kostya Serebryany Cc: Evgeniy Stepanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index a7e7c7f719f9..80da3a40b74d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -303,11 +303,6 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) __p < (__addr) + (__objects) * (__s)->size; \ __p += (__s)->size) -#define for_each_object_idx(__p, __idx, __s, __addr, __objects) \ - for (__p = fixup_red_left(__s, __addr), __idx = 1; \ - __idx <= __objects; \ - __p += (__s)->size, __idx++) - /* Determine object index from a given position */ static inline unsigned int slab_index(void *p, struct kmem_cache *s, void *addr) { @@ -1664,17 +1659,16 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) shuffle = shuffle_freelist(s, page); if (!shuffle) { - for_each_object_idx(p, idx, s, start, page->objects) { - if (likely(idx < page->objects)) { - next = p + s->size; - next = setup_object(s, page, next); - set_freepointer(s, p, next); - } else - set_freepointer(s, p, NULL); - } start = fixup_red_left(s, start); start = setup_object(s, page, start); page->freelist = start; + for (idx = 0, p = start; idx < page->objects - 1; idx++) { + next = p + s->size; + next = setup_object(s, page, next); + set_freepointer(s, p, next); + p = next; + } + set_freepointer(s, p, NULL); } page->inuse = page->objects; -- cgit v1.2.3 From d36a63a943e37081e92e4abdf4a207fd2e83a006 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 20 Feb 2019 22:19:32 -0800 Subject: kasan, slub: fix more conflicts with CONFIG_SLAB_FREELIST_HARDENED When CONFIG_KASAN_SW_TAGS is enabled, ptr_addr might be tagged. Normally, this doesn't cause any issues, as both set_freepointer() and get_freepointer() are called with a pointer with the same tag. However, there are some issues with CONFIG_SLUB_DEBUG code. For example, when __free_slub() iterates over objects in a cache, it passes untagged pointers to check_object(). check_object() in turns calls get_freepointer() with an untagged pointer, which causes the freepointer to be restored incorrectly. Add kasan_reset_tag to freelist_ptr(). Also add a detailed comment. Link: http://lkml.kernel.org/r/bf858f26ef32eb7bd24c665755b3aee4bc58d0e4.1550103861.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reported-by: Qian Cai Tested-by: Qian Cai Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index 80da3a40b74d..c80e6699357c 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -249,7 +249,18 @@ static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr, unsigned long ptr_addr) { #ifdef CONFIG_SLAB_FREELIST_HARDENED - return (void *)((unsigned long)ptr ^ s->random ^ ptr_addr); + /* + * When CONFIG_KASAN_SW_TAGS is enabled, ptr_addr might be tagged. + * Normally, this doesn't cause any issues, as both set_freepointer() + * and get_freepointer() are called with a pointer with the same tag. + * However, there are some issues with CONFIG_SLUB_DEBUG code. For + * example, when __free_slub() iterates over objects in a cache, it + * passes untagged pointers to check_object(). check_object() in turns + * calls get_freepointer() with an untagged pointer, which causes the + * freepointer to be restored incorrectly. + */ + return (void *)((unsigned long)ptr ^ s->random ^ + (unsigned long)kasan_reset_tag((void *)ptr_addr)); #else return ptr; #endif -- cgit v1.2.3 From 338cfaad4993d3bc35a740e28981747770a65f90 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Wed, 20 Feb 2019 22:19:36 -0800 Subject: slub: fix SLAB_CONSISTENCY_CHECKS + KASAN_SW_TAGS Enabling SLUB_DEBUG's SLAB_CONSISTENCY_CHECKS with KASAN_SW_TAGS triggers endless false positives during boot below due to check_valid_pointer() checks tagged pointers which have no addresses that is valid within slab pages: BUG radix_tree_node (Tainted: G B ): Freelist Pointer check fails ----------------------------------------------------------------------------- INFO: Slab objects=69 used=69 fp=0x (null) flags=0x7ffffffc000200 INFO: Object @offset=15060037153926966016 fp=0x Redzone: bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 18 6b 06 00 08 80 ff d0 .........k...... Object : 18 6b 06 00 08 80 ff d0 00 00 00 00 00 00 00 00 .k.............. Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object : 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Redzone: bb bb bb bb bb bb bb bb ........ Padding: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ CPU: 0 PID: 0 Comm: swapper/0 Tainted: G B 5.0.0-rc5+ #18 Call trace: dump_backtrace+0x0/0x450 show_stack+0x20/0x2c __dump_stack+0x20/0x28 dump_stack+0xa0/0xfc print_trailer+0x1bc/0x1d0 object_err+0x40/0x50 alloc_debug_processing+0xf0/0x19c ___slab_alloc+0x554/0x704 kmem_cache_alloc+0x2f8/0x440 radix_tree_node_alloc+0x90/0x2fc idr_get_free+0x1e8/0x6d0 idr_alloc_u32+0x11c/0x2a4 idr_alloc+0x74/0xe0 worker_pool_assign_id+0x5c/0xbc workqueue_init_early+0x49c/0xd50 start_kernel+0x52c/0xac4 FIX radix_tree_node: Marking all objects used Link: http://lkml.kernel.org/r/20190209044128.3290-1-cai@lca.pw Signed-off-by: Qian Cai Reviewed-by: Andrey Konovalov Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/slub.c b/mm/slub.c index c80e6699357c..2d2830134e60 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -513,6 +513,7 @@ static inline int check_valid_pointer(struct kmem_cache *s, return 1; base = page_address(page); + object = kasan_reset_tag(object); object = restore_red_left(s, object); if (object < base || object >= base + page->objects * s->size || (object - base) % s->size) { -- cgit v1.2.3 From b2b469939e93458753cfbf8282ad52636495965e Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 20 Feb 2019 22:19:42 -0800 Subject: proc, oom: do not report alien mms when setting oom_score_adj Tetsuo has reported that creating a thousands of processes sharing MM without SIGHAND (aka alien threads) and setting /proc//oom_score_adj will swamp the kernel log and takes ages [1] to finish. This is especially worrisome that all that printing is done under RCU lock and this can potentially trigger RCU stall or softlockup detector. The primary reason for the printk was to catch potential users who might depend on the behavior prior to 44a70adec910 ("mm, oom_adj: make sure processes sharing mm have same view of oom_score_adj") but after more than 2 years without a single report I guess it is safe to simply remove the printk altogether. The next step should be moving oom_score_adj over to the mm struct and remove all the tasks crawling as suggested by [2] [1] http://lkml.kernel.org/r/97fce864-6f75-bca5-14bc-12c9f890e740@i-love.sakura.ne.jp [2] http://lkml.kernel.org/r/20190117155159.GA4087@dhcp22.suse.cz Link: http://lkml.kernel.org/r/20190212102129.26288-1-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Tetsuo Handa Acked-by: Johannes Weiner Cc: David Rientjes Cc: Yong-Taek Lee Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 633a63462573..f5ed9512d193 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1086,10 +1086,6 @@ static int __set_oom_adj(struct file *file, int oom_adj, bool legacy) task_lock(p); if (!p->vfork_done && process_shares_mm(p, mm)) { - pr_info("updating oom_score_adj for %d (%s) from %d to %d because it shares mm with %d (%s). Report if this is unexpected.\n", - task_pid_nr(p), p->comm, - p->signal->oom_score_adj, oom_adj, - task_pid_nr(task), task->comm); p->signal->oom_score_adj = oom_adj; if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE)) p->signal->oom_score_adj_min = (short)oom_adj; -- cgit v1.2.3 From 311ade0eab192f0abee2f70bce761bf0d66990c4 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 20 Feb 2019 22:19:45 -0800 Subject: mm/debug.c: fix __dump_page() for poisoned pages Evaluating page_mapping() on a poisoned page ends up dereferencing junk and making PF_POISONED_CHECK() considerably crashier than intended: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000006 Mem abort info: ESR = 0x96000005 Exception class = DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000005 CM = 0, WnR = 0 user pgtable: 4k pages, 39-bit VAs, pgdp = 00000000c2f6ac38 [0000000000000006] pgd=0000000000000000, pud=0000000000000000 Internal error: Oops: 96000005 [#1] PREEMPT SMP Modules linked in: CPU: 2 PID: 491 Comm: bash Not tainted 5.0.0-rc1+ #1 Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno Development Platform, BIOS EDK II Dec 17 2018 pstate: 00000005 (nzcv daif -PAN -UAO) pc : page_mapping+0x18/0x118 lr : __dump_page+0x1c/0x398 Process bash (pid: 491, stack limit = 0x000000004ebd4ecd) Call trace: page_mapping+0x18/0x118 __dump_page+0x1c/0x398 dump_page+0xc/0x18 remove_store+0xbc/0x120 dev_attr_store+0x18/0x28 sysfs_kf_write+0x40/0x50 kernfs_fop_write+0x130/0x1d8 __vfs_write+0x30/0x180 vfs_write+0xb4/0x1a0 ksys_write+0x60/0xd0 __arm64_sys_write+0x18/0x20 el0_svc_common+0x94/0xf8 el0_svc_handler+0x68/0x70 el0_svc+0x8/0xc Code: f9400401 d1000422 f240003f 9a801040 (f9400402) ---[ end trace cdb5eb5bf435cecb ]--- Fix that by not inspecting the mapping until we've determined that it's likely to be valid. Now the above condition still ends up stopping the kernel, but in the correct manner: page:ffffffbf20000000 is uninitialized and poisoned raw: ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff raw: ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) ------------[ cut here ]------------ kernel BUG at ./include/linux/mm.h:1006! Internal error: Oops - BUG: 0 [#1] PREEMPT SMP Modules linked in: CPU: 1 PID: 483 Comm: bash Not tainted 5.0.0-rc1+ #3 Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno Development Platform, BIOS EDK II Dec 17 2018 pstate: 40000005 (nZcv daif -PAN -UAO) pc : remove_store+0xbc/0x120 lr : remove_store+0xbc/0x120 ... Link: http://lkml.kernel.org/r/03b53ee9d7e76cda4b9b5e1e31eea080db033396.1550071778.git.robin.murphy@arm.com Fixes: 1c6fb1d89e73 ("mm: print more information about mapping in __dump_page") Signed-off-by: Robin Murphy Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/debug.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/debug.c b/mm/debug.c index 0abb987dad9b..1611cf00a137 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -44,7 +44,7 @@ const struct trace_print_flags vmaflag_names[] = { void __dump_page(struct page *page, const char *reason) { - struct address_space *mapping = page_mapping(page); + struct address_space *mapping; bool page_poisoned = PagePoisoned(page); int mapcount; @@ -58,6 +58,8 @@ void __dump_page(struct page *page, const char *reason) goto hex_only; } + mapping = page_mapping(page); + /* * Avoid VM_BUG_ON() in page_mapcount(). * page->_mapcount space in struct page is used by sl[aou]b pages to -- cgit v1.2.3 From 94b3334cbebea34d56a7e6321c6fe9d89b309a49 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 20 Feb 2019 22:19:49 -0800 Subject: mm, page_alloc: fix a division by zero error when boosting watermarks v2 Yury Norov reported that an arm64 KVM instance could not boot since after v5.0-rc1 and could addressed by reverting the patches 1c30844d2dfe272d58c ("mm: reclaim small amounts of memory when an external 73444bc4d8f92e46a20 ("mm, page_alloc: do not wake kswapd with zone lock held") The problem is that a division by zero error is possible if boosting occurs very early in boot if the system has very little memory. This patch avoids the division by zero error. Link: http://lkml.kernel.org/r/20190213143012.GT9565@techsingularity.net Fixes: 1c30844d2dfe ("mm: reclaim small amounts of memory when an external fragmentation event occurs") Signed-off-by: Mel Gorman Reported-by: Yury Norov Tested-by: Yury Norov Tested-by: Will Deacon Acked-by: Vlastimil Babka Cc: Andrea Arcangeli Cc: David Rientjes Cc: Michal Hocko Cc: Catalin Marinas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7f79b78bc829..0b9f577b1a2a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2170,6 +2170,18 @@ static inline void boost_watermark(struct zone *zone) max_boost = mult_frac(zone->_watermark[WMARK_HIGH], watermark_boost_factor, 10000); + + /* + * high watermark may be uninitialised if fragmentation occurs + * very early in boot so do not boost. We do not fall + * through and boost by pageblock_nr_pages as failing + * allocations that early means that reclaim is not going + * to help and it may even be impossible to reclaim the + * boosted watermark resulting in a hang. + */ + if (!max_boost) + return; + max_boost = max(pageblock_nr_pages, max_boost); zone->watermark_boost = min(zone->watermark_boost + pageblock_nr_pages, -- cgit v1.2.3 From 6ea183d60c469560e7b08a83c9804299e84ec9eb Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 20 Feb 2019 22:19:54 -0800 Subject: mm: handle lru_add_drain_all for UP properly Since for_each_cpu(cpu, mask) added by commit 2d3854a37e8b767a ("cpumask: introduce new API, without changing anything") did not evaluate the mask argument if NR_CPUS == 1 due to CONFIG_SMP=n, lru_add_drain_all() is hitting WARN_ON() at __flush_work() added by commit 4d43d395fed12463 ("workqueue: Try to catch flush_work() without INIT_WORK().") by unconditionally calling flush_work() [1]. Workaround this issue by using CONFIG_SMP=n specific lru_add_drain_all implementation. There is no real need to defer the implementation to the workqueue as the draining is going to happen on the local cpu. So alias lru_add_drain_all to lru_add_drain which does all the necessary work. [akpm@linux-foundation.org: fix various build warnings] [1] https://lkml.kernel.org/r/18a30387-6aa5-6123-e67c-57579ecc3f38@roeck-us.net Link: http://lkml.kernel.org/r/20190213124334.GH4525@dhcp22.suse.cz Signed-off-by: Michal Hocko Reported-by: Guenter Roeck Debugged-by: Tetsuo Handa Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swap.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/mm/swap.c b/mm/swap.c index 4929bc1be60e..4d7d37eb3c40 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -320,11 +320,6 @@ static inline void activate_page_drain(int cpu) { } -static bool need_activate_page_drain(int cpu) -{ - return false; -} - void activate_page(struct page *page) { struct zone *zone = page_zone(page); @@ -653,13 +648,15 @@ void lru_add_drain(void) put_cpu(); } +#ifdef CONFIG_SMP + +static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); + static void lru_add_drain_per_cpu(struct work_struct *dummy) { lru_add_drain(); } -static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); - /* * Doesn't need any cpu hotplug locking because we do rely on per-cpu * kworkers being shut down before our page_alloc_cpu_dead callback is @@ -702,6 +699,12 @@ void lru_add_drain_all(void) mutex_unlock(&lock); } +#else +void lru_add_drain_all(void) +{ + lru_add_drain(); +} +#endif /** * release_pages - batched put_page() -- cgit v1.2.3 From 4e37504d1c49eec6434d0cc97278d2b51c9e8763 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 20 Feb 2019 22:19:59 -0800 Subject: psi: avoid divide-by-zero crash inside virtual machines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We've been seeing hard-to-trigger psi crashes when running inside VM instances: divide error: 0000 [#1] SMP PTI Modules linked in: [...] CPU: 0 PID: 212 Comm: kworker/0:2 Not tainted 4.16.18-119_fbk9_3817_gfe944c98d695 #119 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015 Workqueue: events psi_clock RIP: 0010:psi_update_stats+0x270/0x490 RSP: 0018:ffffc90001117e10 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff8800a35a13f8 RDX: 0000000000000000 RSI: ffff8800a35a1340 RDI: 0000000000000000 RBP: 0000000000000658 R08: ffff8800a35a1470 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 00000000000f8502 FS: 0000000000000000(0000) GS:ffff88023fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fbe370fa000 CR3: 00000000b1e3a000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: psi_clock+0x12/0x50 process_one_work+0x1e0/0x390 worker_thread+0x2b/0x3c0 ? rescuer_thread+0x330/0x330 kthread+0x113/0x130 ? kthread_create_worker_on_cpu+0x40/0x40 ? SyS_exit_group+0x10/0x10 ret_from_fork+0x35/0x40 Code: 48 0f 47 c7 48 01 c2 45 85 e4 48 89 16 0f 85 e6 00 00 00 4c 8b 49 10 4c 8b 51 08 49 69 d9 f2 07 00 00 48 6b c0 64 4c 8b 29 31 d2 <48> f7 f7 49 69 d5 8d 06 00 00 48 89 c5 4c 69 f0 00 98 0b 00 48 The Code-line points to `period` being 0 inside update_stats(), and we divide by that when calculating that period's pressure percentage. The elapsed period should never be 0. The reason this can happen is due to an off-by-one in the idle time / missing period calculation combined with a coarse sched_clock() in the virtual machine. The target time for aggregation is advanced into the future on a fixed grid to prevent clock drift. So when an aggregation runs after some idle period, we can not just set it to "now + psi_period", but have to calculate the downtime and advance the target time relative to itself. However, if the aggregator was disabled exactly one psi_period (ns), we drop one idle period in the calculation due to a > when we should do >=. In that case, next_update will be advanced from 'now - psi_period' to 'now' when it should be moved to 'now + psi_period'. The run finishes with last_update == next_update == sched_clock(). With hardware clocks, this exact nanosecond match isn't likely in the first place; but if it does happen, the clock will still have moved on and the period non-zero by the time the worker runs. A pointlessly short period, but besides the extra work, no harm no foul. However, a slow sched_clock() like we have on VMs might not have advanced either by the time the worker runs again. And when we calculate the elapsed period, the result, our pressure divisor, will be 0. Ouch. Fix this by correctly handling the situation when the elapsed time between aggregation runs is precisely two periods, and advance the expiration timestamp correctly to period into the future. Link: http://lkml.kernel.org/r/20190214193157.15788-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reported-by: Łukasz Siudut Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched/psi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index c3484785b179..0e97ca9306ef 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -322,7 +322,7 @@ static bool update_stats(struct psi_group *group) expires = group->next_update; if (now < expires) goto out; - if (now - expires > psi_period) + if (now - expires >= psi_period) missed_periods = div_u64(now - expires, psi_period); /* -- cgit v1.2.3 From 1062af920c07f5b54cf5060fde3339da6df0cf6b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 21 Feb 2019 08:48:09 -0800 Subject: tmpfs: fix link accounting when a tmpfile is linked in tmpfs has a peculiarity of accounting hard links as if they were separate inodes: so that when the number of inodes is limited, as it is by default, a user cannot soak up an unlimited amount of unreclaimable dcache memory just by repeatedly linking a file. But when v3.11 added O_TMPFILE, and the ability to use linkat() on the fd, we missed accommodating this new case in tmpfs: "df -i" shows that an extra "inode" remains accounted after the file is unlinked and the fd closed and the actual inode evicted. If a user repeatedly links tmpfiles into a tmpfs, the limit will be hit (ENOSPC) even after they are deleted. Just skip the extra reservation from shmem_link() in this case: there's a sense in which this first link of a tmpfile is then cheaper than a hard link of another file, but the accounting works out, and there's still good limiting, so no need to do anything more complicated. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1902182134370.7035@eggly.anvils Fixes: f4e0c30c191 ("allow the temp files created by open() to be linked to") Signed-off-by: Darrick J. Wong Signed-off-by: Hugh Dickins Reported-by: Matej Kupljen Acked-by: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 6ece1e2fe76e..0905215fb016 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2854,10 +2854,14 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr * No ordinary (disk based) filesystem counts links as inodes; * but each new link needs a new dentry, pinning lowmem, and * tmpfs dentries cannot be pruned until they are unlinked. + * But if an O_TMPFILE file is linked into the tmpfs, the + * first link must skip that, to get the accounting right. */ - ret = shmem_reserve_inode(inode->i_sb); - if (ret) - goto out; + if (inode->i_nlink) { + ret = shmem_reserve_inode(inode->i_sb); + if (ret) + goto out; + } dir->i_size += BOGO_DIRENT_SIZE; inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); -- cgit v1.2.3 From 3f41b609382388f95c0a05b69b8db0d706adafb4 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 20 Feb 2019 22:20:15 -0800 Subject: kasan: fix random seed generation for tag-based mode There are two issues with assigning random percpu seeds right now: 1. We use for_each_possible_cpu() to iterate over cpus, but cpumask is not set up yet at the moment of kasan_init(), and thus we only set the seed for cpu #0. 2. A call to get_random_u32() always returns the same number and produces a message in dmesg, since the random subsystem is not yet initialized. Fix 1 by calling kasan_init_tags() after cpumask is set up. Fix 2 by using get_cycles() instead of get_random_u32(). This gives us lower quality random numbers, but it's good enough, as KASAN is meant to be used as a debugging tool and not a mitigation. Link: http://lkml.kernel.org/r/1f815cc914b61f3516ed4cc9bfd9eeca9bd5d9de.1550677973.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Catalin Marinas Cc: Will Deacon Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/kernel/setup.c | 3 +++ arch/arm64/mm/kasan_init.c | 2 -- mm/kasan/tags.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index d09ec76f08cf..009849328289 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -339,6 +339,9 @@ void __init setup_arch(char **cmdline_p) smp_init_cpus(); smp_build_mpidr_hash(); + /* Init percpu seeds for random tags after cpus are set up. */ + kasan_init_tags(); + #ifdef CONFIG_ARM64_SW_TTBR0_PAN /* * Make sure init_thread_info.ttbr0 always generates translation diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 4b55b15707a3..f37a86d2a69d 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -252,8 +252,6 @@ void __init kasan_init(void) memset(kasan_early_shadow_page, KASAN_SHADOW_INIT, PAGE_SIZE); cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); - kasan_init_tags(); - /* At this point kasan is fully initialized. Enable error messages */ init_task.kasan_depth = 0; pr_info("KernelAddressSanitizer initialized\n"); diff --git a/mm/kasan/tags.c b/mm/kasan/tags.c index 0777649e07c4..63fca3172659 100644 --- a/mm/kasan/tags.c +++ b/mm/kasan/tags.c @@ -46,7 +46,7 @@ void kasan_init_tags(void) int cpu; for_each_possible_cpu(cpu) - per_cpu(prng_state, cpu) = get_random_u32(); + per_cpu(prng_state, cpu) = (u32)get_cycles(); } /* -- cgit v1.2.3 From dc15a8a2543cb9ebe67998eefe2880ce1a20d42e Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 20 Feb 2019 22:20:20 -0800 Subject: kasan: prevent tracing of tags.c Similarly to commit 0d0c8de8788b ("kasan: mark file common so ftrace doesn't trace it") add the -pg flag to mm/kasan/tags.c to prevent conflicts with tracing. Link: http://lkml.kernel.org/r/9c4c3ce5ccfb894c7fe66d91de7c1da2787b4da4.1550602886.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reported-by: Qian Cai Tested-by: Qian Cai Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Catalin Marinas Cc: Vincenzo Frascino Cc: Kostya Serebryany Cc: Evgeniy Stepanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile index e2bb06c1b45e..5d1065efbd47 100644 --- a/mm/kasan/Makefile +++ b/mm/kasan/Makefile @@ -7,6 +7,8 @@ KCOV_INSTRUMENT := n CFLAGS_REMOVE_common.o = -pg CFLAGS_REMOVE_generic.o = -pg +CFLAGS_REMOVE_tags.o = -pg + # Function splitter causes unnecessary splits in __asan_load1/__asan_store1 # see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533 -- cgit v1.2.3 From 219667c23c68eb3dbc0d5662b9246f28477fe529 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 20 Feb 2019 22:20:25 -0800 Subject: kasan, slab: fix conflicts with CONFIG_HARDENED_USERCOPY Similarly to commit 96fedce27e13 ("kasan: make tag based mode work with CONFIG_HARDENED_USERCOPY"), we need to reset pointer tags in __check_heap_object() in mm/slab.c before doing any pointer math. Link: http://lkml.kernel.org/r/9a5c0f958db10e69df5ff9f2b997866b56b7effc.1550602886.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Tested-by: Qian Cai Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgeniy Stepanov Cc: Kostya Serebryany Cc: Vincenzo Frascino Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/slab.c b/mm/slab.c index 78eb8c5bf4e4..c84458281a88 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -4408,6 +4408,8 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page, unsigned int objnr; unsigned long offset; + ptr = kasan_reset_tag(ptr); + /* Find and validate object. */ cachep = page->slab_cache; objnr = obj_to_index(cachep, page, (void *)ptr); -- cgit v1.2.3 From 51dedad06b5f6c3eea7ec1069631b1ef7796912a Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 20 Feb 2019 22:20:28 -0800 Subject: kasan, slab: make freelist stored without tags Similarly to "kasan, slub: move kasan_poison_slab hook before page_address", move kasan_poison_slab() before alloc_slabmgmt(), which calls page_address(), to make page_address() return value to be non-tagged. This, combined with calling kasan_reset_tag() for off-slab slab management object, leads to freelist being stored non-tagged. Link: http://lkml.kernel.org/r/dfb53b44a4d00de3879a05a9f04c1f55e584f7a1.1550602886.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Tested-by: Qian Cai Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgeniy Stepanov Cc: Kostya Serebryany Cc: Vincenzo Frascino Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index c84458281a88..4ad95fcb1686 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2359,7 +2359,7 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep, void *freelist; void *addr = page_address(page); - page->s_mem = kasan_reset_tag(addr) + colour_off; + page->s_mem = addr + colour_off; page->active = 0; if (OBJFREELIST_SLAB(cachep)) @@ -2368,6 +2368,7 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep, /* Slab management obj is off-slab. */ freelist = kmem_cache_alloc_node(cachep->freelist_cache, local_flags, nodeid); + freelist = kasan_reset_tag(freelist); if (!freelist) return NULL; } else { @@ -2681,6 +2682,13 @@ static struct page *cache_grow_begin(struct kmem_cache *cachep, offset *= cachep->colour_off; + /* + * Call kasan_poison_slab() before calling alloc_slabmgmt(), so + * page_address() in the latter returns a non-tagged pointer, + * as it should be for slab pages. + */ + kasan_poison_slab(page); + /* Get slab management. */ freelist = alloc_slabmgmt(cachep, page, offset, local_flags & ~GFP_CONSTRAINT_MASK, page_node); @@ -2689,7 +2697,6 @@ static struct page *cache_grow_begin(struct kmem_cache *cachep, slab_map_pages(cachep, page, freelist); - kasan_poison_slab(page); cache_init_objs(cachep, page); if (gfpflags_allow_blocking(local_flags)) -- cgit v1.2.3 From 557ea25383a231fe3ffc72881ada35c24b960dbc Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 20 Feb 2019 22:20:33 -0800 Subject: kasan, slab: remove redundant kasan_slab_alloc hooks kasan_slab_alloc() calls in kmem_cache_alloc() and kmem_cache_alloc_node() are redundant as they are already called via slab_alloc/slab_alloc_node()-> slab_post_alloc_hook()->kasan_slab_alloc(). Remove them. Link: http://lkml.kernel.org/r/4ca1655cdcfc4379c49c50f7bf80f81c4ad01485.1550602886.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Tested-by: Qian Cai Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgeniy Stepanov Cc: Kostya Serebryany Cc: Vincenzo Frascino Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 4ad95fcb1686..91c1863df93d 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3547,7 +3547,6 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) { void *ret = slab_alloc(cachep, flags, _RET_IP_); - ret = kasan_slab_alloc(cachep, ret, flags); trace_kmem_cache_alloc(_RET_IP_, ret, cachep->object_size, cachep->size, flags); @@ -3637,7 +3636,6 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) { void *ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_); - ret = kasan_slab_alloc(cachep, ret, flags); trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep->object_size, cachep->size, flags, nodeid); -- cgit v1.2.3 From 6373dca16c911b2828ef8d836d7f6f1800e1bbbc Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Wed, 20 Feb 2019 22:20:37 -0800 Subject: slub: fix a crash with SLUB_DEBUG + KASAN_SW_TAGS In process_slab(), "p = get_freepointer()" could return a tagged pointer, but "addr = page_address()" always return a native pointer. As the result, slab_index() is messed up here, return (p - addr) / s->size; All other callers of slab_index() have the same situation where "addr" is from page_address(), so just need to untag "p". # cat /sys/kernel/slab/hugetlbfs_inode_cache/alloc_calls Unable to handle kernel paging request at virtual address 2bff808aa4856d48 Mem abort info: ESR = 0x96000007 Exception class = DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000007 CM = 0, WnR = 0 swapper pgtable: 64k pages, 48-bit VAs, pgdp = 0000000002498338 [2bff808aa4856d48] pgd=00000097fcfd0003, pud=00000097fcfd0003, pmd=00000097fca30003, pte=00e8008b24850712 Internal error: Oops: 96000007 [#1] SMP CPU: 3 PID: 79210 Comm: read_all Tainted: G L 5.0.0-rc7+ #84 Hardware name: HPE Apollo 70 /C01_APACHE_MB , BIOS L50_5.13_1.0.6 07/10/2018 pstate: 00400089 (nzcv daIf +PAN -UAO) pc : get_map+0x78/0xec lr : get_map+0xa0/0xec sp : aeff808989e3f8e0 x29: aeff808989e3f940 x28: ffff800826200000 x27: ffff100012d47000 x26: 9700000000002500 x25: 0000000000000001 x24: 52ff8008200131f8 x23: 52ff8008200130a0 x22: 52ff800820013098 x21: ffff800826200000 x20: ffff100013172ba0 x19: 2bff808a8971bc00 x18: ffff1000148f5538 x17: 000000000000001b x16: 00000000000000ff x15: ffff1000148f5000 x14: 00000000000000d2 x13: 0000000000000001 x12: 0000000000000000 x11: 0000000020000002 x10: 2bff808aa4856d48 x9 : 0000020000000000 x8 : 68ff80082620ebb0 x7 : 0000000000000000 x6 : ffff1000105da1dc x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000010 x2 : 2bff808a8971bc00 x1 : ffff7fe002098800 x0 : ffff80082620ceb0 Process read_all (pid: 79210, stack limit = 0x00000000f65b9361) Call trace: get_map+0x78/0xec process_slab+0x7c/0x47c list_locations+0xb0/0x3c8 alloc_calls_show+0x34/0x40 slab_attr_show+0x34/0x48 sysfs_kf_seq_show+0x2e4/0x570 kernfs_seq_show+0x12c/0x1a0 seq_read+0x48c/0xf84 kernfs_fop_read+0xd4/0x448 __vfs_read+0x94/0x5d4 vfs_read+0xcc/0x194 ksys_read+0x6c/0xe8 __arm64_sys_read+0x68/0xb0 el0_svc_handler+0x230/0x3bc el0_svc+0x8/0xc Code: d3467d2a 9ac92329 8b0a0e6a f9800151 (c85f7d4b) ---[ end trace a383a9a44ff13176 ]--- Kernel panic - not syncing: Fatal exception SMP: stopping secondary CPUs SMP: failed to stop secondary CPUs 1-7,32,40,127 Kernel Offset: disabled CPU features: 0x002,20000c18 Memory Limit: none ---[ end Kernel panic - not syncing: Fatal exception ]--- Link: http://lkml.kernel.org/r/20190220020251.82039-1-cai@lca.pw Signed-off-by: Qian Cai Reviewed-by: Andrey Konovalov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index 2d2830134e60..dc777761b6b7 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -317,7 +317,7 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) /* Determine object index from a given position */ static inline unsigned int slab_index(void *p, struct kmem_cache *s, void *addr) { - return (p - addr) / s->size; + return (kasan_reset_tag(p) - addr) / s->size; } static inline unsigned int order_objects(unsigned int order, unsigned int size) -- cgit v1.2.3 From 6c8fcc096be9d02f478c508052a41a4430506ab3 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 20 Feb 2019 22:20:42 -0800 Subject: mm: don't let userspace spam allocations warnings memdump_user usually gets fed unchecked userspace input. Blasting a full backtrace into dmesg every time is a bit excessive - I'm not sure on the kernel rule in general, but at least in drm we're trying not to let unpriviledge userspace spam the logs freely. Definitely not entire warning backtraces. It also means more filtering for our CI, because our testsuite exercises these corner cases and so hits these a lot. Link: http://lkml.kernel.org/r/20190220204058.11676-1-daniel.vetter@ffwll.ch Signed-off-by: Daniel Vetter Reviewed-by: Andrew Morton Acked-by: Michal Hocko Reviewed-by: Kees Cook Cc: Mike Rapoport Cc: Roman Gushchin Cc: Vlastimil Babka Cc: Jan Stancek Cc: Andrey Ryabinin Cc: "Michael S. Tsirkin" Cc: Huang Ying Cc: Bartosz Golaszewski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/util.c b/mm/util.c index 1ea055138043..379319b1bcfd 100644 --- a/mm/util.c +++ b/mm/util.c @@ -150,7 +150,7 @@ void *memdup_user(const void __user *src, size_t len) { void *p; - p = kmalloc_track_caller(len, GFP_USER); + p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN); if (!p) return ERR_PTR(-ENOMEM); -- cgit v1.2.3 From 891cb2a72d821f930a39d5900cb7a3aa752c1d5b Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 20 Feb 2019 22:20:46 -0800 Subject: mm, memory_hotplug: fix off-by-one in is_pageblock_removable Rong Chen has reported the following boot crash: PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 1 PID: 239 Comm: udevd Not tainted 5.0.0-rc4-00149-gefad4e4 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 RIP: 0010:page_mapping+0x12/0x80 Code: 5d c3 48 89 df e8 0e ad 02 00 85 c0 75 da 89 e8 5b 5d c3 0f 1f 44 00 00 53 48 89 fb 48 8b 43 08 48 8d 50 ff a8 01 48 0f 45 da <48> 8b 53 08 48 8d 42 ff 83 e2 01 48 0f 44 c3 48 83 38 ff 74 2f 48 RSP: 0018:ffff88801fa87cd8 EFLAGS: 00010202 RAX: ffffffffffffffff RBX: fffffffffffffffe RCX: 000000000000000a RDX: fffffffffffffffe RSI: ffffffff820b9a20 RDI: ffff88801e5c0000 RBP: 6db6db6db6db6db7 R08: ffff88801e8bb000 R09: 0000000001b64d13 R10: ffff88801fa87cf8 R11: 0000000000000001 R12: ffff88801e640000 R13: ffffffff820b9a20 R14: ffff88801f145258 R15: 0000000000000001 FS: 00007fb2079817c0(0000) GS:ffff88801dd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000006 CR3: 000000001fa82000 CR4: 00000000000006a0 Call Trace: __dump_page+0x14/0x2c0 is_mem_section_removable+0x24c/0x2c0 removable_show+0x87/0xa0 dev_attr_show+0x25/0x60 sysfs_kf_seq_show+0xba/0x110 seq_read+0x196/0x3f0 __vfs_read+0x34/0x180 vfs_read+0xa0/0x150 ksys_read+0x44/0xb0 do_syscall_64+0x5e/0x4a0 entry_SYSCALL_64_after_hwframe+0x49/0xbe and bisected it down to commit efad4e475c31 ("mm, memory_hotplug: is_mem_section_removable do not pass the end of a zone"). The reason for the crash is that the mapping is garbage for poisoned (uninitialized) page. This shouldn't happen as all pages in the zone's boundary should be initialized. Later debugging revealed that the actual problem is an off-by-one when evaluating the end_page. 'start_pfn + nr_pages' resp 'zone_end_pfn' refers to a pfn after the range and as such it might belong to a differen memory section. This along with CONFIG_SPARSEMEM then makes the loop condition completely bogus because a pointer arithmetic doesn't work for pages from two different sections in that memory model. Fix the issue by reworking is_pageblock_removable to be pfn based and only use struct page where necessary. This makes the code slightly easier to follow and we will remove the problematic pointer arithmetic completely. Link: http://lkml.kernel.org/r/20190218181544.14616-1-mhocko@kernel.org Fixes: efad4e475c31 ("mm, memory_hotplug: is_mem_section_removable do not pass the end of a zone") Signed-off-by: Michal Hocko Reported-by: Tested-by: Acked-by: Mike Rapoport Reviewed-by: Oscar Salvador Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 124e794867c5..1ad28323fb9f 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1188,11 +1188,13 @@ static inline int pageblock_free(struct page *page) return PageBuddy(page) && page_order(page) >= pageblock_order; } -/* Return the start of the next active pageblock after a given page */ -static struct page *next_active_pageblock(struct page *page) +/* Return the pfn of the start of the next active pageblock after a given pfn */ +static unsigned long next_active_pageblock(unsigned long pfn) { + struct page *page = pfn_to_page(pfn); + /* Ensure the starting page is pageblock-aligned */ - BUG_ON(page_to_pfn(page) & (pageblock_nr_pages - 1)); + BUG_ON(pfn & (pageblock_nr_pages - 1)); /* If the entire pageblock is free, move to the end of free page */ if (pageblock_free(page)) { @@ -1200,16 +1202,16 @@ static struct page *next_active_pageblock(struct page *page) /* be careful. we don't have locks, page_order can be changed.*/ order = page_order(page); if ((order < MAX_ORDER) && (order >= pageblock_order)) - return page + (1 << order); + return pfn + (1 << order); } - return page + pageblock_nr_pages; + return pfn + pageblock_nr_pages; } -static bool is_pageblock_removable_nolock(struct page *page) +static bool is_pageblock_removable_nolock(unsigned long pfn) { + struct page *page = pfn_to_page(pfn); struct zone *zone; - unsigned long pfn; /* * We have to be careful here because we are iterating over memory @@ -1232,13 +1234,14 @@ static bool is_pageblock_removable_nolock(struct page *page) /* Checks if this range of memory is likely to be hot-removable. */ bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) { - struct page *page = pfn_to_page(start_pfn); - unsigned long end_pfn = min(start_pfn + nr_pages, zone_end_pfn(page_zone(page))); - struct page *end_page = pfn_to_page(end_pfn); + unsigned long end_pfn, pfn; + + end_pfn = min(start_pfn + nr_pages, + zone_end_pfn(page_zone(pfn_to_page(start_pfn)))); /* Check the starting page of each pageblock within the range */ - for (; page < end_page; page = next_active_pageblock(page)) { - if (!is_pageblock_removable_nolock(page)) + for (pfn = start_pfn; pfn < end_pfn; pfn = next_active_pageblock(pfn)) { + if (!is_pageblock_removable_nolock(pfn)) return false; cond_resched(); } -- cgit v1.2.3 From 193f3685d0546b0cea20c99894aadb70098e47bf Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 21 Feb 2019 11:19:41 +0100 Subject: ipv6: route: enforce RCU protection in rt6_update_exception_stamp_rt() We must access rt6_info->from under RCU read lock: move the dereference under such lock, with proper annotation. v1 -> v2: - avoid using multiple, racy, fetch operations for rt->from Fixes: a68886a69180 ("net/ipv6: Make from in rt6_info rcu protected") Signed-off-by: Paolo Abeni Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 964491cf3672..2b1ed8c6fcab 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1599,15 +1599,15 @@ static int rt6_remove_exception_rt(struct rt6_info *rt) static void rt6_update_exception_stamp_rt(struct rt6_info *rt) { struct rt6_exception_bucket *bucket; - struct fib6_info *from = rt->from; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; - - if (!from || - !(rt->rt6i_flags & RTF_CACHE)) - return; + struct fib6_info *from; rcu_read_lock(); + from = rcu_dereference(rt->from); + if (!from || !(rt->rt6i_flags & RTF_CACHE)) + goto unlock; + bucket = rcu_dereference(from->rt6i_exception_bucket); #ifdef CONFIG_IPV6_SUBTREES @@ -1626,6 +1626,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt) if (rt6_ex) rt6_ex->stamp = jiffies; +unlock: rcu_read_unlock(); } -- cgit v1.2.3 From bf1dc8bad1d42287164d216d8efb51c5cd381b18 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 21 Feb 2019 11:19:42 +0100 Subject: ipv6: route: enforce RCU protection in ip6_route_check_nh_onlink() We need a RCU critical section around rt6_info->from deference, and proper annotation. Fixes: 4ed591c8ab44 ("net/ipv6: Allow onlink routes to have a device mismatch if it is the default route") Signed-off-by: Paolo Abeni Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2b1ed8c6fcab..74b9b6fd4168 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2743,20 +2743,24 @@ static int ip6_route_check_nh_onlink(struct net *net, u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN; const struct in6_addr *gw_addr = &cfg->fc_gateway; u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT; + struct fib6_info *from; struct rt6_info *grt; int err; err = 0; grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0); if (grt) { + rcu_read_lock(); + from = rcu_dereference(grt->from); if (!grt->dst.error && /* ignore match if it is the default route */ - grt->from && !ipv6_addr_any(&grt->from->fib6_dst.addr) && + from && !ipv6_addr_any(&from->fib6_dst.addr) && (grt->rt6i_flags & flags || dev != grt->dst.dev)) { NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway or device mismatch"); err = -EINVAL; } + rcu_read_unlock(); ip6_rt_put(grt); } -- cgit v1.2.3 From d7cf4a3bf3a83c977a29055e1c4ffada7697b31f Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 21 Feb 2019 12:56:54 +0100 Subject: net/smc: fix smc_poll in SMC_INIT state smc_poll() returns with mask bit EPOLLPRI if the connection urg_state is SMC_URG_VALID. Since SMC_URG_VALID is zero, smc_poll signals EPOLLPRI errorneously if called in state SMC_INIT before the connection is created, for instance in a non-blocking connect scenario. This patch switches to non-zero values for the urg states. Reviewed-by: Karsten Graul Fixes: de8474eb9d50 ("net/smc: urgent data support") Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/smc/smc.h b/net/smc/smc.h index 5721416d0605..adbdf195eb08 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -113,9 +113,9 @@ struct smc_host_cdc_msg { /* Connection Data Control message */ } __aligned(8); enum smc_urg_state { - SMC_URG_VALID, /* data present */ - SMC_URG_NOTYET, /* data pending */ - SMC_URG_READ /* data was already read */ + SMC_URG_VALID = 1, /* data present */ + SMC_URG_NOTYET = 2, /* data pending */ + SMC_URG_READ = 3, /* data was already read */ }; struct smc_connection { -- cgit v1.2.3 From 156a67a9065e3339be85f811d1b13b920e50d73b Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Mon, 28 Jan 2019 09:45:01 -0800 Subject: ixgbe: fix older devices that do not support IXGBE_MRQC_L3L4TXSWEN The enabling L3/L4 filtering for transmit switched packets for all devices caused unforeseen issue on older devices when trying to send UDP traffic in an ordered sequence. This bit was originally intended for X550 devices, which supported this feature, so limit the scope of this bit to only X550 devices. Signed-off-by: Jeff Kirsher Tested-by: Andrew Bowers --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index daff8183534b..3cbb7e0324fd 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3953,8 +3953,11 @@ static void ixgbe_setup_mrqc(struct ixgbe_adapter *adapter) else mrqc = IXGBE_MRQC_VMDQRSS64EN; - /* Enable L3/L4 for Tx Switched packets */ - mrqc |= IXGBE_MRQC_L3L4TXSWEN; + /* Enable L3/L4 for Tx Switched packets only for X550, + * older devices do not support this feature + */ + if (hw->mac.type >= ixgbe_mac_X550) + mrqc |= IXGBE_MRQC_L3L4TXSWEN; } else { if (tcs > 4) mrqc = IXGBE_MRQC_RTRSS8TCEN; -- cgit v1.2.3 From 14ffeb52f3693ae0b674e59453452a2365ae9fd9 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 29 Jan 2019 15:03:17 +0100 Subject: i40e: fix potential RX buffer starvation for AF_XDP When the RX rings are created they are also populated with buffers so that packets can be received. Usually these are kernel buffers, but for AF_XDP in zero-copy mode, these are user-space buffers and in this case the application might not have sent down any buffers to the driver at this point. And if no buffers are allocated at ring creation time, no packets can be received and no interrupts will be generated so the NAPI poll function that allocates buffers to the rings will never get executed. To rectify this, we kick the NAPI context of any queue with an attached AF_XDP zero-copy socket in two places in the code. Once after an XDP program has loaded and once after the umem is registered. This take care of both cases: XDP program gets loaded first then AF_XDP socket is created, and the reverse, AF_XDP socket is created first, then XDP program is loaded. Fixes: 0a714186d3c0 ("i40e: add AF_XDP zero-copy Rx support") Signed-off-by: Magnus Karlsson Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 13 ++++++++++++- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 5 +++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index f52e2c46e6a7..3a0990de81c1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3289,8 +3289,11 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) i40e_alloc_rx_buffers_zc(ring, I40E_DESC_UNUSED(ring)) : !i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring)); if (!ok) { + /* Log this in case the user has forgotten to give the kernel + * any buffers, even later in the application. + */ dev_info(&vsi->back->pdev->dev, - "Failed allocate some buffers on %sRx ring %d (pf_q %d)\n", + "Failed to allocate some buffers on %sRx ring %d (pf_q %d)\n", ring->xsk_umem ? "UMEM enabled " : "", ring->queue_index, pf_q); } @@ -11895,6 +11898,14 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, if (old_prog) bpf_prog_put(old_prog); + /* Kick start the NAPI context if there is an AF_XDP socket open + * on that queue id. This so that receiving will start. + */ + if (need_reset && prog) + for (i = 0; i < vsi->num_queue_pairs; i++) + if (vsi->xdp_rings[i]->xsk_umem) + (void)i40e_xsk_async_xmit(vsi->netdev, i); + return 0; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 870cf654e436..3827f16e6923 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -183,6 +183,11 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem, err = i40e_queue_pair_enable(vsi, qid); if (err) return err; + + /* Kick start the NAPI context so that receiving will start */ + err = i40e_xsk_async_xmit(vsi->netdev, qid); + if (err) + return err; } return 0; -- cgit v1.2.3 From 4a9b32f30f805ca596d76605903a48eab58e0b88 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 29 Jan 2019 15:03:50 +0100 Subject: ixgbe: fix potential RX buffer starvation for AF_XDP When the RX rings are created they are also populated with buffers so that packets can be received. Usually these are kernel buffers, but for AF_XDP in zero-copy mode, these are user-space buffers and in this case the application might not have sent down any buffers to the driver at this point. And if no buffers are allocated at ring creation time, no packets can be received and no interrupts will be generated so the NAPI poll function that allocates buffers to the rings will never get executed. To rectify this, we kick the NAPI context of any queue with an attached AF_XDP zero-copy socket in two places in the code. Once after an XDP program has loaded and once after the umem is registered. This take care of both cases: XDP program gets loaded first then AF_XDP socket is created, and the reverse, AF_XDP socket is created first, then XDP program is loaded. Fixes: d0bcacd0a130 ("ixgbe: add AF_XDP zero-copy Rx support") Signed-off-by: Magnus Karlsson Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 12 +++++++++++- drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 12 ++++++++++-- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 3cbb7e0324fd..cb35d8202572 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -10228,6 +10228,7 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; struct ixgbe_adapter *adapter = netdev_priv(dev); struct bpf_prog *old_prog; + bool need_reset; if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) return -EINVAL; @@ -10250,9 +10251,10 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) return -ENOMEM; old_prog = xchg(&adapter->xdp_prog, prog); + need_reset = (!!prog != !!old_prog); /* If transitioning XDP modes reconfigure rings */ - if (!!prog != !!old_prog) { + if (need_reset) { int err = ixgbe_setup_tc(dev, adapter->hw_tcs); if (err) { @@ -10268,6 +10270,14 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) if (old_prog) bpf_prog_put(old_prog); + /* Kick start the NAPI context if there is an AF_XDP socket open + * on that queue id. This so that receiving will start. + */ + if (need_reset && prog) + for (i = 0; i < adapter->num_rx_queues; i++) + if (adapter->xdp_ring[i]->xsk_umem) + (void)ixgbe_xsk_async_xmit(adapter->netdev, i); + return 0; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index 65c3e2c979d4..654ae92342ea 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -144,11 +144,19 @@ static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter, ixgbe_txrx_ring_disable(adapter, qid); err = ixgbe_add_xsk_umem(adapter, umem, qid); + if (err) + return err; - if (if_running) + if (if_running) { ixgbe_txrx_ring_enable(adapter, qid); - return err; + /* Kick start the NAPI context so that receiving will start */ + err = ixgbe_xsk_async_xmit(adapter->netdev, qid); + if (err) + return err; + } + + return 0; } static int ixgbe_xsk_umem_disable(struct ixgbe_adapter *adapter, u16 qid) -- cgit v1.2.3 From 252f6e8eae909bc075a1b1e3b9efb095ae4c0b56 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 16 Jan 2019 14:29:50 +0300 Subject: ARCv2: Enable unaligned access in early ASM code It is currently done in arc_init_IRQ() which might be too late considering gcc 7.3.1 onwards (GNU 2018.03) generates unaligned memory accesses by default Cc: stable@vger.kernel.org #4.4+ Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta [vgupta: rewrote changelog] --- arch/arc/kernel/head.S | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 8b90d25a15cc..26e33a8b2d18 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -17,6 +17,7 @@ #include #include #include +#include .macro CPU_EARLY_SETUP @@ -47,6 +48,15 @@ sr r5, [ARC_REG_DC_CTRL] 1: + +#ifdef CONFIG_ISA_ARCV2 + ; Unaligned access is disabled at reset, so re-enable early as + ; gcc 7.3.1 (ARC GNU 2018.03) onwards generates unaligned access + ; by default + lr r5, [status32] + bset r5, r5, STATUS_AD_BIT + kflag r5 +#endif .endm .section .init.text, "ax",@progbits -- cgit v1.2.3 From f8a15f97664178f27dfbf86a38f780a532cb6df0 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 30 Jan 2019 19:32:40 +0300 Subject: ARCv2: lib: memcpy: fix doing prefetchw outside of buffer ARCv2 optimized memcpy uses PREFETCHW instruction for prefetching the next cache line but doesn't ensure that the line is not past the end of the buffer. PRETECHW changes the line ownership and marks it dirty, which can cause data corruption if this area is used for DMA IO. Fix the issue by avoiding the PREFETCHW. This leads to performance degradation but it is OK as we'll introduce new memcpy implementation optimized for unaligned memory access using. We also cut off all PREFETCH instructions at they are quite useless here: * we call PREFETCH right before LOAD instruction call. * we copy 16 or 32 bytes of data (depending on CONFIG_ARC_HAS_LL64) in a main logical loop. so we call PREFETCH 4 times (or 2 times) for each L1 cache line (in case of 64B L1 cache Line which is default case). Obviously this is not optimal. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/lib/memcpy-archs.S | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/arch/arc/lib/memcpy-archs.S b/arch/arc/lib/memcpy-archs.S index d61044dd8b58..ea14b0bf3116 100644 --- a/arch/arc/lib/memcpy-archs.S +++ b/arch/arc/lib/memcpy-archs.S @@ -25,15 +25,11 @@ #endif #ifdef CONFIG_ARC_HAS_LL64 -# define PREFETCH_READ(RX) prefetch [RX, 56] -# define PREFETCH_WRITE(RX) prefetchw [RX, 64] # define LOADX(DST,RX) ldd.ab DST, [RX, 8] # define STOREX(SRC,RX) std.ab SRC, [RX, 8] # define ZOLSHFT 5 # define ZOLAND 0x1F #else -# define PREFETCH_READ(RX) prefetch [RX, 28] -# define PREFETCH_WRITE(RX) prefetchw [RX, 32] # define LOADX(DST,RX) ld.ab DST, [RX, 4] # define STOREX(SRC,RX) st.ab SRC, [RX, 4] # define ZOLSHFT 4 @@ -41,8 +37,6 @@ #endif ENTRY_CFI(memcpy) - prefetch [r1] ; Prefetch the read location - prefetchw [r0] ; Prefetch the write location mov.f 0, r2 ;;; if size is zero jz.d [blink] @@ -72,8 +66,6 @@ ENTRY_CFI(memcpy) lpnz @.Lcopy32_64bytes ;; LOOP START LOADX (r6, r1) - PREFETCH_READ (r1) - PREFETCH_WRITE (r3) LOADX (r8, r1) LOADX (r10, r1) LOADX (r4, r1) @@ -117,9 +109,7 @@ ENTRY_CFI(memcpy) lpnz @.Lcopy8bytes_1 ;; LOOP START ld.ab r6, [r1, 4] - prefetch [r1, 28] ;Prefetch the next read location ld.ab r8, [r1,4] - prefetchw [r3, 32] ;Prefetch the next write location SHIFT_1 (r7, r6, 24) or r7, r7, r5 @@ -162,9 +152,7 @@ ENTRY_CFI(memcpy) lpnz @.Lcopy8bytes_2 ;; LOOP START ld.ab r6, [r1, 4] - prefetch [r1, 28] ;Prefetch the next read location ld.ab r8, [r1,4] - prefetchw [r3, 32] ;Prefetch the next write location SHIFT_1 (r7, r6, 16) or r7, r7, r5 @@ -204,9 +192,7 @@ ENTRY_CFI(memcpy) lpnz @.Lcopy8bytes_3 ;; LOOP START ld.ab r6, [r1, 4] - prefetch [r1, 28] ;Prefetch the next read location ld.ab r8, [r1,4] - prefetchw [r3, 32] ;Prefetch the next write location SHIFT_1 (r7, r6, 8) or r7, r7, r5 -- cgit v1.2.3 From cdf92962adb0cb23efc3c8bcf6465d16ab7c3a81 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Mon, 4 Feb 2019 21:41:51 +0300 Subject: ARC: fix actionpoints configuration detection Fix reversed logic while actionpoints configuration (full/min) detection. Fixies: 7dd380c338f1e ("ARC: boot log: print Action point details") Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index feb90093e6b1..def19b0ef8c6 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -212,7 +212,7 @@ static void read_arc_build_cfg_regs(void) READ_BCR(ARC_REG_AP_BCR, ap); if (ap.ver) { cpu->extn.ap_num = 2 << ap.num; - cpu->extn.ap_full = !!ap.min; + cpu->extn.ap_full = !ap.min; } READ_BCR(ARC_REG_SMART_BCR, bcr); -- cgit v1.2.3 From d5e3c55e01d8b1774b37b4647c30fb22f1d39077 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 5 Feb 2019 10:07:07 -0800 Subject: ARC: uacces: remove lp_start, lp_end from clobber list Newer ARC gcc handles lp_start, lp_end in a different way and doesn't like them in the clobber list. Signed-off-by: Vineet Gupta --- arch/arc/include/asm/uaccess.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index c9173c02081c..eabc3efa6c6d 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -207,7 +207,7 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long n) */ "=&r" (tmp), "+r" (to), "+r" (from) : - : "lp_count", "lp_start", "lp_end", "memory"); + : "lp_count", "memory"); return n; } @@ -433,7 +433,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) */ "=&r" (tmp), "+r" (to), "+r" (from) : - : "lp_count", "lp_start", "lp_end", "memory"); + : "lp_count", "memory"); return n; } @@ -653,7 +653,7 @@ static inline unsigned long __arc_clear_user(void __user *to, unsigned long n) " .previous \n" : "+r"(d_char), "+r"(res) : "i"(0) - : "lp_count", "lp_start", "lp_end", "memory"); + : "lp_count", "memory"); return res; } @@ -686,7 +686,7 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count) " .previous \n" : "+r"(res), "+r"(dst), "+r"(src), "=r"(val) : "g"(-EFAULT), "r"(count) - : "lp_count", "lp_start", "lp_end", "memory"); + : "lp_count", "memory"); return res; } -- cgit v1.2.3 From e494239a007e601448110ac304fe055951f9de3b Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 6 Jun 2018 10:20:37 -0700 Subject: ARCv2: support manual regfile save on interrupts There's a hardware bug which affects the HSDK platform, triggered by micro-ops for auto-saving regfile on taken interrupt. The workaround is to inhibit autosave. Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 8 ++++++ arch/arc/include/asm/entry-arcv2.h | 54 ++++++++++++++++++++++++++++++++++++++ arch/arc/kernel/entry-arcv2.S | 4 ++- arch/arc/kernel/intc-arcv2.c | 2 ++ arch/arc/plat-hsdk/Kconfig | 1 + 5 files changed, 68 insertions(+), 1 deletion(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 376366a7db81..7215f52b3413 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -407,6 +407,14 @@ config ARC_HAS_ACCL_REGS (also referred to as r58:r59). These can also be used by gcc as GPR so kernel needs to save/restore per process +config ARC_IRQ_NO_AUTOSAVE + bool "Disable hardware autosave regfile on interrupts" + default n + help + On HS cores, taken interrupt auto saves the regfile on stack. + This is programmable and can be optionally disabled in which case + software INTERRUPT_PROLOGUE/EPILGUE do the needed work + endif # ISA_ARCV2 endmenu # "ARC CPU Configuration" diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h index 309f4e6721b3..225e7df2d8ed 100644 --- a/arch/arc/include/asm/entry-arcv2.h +++ b/arch/arc/include/asm/entry-arcv2.h @@ -17,6 +17,33 @@ ; ; Now manually save: r12, sp, fp, gp, r25 +#ifdef CONFIG_ARC_IRQ_NO_AUTOSAVE +.ifnc \called_from, exception + st.as r9, [sp, -10] ; save r9 in it's final stack slot + sub sp, sp, 12 ; skip JLI, LDI, EI + + PUSH lp_count + PUSHAX lp_start + PUSHAX lp_end + PUSH blink + + PUSH r11 + PUSH r10 + + sub sp, sp, 4 ; skip r9 + + PUSH r8 + PUSH r7 + PUSH r6 + PUSH r5 + PUSH r4 + PUSH r3 + PUSH r2 + PUSH r1 + PUSH r0 +.endif +#endif + #ifdef CONFIG_ARC_HAS_ACCL_REGS PUSH r59 PUSH r58 @@ -86,6 +113,33 @@ POP r59 #endif +#ifdef CONFIG_ARC_IRQ_NO_AUTOSAVE +.ifnc \called_from, exception + POP r0 + POP r1 + POP r2 + POP r3 + POP r4 + POP r5 + POP r6 + POP r7 + POP r8 + POP r9 + POP r10 + POP r11 + + POP blink + POPAX lp_end + POPAX lp_start + + POP r9 + mov lp_count, r9 + + add sp, sp, 12 ; skip JLI, LDI, EI + ld.as r9, [sp, -10] ; reload r9 which got clobbered +.endif +#endif + .endm /*------------------------------------------------------------------------*/ diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S index cc558a25b8fa..562089d62d9d 100644 --- a/arch/arc/kernel/entry-arcv2.S +++ b/arch/arc/kernel/entry-arcv2.S @@ -209,7 +209,9 @@ restore_regs: ;####### Return from Intr ####### debug_marker_l1: - bbit1.nt r0, STATUS_DE_BIT, .Lintr_ret_to_delay_slot + ; bbit1.nt r0, STATUS_DE_BIT, .Lintr_ret_to_delay_slot + btst r0, STATUS_DE_BIT ; Z flag set if bit clear + bnz .Lintr_ret_to_delay_slot ; branch if STATUS_DE_BIT set .Lisr_ret_fast_path: ; Handle special case #1: (Entry via Exception, Return via IRQ) diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c index 067ea362fb3e..cf18b3e5a934 100644 --- a/arch/arc/kernel/intc-arcv2.c +++ b/arch/arc/kernel/intc-arcv2.c @@ -49,11 +49,13 @@ void arc_init_IRQ(void) *(unsigned int *)&ictrl = 0; +#ifndef CONFIG_ARC_IRQ_NO_AUTOSAVE ictrl.save_nr_gpr_pairs = 6; /* r0 to r11 (r12 saved manually) */ ictrl.save_blink = 1; ictrl.save_lp_regs = 1; /* LP_COUNT, LP_START, LP_END */ ictrl.save_u_to_u = 0; /* user ctxt saved on kernel stack */ ictrl.save_idx_regs = 1; /* JLI, LDI, EI */ +#endif WRITE_AUX(AUX_IRQ_CTRL, ictrl); diff --git a/arch/arc/plat-hsdk/Kconfig b/arch/arc/plat-hsdk/Kconfig index f25c085b9874..23e00216e5a5 100644 --- a/arch/arc/plat-hsdk/Kconfig +++ b/arch/arc/plat-hsdk/Kconfig @@ -9,6 +9,7 @@ menuconfig ARC_SOC_HSDK bool "ARC HS Development Kit SOC" depends on ISA_ARCV2 select ARC_HAS_ACCL_REGS + select ARC_IRQ_NO_AUTOSAVE select CLK_HSDK select RESET_HSDK select HAVE_PCI -- cgit v1.2.3 From a66f2e57bd566240d8b3884eedf503928fbbe557 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Thu, 14 Feb 2019 18:07:44 +0300 Subject: ARC: U-boot: check arguments paranoidly Handle U-boot arguments paranoidly: * don't allow to pass unknown tag. * try to use external device tree blob only if corresponding tag (TAG_DTB) is set. * don't check uboot_tag if kernel build with no ARC_UBOOT_SUPPORT. NOTE: If U-boot args are invalid we skip them and try to use embedded device tree blob. We can't panic on invalid U-boot args as we really pass invalid args due to bug in U-boot code. This happens if we don't provide external DTB to U-boot and don't set 'bootargs' U-boot environment variable (which is default case at least for HSDK board) In that case we will pass {r0 = 1 (bootargs in r2); r1 = 0; r2 = 0;} to linux which is invalid. While I'm at it refactor U-boot arguments handling code. Cc: stable@vger.kernel.org Tested-by: Corentin LABBE Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/kernel/head.S | 4 +-- arch/arc/kernel/setup.c | 87 +++++++++++++++++++++++++++++++++++-------------- 2 files changed, 64 insertions(+), 27 deletions(-) diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 26e33a8b2d18..1f945d0f40da 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -103,9 +103,9 @@ ENTRY(stext) #ifdef CONFIG_ARC_UBOOT_SUPPORT ; Uboot - kernel ABI ; r0 = [0] No uboot interaction, [1] cmdline in r2, [2] DTB in r2 - ; r1 = magic number (board identity, unused as of now + ; r1 = magic number (always zero as of now) ; r2 = pointer to uboot provided cmdline or external DTB in mem - ; These are handled later in setup_arch() + ; These are handled later in handle_uboot_args() st r0, [@uboot_tag] st r2, [@uboot_arg] #endif diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index def19b0ef8c6..8bb156164556 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -462,43 +462,80 @@ void setup_processor(void) arc_chk_core_config(); } -static inline int is_kernel(unsigned long addr) +static inline bool uboot_arg_invalid(unsigned long addr) { - if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end) - return 1; - return 0; + /* + * Check that it is a untranslated address (although MMU is not enabled + * yet, it being a high address ensures this is not by fluke) + */ + if (addr < PAGE_OFFSET) + return true; + + /* Check that address doesn't clobber resident kernel image */ + return addr >= (unsigned long)_stext && addr <= (unsigned long)_end; } -void __init setup_arch(char **cmdline_p) +#define IGNORE_ARGS "Ignore U-boot args: " + +/* uboot_tag values for U-boot - kernel ABI revision 0; see head.S */ +#define UBOOT_TAG_NONE 0 +#define UBOOT_TAG_CMDLINE 1 +#define UBOOT_TAG_DTB 2 + +void __init handle_uboot_args(void) { + bool use_embedded_dtb = true; + bool append_cmdline = false; + #ifdef CONFIG_ARC_UBOOT_SUPPORT - /* make sure that uboot passed pointer to cmdline/dtb is valid */ - if (uboot_tag && is_kernel((unsigned long)uboot_arg)) - panic("Invalid uboot arg\n"); + /* check that we know this tag */ + if (uboot_tag != UBOOT_TAG_NONE && + uboot_tag != UBOOT_TAG_CMDLINE && + uboot_tag != UBOOT_TAG_DTB) { + pr_warn(IGNORE_ARGS "invalid uboot tag: '%08x'\n", uboot_tag); + goto ignore_uboot_args; + } + + if (uboot_tag != UBOOT_TAG_NONE && + uboot_arg_invalid((unsigned long)uboot_arg)) { + pr_warn(IGNORE_ARGS "invalid uboot arg: '%px'\n", uboot_arg); + goto ignore_uboot_args; + } + + /* see if U-boot passed an external Device Tree blob */ + if (uboot_tag == UBOOT_TAG_DTB) { + machine_desc = setup_machine_fdt((void *)uboot_arg); - /* See if u-boot passed an external Device Tree blob */ - machine_desc = setup_machine_fdt(uboot_arg); /* uboot_tag == 2 */ - if (!machine_desc) + /* external Device Tree blob is invalid - use embedded one */ + use_embedded_dtb = !machine_desc; + } + + if (uboot_tag == UBOOT_TAG_CMDLINE) + append_cmdline = true; + +ignore_uboot_args: #endif - { - /* No, so try the embedded one */ + + if (use_embedded_dtb) { machine_desc = setup_machine_fdt(__dtb_start); if (!machine_desc) panic("Embedded DT invalid\n"); + } - /* - * If we are here, it is established that @uboot_arg didn't - * point to DT blob. Instead if u-boot says it is cmdline, - * append to embedded DT cmdline. - * setup_machine_fdt() would have populated @boot_command_line - */ - if (uboot_tag == 1) { - /* Ensure a whitespace between the 2 cmdlines */ - strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); - strlcat(boot_command_line, uboot_arg, - COMMAND_LINE_SIZE); - } + /* + * NOTE: @boot_command_line is populated by setup_machine_fdt() so this + * append processing can only happen after. + */ + if (append_cmdline) { + /* Ensure a whitespace between the 2 cmdlines */ + strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); + strlcat(boot_command_line, uboot_arg, COMMAND_LINE_SIZE); } +} + +void __init setup_arch(char **cmdline_p) +{ + handle_uboot_args(); /* Save unparsed command line copy for /proc/cmdline */ *cmdline_p = boot_command_line; -- cgit v1.2.3 From 493a2f812446e92bcb1e69a77381b4d39808d730 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Thu, 14 Feb 2019 18:07:45 +0300 Subject: ARC: enable uboot support unconditionally After reworking U-boot args handling code and adding paranoid arguments check we can eliminate CONFIG_ARC_UBOOT_SUPPORT and enable uboot support unconditionally. For JTAG case we can assume that core registers will come up reset value of 0 or in worst case we rely on user passing '-on=clear_regs' to Metaware debugger. Cc: stable@vger.kernel.org Tested-by: Corentin LABBE Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 12 ------------ arch/arc/configs/nps_defconfig | 1 - arch/arc/configs/vdk_hs38_defconfig | 1 - arch/arc/configs/vdk_hs38_smp_defconfig | 2 -- arch/arc/kernel/head.S | 2 -- arch/arc/kernel/setup.c | 2 -- 6 files changed, 20 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 7215f52b3413..d750b302d5ab 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -191,7 +191,6 @@ config NR_CPUS config ARC_SMP_HALT_ON_RESET bool "Enable Halt-on-reset boot mode" - default y if ARC_UBOOT_SUPPORT help In SMP configuration cores can be configured as Halt-on-reset or they could all start at same time. For Halt-on-reset, non @@ -523,17 +522,6 @@ config ARC_DBG_TLB_PARANOIA endif -config ARC_UBOOT_SUPPORT - bool "Support uboot arg Handling" - help - ARC Linux by default checks for uboot provided args as pointers to - external cmdline or DTB. This however breaks in absence of uboot, - when booting from Metaware debugger directly, as the registers are - not zeroed out on reset by mdb and/or ARCv2 based cores. The bogus - registers look like uboot args to kernel which then chokes. - So only enable the uboot arg checking/processing if users are sure - of uboot being in play. - config ARC_BUILTIN_DTB_NAME string "Built in DTB" help diff --git a/arch/arc/configs/nps_defconfig b/arch/arc/configs/nps_defconfig index 6e84060e7c90..621f59407d76 100644 --- a/arch/arc/configs/nps_defconfig +++ b/arch/arc/configs/nps_defconfig @@ -31,7 +31,6 @@ CONFIG_ARC_CACHE_LINE_SHIFT=5 # CONFIG_ARC_HAS_LLSC is not set CONFIG_ARC_KVADDR_SIZE=402 CONFIG_ARC_EMUL_UNALIGNED=y -CONFIG_ARC_UBOOT_SUPPORT=y CONFIG_PREEMPT=y CONFIG_NET=y CONFIG_UNIX=y diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig index 1e59a2e9c602..e447ace6fa1c 100644 --- a/arch/arc/configs/vdk_hs38_defconfig +++ b/arch/arc/configs/vdk_hs38_defconfig @@ -13,7 +13,6 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_ARC_PLAT_AXS10X=y CONFIG_AXS103=y CONFIG_ISA_ARCV2=y -CONFIG_ARC_UBOOT_SUPPORT=y CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38" CONFIG_PREEMPT=y CONFIG_NET=y diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig index b5c3f6c54b03..c82cdb10aaf4 100644 --- a/arch/arc/configs/vdk_hs38_smp_defconfig +++ b/arch/arc/configs/vdk_hs38_smp_defconfig @@ -15,8 +15,6 @@ CONFIG_AXS103=y CONFIG_ISA_ARCV2=y CONFIG_SMP=y # CONFIG_ARC_TIMERS_64BIT is not set -# CONFIG_ARC_SMP_HALT_ON_RESET is not set -CONFIG_ARC_UBOOT_SUPPORT=y CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38_smp" CONFIG_PREEMPT=y CONFIG_NET=y diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 1f945d0f40da..30e090625916 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -100,7 +100,6 @@ ENTRY(stext) st.ab 0, [r5, 4] 1: -#ifdef CONFIG_ARC_UBOOT_SUPPORT ; Uboot - kernel ABI ; r0 = [0] No uboot interaction, [1] cmdline in r2, [2] DTB in r2 ; r1 = magic number (always zero as of now) @@ -108,7 +107,6 @@ ENTRY(stext) ; These are handled later in handle_uboot_args() st r0, [@uboot_tag] st r2, [@uboot_arg] -#endif ; setup "current" tsk and optionally cache it in dedicated r25 mov r9, @init_task diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 8bb156164556..93d4d6639873 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -487,7 +487,6 @@ void __init handle_uboot_args(void) bool use_embedded_dtb = true; bool append_cmdline = false; -#ifdef CONFIG_ARC_UBOOT_SUPPORT /* check that we know this tag */ if (uboot_tag != UBOOT_TAG_NONE && uboot_tag != UBOOT_TAG_CMDLINE && @@ -514,7 +513,6 @@ void __init handle_uboot_args(void) append_cmdline = true; ignore_uboot_args: -#endif if (use_embedded_dtb) { machine_desc = setup_machine_fdt(__dtb_start); -- cgit v1.2.3 From b6835ea77729e7faf4656ca637ba53f42b8ee3fd Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Fri, 8 Feb 2019 13:55:19 +0300 Subject: ARC: define ARCH_SLAB_MINALIGN = 8 The default value of ARCH_SLAB_MINALIGN in "include/linux/slab.h" is "__alignof__(unsigned long long)" which for ARC unexpectedly turns out to be 4. This is not a compiler bug, but as defined by ARC ABI [1] Thus slab allocator would allocate a struct which is 32-bit aligned, which is generally OK even if struct has long long members. There was however potetial problem when it had any atomic64_t which use LLOCKD/SCONDD instructions which are required by ISA to take 64-bit addresses. This is the problem we ran into [ 4.015732] EXT4-fs (mmcblk0p2): re-mounted. Opts: (null) [ 4.167881] Misaligned Access [ 4.172356] Path: /bin/busybox.nosuid [ 4.176004] CPU: 2 PID: 171 Comm: rm Not tainted 4.19.14-yocto-standard #1 [ 4.182851] [ 4.182851] [ECR ]: 0x000d0000 => Check Programmer's Manual [ 4.190061] [EFA ]: 0xbeaec3fc [ 4.190061] [BLINK ]: ext4_delete_entry+0x210/0x234 [ 4.190061] [ERET ]: ext4_delete_entry+0x13e/0x234 [ 4.202985] [STAT32]: 0x80080002 : IE K [ 4.207236] BTA: 0x9009329c SP: 0xbe5b1ec4 FP: 0x00000000 [ 4.212790] LPS: 0x9074b118 LPE: 0x9074b120 LPC: 0x00000000 [ 4.218348] r00: 0x00000040 r01: 0x00000021 r02: 0x00000001 ... ... [ 4.270510] Stack Trace: [ 4.274510] ext4_delete_entry+0x13e/0x234 [ 4.278695] ext4_rmdir+0xe0/0x238 [ 4.282187] vfs_rmdir+0x50/0xf0 [ 4.285492] do_rmdir+0x9e/0x154 [ 4.288802] EV_Trap+0x110/0x114 The fix is to make sure slab allocations are 64-bit aligned. Do note that atomic64_t is __attribute__((aligned(8)) which means gcc does generate 64-bit aligned references, relative to beginning of container struct. However the issue is if the container itself is not 64-bit aligned, atomic64_t ends up unaligned which is what this patch ensures. [1] https://github.com/foss-for-synopsys-dwc-arc-processors/toolchain/wiki/files/ARCv2_ABI.pdf Signed-off-by: Alexey Brodkin Cc: # 4.8+ Signed-off-by: Vineet Gupta [vgupta: reworked changelog, added dependency on LL64+LLSC] --- arch/arc/include/asm/cache.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index f393b663413e..2ad77fb43639 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -52,6 +52,17 @@ #define cache_line_size() SMP_CACHE_BYTES #define ARCH_DMA_MINALIGN SMP_CACHE_BYTES +/* + * Make sure slab-allocated buffers are 64-bit aligned when atomic64_t uses + * ARCv2 64-bit atomics (LLOCKD/SCONDD). This guarantess runtime 64-bit + * alignment for any atomic64_t embedded in buffer. + * Default ARCH_SLAB_MINALIGN is __alignof__(long long) which has a relaxed + * value of 4 (and not 8) in ARC ABI. + */ +#if defined(CONFIG_ARC_HAS_LL64) && defined(CONFIG_ARC_HAS_LLSC) +#define ARCH_SLAB_MINALIGN 8 +#endif + extern void arc_cache_init(void); extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len); extern void read_decode_cache_bcr(void); -- cgit v1.2.3 From 59eb2a884f5380011179acc4662fc2cc2d850454 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Thu, 14 Feb 2019 14:03:02 +0100 Subject: i40e: fix XDP_REDIRECT/XDP xmit ring cleanup race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the driver clears the XDP xmit ring due to re-configuration or teardown, in-progress ndo_xdp_xmit must be taken into consideration. The ndo_xdp_xmit function is typically called from a NAPI context that the driver does not control. Therefore, we must be careful not to clear the XDP ring, while the call is on-going. This patch adds a synchronize_rcu() to wait for napi(s) (preempt-disable regions and softirqs), prior clearing the queue. Further, the __I40E_CONFIG_BUSY flag is checked in the ndo_xdp_xmit implementation to avoid touching the XDP xmit queue during re-configuration. Fixes: d9314c474d4f ("i40e: add support for XDP_REDIRECT") Fixes: 123cecd427b6 ("i40e: added queue pair disable/enable functions") Reported-by: Maciej Fijalkowski Signed-off-by: Björn Töpel Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 14 ++++++++++++-- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 4 +++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 3a0990de81c1..e4ff531db14a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -6728,8 +6728,13 @@ void i40e_down(struct i40e_vsi *vsi) for (i = 0; i < vsi->num_queue_pairs; i++) { i40e_clean_tx_ring(vsi->tx_rings[i]); - if (i40e_enabled_xdp_vsi(vsi)) + if (i40e_enabled_xdp_vsi(vsi)) { + /* Make sure that in-progress ndo_xdp_xmit + * calls are completed. + */ + synchronize_rcu(); i40e_clean_tx_ring(vsi->xdp_rings[i]); + } i40e_clean_rx_ring(vsi->rx_rings[i]); } @@ -11966,8 +11971,13 @@ static void i40e_queue_pair_reset_stats(struct i40e_vsi *vsi, int queue_pair) static void i40e_queue_pair_clean_rings(struct i40e_vsi *vsi, int queue_pair) { i40e_clean_tx_ring(vsi->tx_rings[queue_pair]); - if (i40e_enabled_xdp_vsi(vsi)) + if (i40e_enabled_xdp_vsi(vsi)) { + /* Make sure that in-progress ndo_xdp_xmit calls are + * completed. + */ + synchronize_rcu(); i40e_clean_tx_ring(vsi->xdp_rings[queue_pair]); + } i40e_clean_rx_ring(vsi->rx_rings[queue_pair]); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index a7e14e98889f..6c97667d20ef 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -3709,6 +3709,7 @@ int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, struct i40e_netdev_priv *np = netdev_priv(dev); unsigned int queue_index = smp_processor_id(); struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; struct i40e_ring *xdp_ring; int drops = 0; int i; @@ -3716,7 +3717,8 @@ int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, if (test_bit(__I40E_VSI_DOWN, vsi->state)) return -ENETDOWN; - if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs) + if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs || + test_bit(__I40E_CONFIG_BUSY, pf->state)) return -ENXIO; if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) -- cgit v1.2.3 From b7dc5a071ddf69c0350396b203cba32fe5bab510 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Sat, 16 Feb 2019 16:10:39 +0300 Subject: parisc: Fix ptrace syscall number modification Commit 910cd32e552e ("parisc: Fix and enable seccomp filter support") introduced a regression in ptrace-based syscall tampering: when tracer changes syscall number to -1, the kernel fails to initialize %r28 with -ENOSYS and subsequently fails to return the error code of the failed syscall to userspace. This erroneous behaviour could be observed with a simple strace syscall fault injection command which is expected to print something like this: $ strace -a0 -ewrite -einject=write:error=enospc echo hello write(1, "hello\n", 6) = -1 ENOSPC (No space left on device) (INJECTED) write(2, "echo: ", 6) = -1 ENOSPC (No space left on device) (INJECTED) write(2, "write error", 11) = -1 ENOSPC (No space left on device) (INJECTED) write(2, "\n", 1) = -1 ENOSPC (No space left on device) (INJECTED) +++ exited with 1 +++ After commit 910cd32e552ea09caa89cdbe328e468979b030dd it loops printing something like this instead: write(1, "hello\n", 6../strace: Failed to tamper with process 12345: unexpectedly got no error (return value 0, error 0) ) = 0 (INJECTED) This bug was found by strace test suite. Fixes: 910cd32e552e ("parisc: Fix and enable seccomp filter support") Cc: stable@vger.kernel.org # v4.5+ Signed-off-by: Dmitry V. Levin Tested-by: Helge Deller Signed-off-by: Helge Deller --- arch/parisc/kernel/ptrace.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index 2582df1c529b..0964c236e3e5 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -308,15 +308,29 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, long do_syscall_trace_enter(struct pt_regs *regs) { - if (test_thread_flag(TIF_SYSCALL_TRACE) && - tracehook_report_syscall_entry(regs)) { + if (test_thread_flag(TIF_SYSCALL_TRACE)) { + int rc = tracehook_report_syscall_entry(regs); + /* - * Tracing decided this syscall should not happen or the - * debugger stored an invalid system call number. Skip - * the system call and the system call restart handling. + * As tracesys_next does not set %r28 to -ENOSYS + * when %r20 is set to -1, initialize it here. */ - regs->gr[20] = -1UL; - goto out; + regs->gr[28] = -ENOSYS; + + if (rc) { + /* + * A nonzero return code from + * tracehook_report_syscall_entry() tells us + * to prevent the syscall execution. Skip + * the syscall call and the syscall restart handling. + * + * Note that the tracer may also just change + * regs->gr[20] to an invalid syscall number, + * that is handled by tracesys_next. + */ + regs->gr[20] = -1UL; + return -1; + } } /* Do the secure computing check after ptrace. */ @@ -340,7 +354,6 @@ long do_syscall_trace_enter(struct pt_regs *regs) regs->gr[24] & 0xffffffff, regs->gr[23] & 0xffffffff); -out: /* * Sign extend the syscall number to 64bit since it may have been * modified by a compat ptrace call -- cgit v1.2.3 From c685c69fba71462c3f9f6a1fb6151cded6c74d42 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Wed, 20 Feb 2019 15:20:14 +0000 Subject: ixgbe: don't do any AF_XDP zero-copy transmit if netif is not OK An issue has been found while testing zero-copy XDP that causes a reset to be triggered. As it takes some time to turn the carrier on after setting zc, and we already start trying to transmit some packets, watchdog considers this as an erroneous state and triggers a reset. Don't do any work if netif carrier is not OK. Fixes: 8221c5eba8c13 (ixgbe: add AF_XDP zero-copy Tx support) Signed-off-by: Jan Sokolowski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index 654ae92342ea..36a8879536a4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -642,7 +642,8 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) dma_addr_t dma; while (budget-- > 0) { - if (unlikely(!ixgbe_desc_unused(xdp_ring))) { + if (unlikely(!ixgbe_desc_unused(xdp_ring)) || + !netif_carrier_ok(xdp_ring->netdev)) { work_done = false; break; } -- cgit v1.2.3 From 71d73a0b43c2b101a960c624290c8a053d174cac Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 21 Feb 2019 20:16:10 +0100 Subject: CREDITS/MAINTAINERS: Retire parisc-linux.org email domain Retire the parisc-linux.org email domain and provide alternative email addresses for the remaining users, as agreed upon with them. Signed-off-by: Helge Deller --- CREDITS | 20 +++++++++----------- MAINTAINERS | 5 ++--- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/CREDITS b/CREDITS index e818eb6a3e71..0175098d4776 100644 --- a/CREDITS +++ b/CREDITS @@ -842,10 +842,9 @@ D: ax25-utils maintainer. N: Helge Deller E: deller@gmx.de -E: hdeller@redhat.de -D: PA-RISC Linux hacker, LASI-, ASP-, WAX-, LCD/LED-driver -S: Schimmelsrain 1 -S: D-69231 Rauenberg +W: http://www.parisc-linux.org/ +D: PA-RISC Linux architecture maintainer +D: LASI-, ASP-, WAX-, LCD/LED-driver S: Germany N: Jean Delvare @@ -1361,7 +1360,7 @@ S: Stellenbosch, Western Cape S: South Africa N: Grant Grundler -E: grundler@parisc-linux.org +E: grantgrundler@gmail.com W: http://obmouse.sourceforge.net/ W: http://www.parisc-linux.org/ D: obmouse - rewrote Olivier Florent's Omnibook 600 "pop-up" mouse driver @@ -2492,7 +2491,7 @@ S: Syracuse, New York 13206 S: USA N: Kyle McMartin -E: kyle@parisc-linux.org +E: kyle@mcmartin.ca D: Linux/PARISC hacker D: AD1889 sound driver S: Ottawa, Canada @@ -3780,14 +3779,13 @@ S: 21513 Conradia Ct S: Cupertino, CA 95014 S: USA -N: Thibaut Varene -E: T-Bone@parisc-linux.org -W: http://www.parisc-linux.org/~varenet/ -P: 1024D/B7D2F063 E67C 0D43 A75E 12A5 BB1C FA2F 1E32 C3DA B7D2 F063 +N: Thibaut Varène +E: hacks+kernel@slashdirt.org +W: http://hacks.slashdirt.org/ D: PA-RISC port minion, PDC and GSCPS2 drivers, debuglocks and other bits D: Some ARM at91rm9200 bits, S1D13XXX FB driver, random patches here and there D: AD1889 sound driver -S: Paris, France +S: France N: Heikki Vatiainen E: hessu@cs.tut.fi diff --git a/MAINTAINERS b/MAINTAINERS index 41ce5f4ad838..e6e17d8c5aae 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -409,8 +409,7 @@ F: drivers/platform/x86/wmi.c F: include/uapi/linux/wmi.h AD1889 ALSA SOUND DRIVER -M: Thibaut Varene -W: http://wiki.parisc-linux.org/AD1889 +W: https://parisc.wiki.kernel.org/index.php/AD1889 L: linux-parisc@vger.kernel.org S: Maintained F: sound/pci/ad1889.* @@ -11488,7 +11487,7 @@ F: Documentation/blockdev/paride.txt F: drivers/block/paride/ PARISC ARCHITECTURE -M: "James E.J. Bottomley" +M: "James E.J. Bottomley" M: Helge Deller L: linux-parisc@vger.kernel.org W: http://www.parisc-linux.org/ -- cgit v1.2.3 From 18de100ed6f0d3bf74036de9fd4528f208d585e6 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 21 Feb 2019 20:58:16 +0100 Subject: MAINTAINERS: mark CAIF as orphan The listed address for the CAIF maintainer bounces with "553 5.3.0 ... No such user here", and the only existing email address of the maintainer in git history hasn't responded in a week. Therefore, remove the listed maintainer and mark CAIF as orphan. Signed-off-by: Jann Horn Signed-off-by: David S. Miller --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 41ce5f4ad838..98457a87b238 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3390,9 +3390,8 @@ F: Documentation/media/v4l-drivers/cafe_ccic* F: drivers/media/platform/marvell-ccic/ CAIF NETWORK LAYER -M: Dmitry Tarnyagin L: netdev@vger.kernel.org -S: Supported +S: Orphan F: Documentation/networking/caif/ F: drivers/net/caif/ F: include/uapi/linux/caif/ -- cgit v1.2.3 From ad49bc6361ca29e3318b6f71a6fc361d2a8c9f26 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 18 Feb 2019 17:14:25 +0800 Subject: net: vrf: remove MTU limits for vrf device Similiar to commit e94cd8113ce63 ("net: remove MTU limits for dummy and ifb device"), MTU is irrelevant for VRF device. We init it as 64K while limit it to [68, 1500] may make users feel confused. Reported-by: Jianlin Shi Signed-off-by: Hangbin Liu Reviewed-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 95909e262ba4..7c1430ed0244 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1273,6 +1273,9 @@ static void vrf_setup(struct net_device *dev) /* default to no qdisc; user can add if desired */ dev->priv_flags |= IFF_NO_QUEUE; + + dev->min_mtu = 0; + dev->max_mtu = 0; } static int vrf_validate(struct nlattr *tb[], struct nlattr *data[], -- cgit v1.2.3 From 3c963a3306eada999be5ebf4f293dfa3d3945487 Mon Sep 17 00:00:00 2001 From: Michal Soltys Date: Mon, 18 Feb 2019 17:55:28 +0100 Subject: bonding: fix PACKET_ORIGDEV regression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes a subtle PACKET_ORIGDEV regression which was a side effect of fixes introduced by: 6a9e461f6fe4 bonding: pass link-local packets to bonding master also. ... to: b89f04c61efe bonding: deliver link-local packets with skb->dev set to link that packets arrived on While 6a9e461f6fe4 restored pre-b89f04c61efe presence of link-local packets on bonding masters (which is required e.g. by linux bridges participating in spanning tree or needed for lab-like setups created with group_fwd_mask) it also caused the originating device information to be lost due to cloning. Maciej Żenczykowski proposed another solution that doesn't require packet cloning and retains original device information - instead of returning RX_HANDLER_PASS for all link-local packets it's now limited only to packets from inactive slaves. At the same time, packets passed to bonding masters retain correct information about the originating device and PACKET_ORIGDEV can be used to determine it. This elegantly solves all issues so far: - link-local packets that were removed from bonding masters - LLDP daemons being forced to explicitly bind to slave interfaces - PACKET_ORIGDEV having no effect on bond interfaces Fixes: 6a9e461f6fe4 (bonding: pass link-local packets to bonding master also.) Reported-by: Vincent Bernat Signed-off-by: Michal Soltys Signed-off-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 485462d3087f..537c90c8eb0a 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1183,29 +1183,22 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb) } } - /* Link-local multicast packets should be passed to the - * stack on the link they arrive as well as pass them to the - * bond-master device. These packets are mostly usable when - * stack receives it with the link on which they arrive - * (e.g. LLDP) they also must be available on master. Some of - * the use cases include (but are not limited to): LLDP agents - * that must be able to operate both on enslaved interfaces as - * well as on bonds themselves; linux bridges that must be able - * to process/pass BPDUs from attached bonds when any kind of - * STP version is enabled on the network. + /* + * For packets determined by bond_should_deliver_exact_match() call to + * be suppressed we want to make an exception for link-local packets. + * This is necessary for e.g. LLDP daemons to be able to monitor + * inactive slave links without being forced to bind to them + * explicitly. + * + * At the same time, packets that are passed to the bonding master + * (including link-local ones) can have their originating interface + * determined via PACKET_ORIGDEV socket option. */ - if (is_link_local_ether_addr(eth_hdr(skb)->h_dest)) { - struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); - - if (nskb) { - nskb->dev = bond->dev; - nskb->queue_mapping = 0; - netif_rx(nskb); - } - return RX_HANDLER_PASS; - } - if (bond_should_deliver_exact_match(skb, slave, bond)) + if (bond_should_deliver_exact_match(skb, slave, bond)) { + if (is_link_local_ether_addr(eth_hdr(skb)->h_dest)) + return RX_HANDLER_PASS; return RX_HANDLER_EXACT; + } skb->dev = bond->dev; -- cgit v1.2.3 From 223b7329ec6a0dae1b7f7db7b770e93f4a069ef9 Mon Sep 17 00:00:00 2001 From: Tung Nguyen Date: Tue, 19 Feb 2019 11:20:47 +0700 Subject: tipc: improve function tipc_wait_for_cond() Commit 844cf763fba6 ("tipc: make macro tipc_wait_for_cond() smp safe") replaced finish_wait() with remove_wait_queue() but still used prepare_to_wait(). This causes unnecessary conditional checking before adding to wait queue in prepare_to_wait(). This commit replaces prepare_to_wait() with add_wait_queue() as the pair function with remove_wait_queue(). Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Tung Nguyen Signed-off-by: David S. Miller --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1217c90a363b..81b87916a0eb 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -388,7 +388,7 @@ static int tipc_sk_sock_err(struct socket *sock, long *timeout) rc_ = tipc_sk_sock_err((sock_), timeo_); \ if (rc_) \ break; \ - prepare_to_wait(sk_sleep(sk_), &wait_, TASK_INTERRUPTIBLE); \ + add_wait_queue(sk_sleep(sk_), &wait_); \ release_sock(sk_); \ *(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \ sched_annotate_sleep(); \ -- cgit v1.2.3 From 48766a583c7961af080de2df692f476624a9a21a Mon Sep 17 00:00:00 2001 From: Tung Nguyen Date: Tue, 19 Feb 2019 11:20:48 +0700 Subject: tipc: improve function tipc_wait_for_rcvmsg() This commit replaces schedule_timeout() with wait_woken() in function tipc_wait_for_rcvmsg(). wait_woken() uses memory barriers in its implementation to avoid potential race condition when putting a process into sleeping state and then waking it up. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Tung Nguyen Signed-off-by: David S. Miller --- net/tipc/socket.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 81b87916a0eb..684f2125fc6b 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1677,7 +1677,7 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk) static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) { struct sock *sk = sock->sk; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); long timeo = *timeop; int err = sock_error(sk); @@ -1685,15 +1685,17 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) return err; for (;;) { - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { if (sk->sk_shutdown & RCV_SHUTDOWN) { err = -ENOTCONN; break; } + add_wait_queue(sk_sleep(sk), &wait); release_sock(sk); - timeo = schedule_timeout(timeo); + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); + sched_annotate_sleep(); lock_sock(sk); + remove_wait_queue(sk_sleep(sk), &wait); } err = 0; if (!skb_queue_empty(&sk->sk_receive_queue)) @@ -1709,7 +1711,6 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) if (err) break; } - finish_wait(sk_sleep(sk), &wait); *timeop = timeo; return err; } -- cgit v1.2.3 From 9e8db5913264d3967b93c765a6a9e464d9c473db Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 18 Feb 2019 23:37:12 -0500 Subject: net: avoid false positives in untrusted gso validation GSO packets with vnet_hdr must conform to a small set of gso_types. The below commit uses flow dissection to drop packets that do not. But it has false positives when the skb is not fully initialized. Dissection needs skb->protocol and skb->network_header. Infer skb->protocol from gso_type as the two must agree. SKB_GSO_UDP can use both ipv4 and ipv6, so try both. Exclude callers for which network header offset is not known. Fixes: d5be7f632bad ("net: validate untrusted gso packets without csum offload") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 71f2394abbf7..e0348cb0a1dd 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -61,10 +61,20 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, /* gso packets without NEEDS_CSUM do not set transport_offset. * probe and drop if does not match one of the above types. */ - if (gso_type) { + if (gso_type && skb->network_header) { + if (!skb->protocol) + virtio_net_hdr_set_proto(skb, hdr); +retry: skb_probe_transport_header(skb, -1); - if (!skb_transport_header_was_set(skb)) + if (!skb_transport_header_was_set(skb)) { + /* UFO does not specify ipv4 or 6: try both */ + if (gso_type & SKB_GSO_UDP && + skb->protocol == htons(ETH_P_IP)) { + skb->protocol = htons(ETH_P_IPV6); + goto retry; + } return -EINVAL; + } } } -- cgit v1.2.3 From 7b2e932f633bcb7b190fc7031ce6dac75f8c3472 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 21 Feb 2019 13:44:49 -0800 Subject: ARCv2: don't assume core 0x54 has dual issue The first release of core4 (0x54) was dual issue only (HS4x). Newer releases allow hardware to be configured as single issue (HS3x) or dual issue. Prevent accessing a HS4x only aux register in HS3x, which otherwise leads to illegal instruction exceptions Signed-off-by: Vineet Gupta --- arch/arc/include/asm/arcregs.h | 8 ++++++++ arch/arc/kernel/setup.c | 26 +++++++++++++++++++++----- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index f1b86cef0905..a27eafdc8260 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -151,6 +151,14 @@ struct bcr_isa_arcv2 { #endif }; +struct bcr_uarch_build_arcv2 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:8, prod:8, maj:8, min:8; +#else + unsigned int min:8, maj:8, prod:8, pad:8; +#endif +}; + struct bcr_mpy { #ifdef CONFIG_CPU_BIG_ENDIAN unsigned int pad:8, x1616:8, dsp:4, cycles:2, type:2, ver:8; diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 93d4d6639873..7b2340996cf8 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -199,13 +199,29 @@ static void read_arc_build_cfg_regs(void) cpu->bpu.ret_stk = 4 << bpu.rse; if (cpu->core.family >= 0x54) { - unsigned int exec_ctrl; - READ_BCR(AUX_EXEC_CTRL, exec_ctrl); - cpu->extn.dual_enb = !(exec_ctrl & 1); + struct bcr_uarch_build_arcv2 uarch; - /* dual issue always present for this core */ - cpu->extn.dual = 1; + /* + * The first 0x54 core (uarch maj:min 0:1 or 0:2) was + * dual issue only (HS4x). But next uarch rev (1:0) + * allows it be configured for single issue (HS3x) + * Ensure we fiddle with dual issue only on HS4x + */ + READ_BCR(ARC_REG_MICRO_ARCH_BCR, uarch); + + if (uarch.prod == 4) { + unsigned int exec_ctrl; + + /* dual issue hardware always present */ + cpu->extn.dual = 1; + + READ_BCR(AUX_EXEC_CTRL, exec_ctrl); + + /* dual issue hardware enabled ? */ + cpu->extn.dual_enb = !(exec_ctrl & 1); + + } } } -- cgit v1.2.3 From 2bdf700e538828d6456150b9319e5f689b062d54 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 19 Feb 2019 17:42:05 +0100 Subject: net: ip_gre: do not report erspan_ver for gre or gretap Report erspan version field to userspace in ipgre_fill_info just for erspan tunnels. The issue can be triggered with the following reproducer: $ip link add name gre1 type gre local 192.168.0.1 remote 192.168.1.1 $ip link set dev gre1 up $ip -d link sh gre1 13: gre1@NONE: mtu 1476 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000 link/gre 192.168.0.1 peer 192.168.1.1 promiscuity 0 minmtu 0 maxmtu 0 gre remote 192.168.1.1 local 192.168.0.1 ttl inherit erspan_ver 0 addrgenmode eui64 numtxqueues 1 numrxqueues 1 Fixes: f551c91de262 ("net: erspan: introduce erspan v2 for ip_gre") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 3978f807fa8b..6ae89f2b541b 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1457,9 +1457,23 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) struct ip_tunnel_parm *p = &t->parms; __be16 o_flags = p->o_flags; - if ((t->erspan_ver == 1 || t->erspan_ver == 2) && - !t->collect_md) - o_flags |= TUNNEL_KEY; + if (t->erspan_ver == 1 || t->erspan_ver == 2) { + if (!t->collect_md) + o_flags |= TUNNEL_KEY; + + if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver)) + goto nla_put_failure; + + if (t->erspan_ver == 1) { + if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index)) + goto nla_put_failure; + } else { + if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir)) + goto nla_put_failure; + if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid)) + goto nla_put_failure; + } + } if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || nla_put_be16(skb, IFLA_GRE_IFLAGS, @@ -1495,19 +1509,6 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) goto nla_put_failure; } - if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver)) - goto nla_put_failure; - - if (t->erspan_ver == 1) { - if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index)) - goto nla_put_failure; - } else if (t->erspan_ver == 2) { - if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir)) - goto nla_put_failure; - if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid)) - goto nla_put_failure; - } - return 0; nla_put_failure: -- cgit v1.2.3 From 103d0244d29fcaf38f1339d4538919bbbc051490 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 19 Feb 2019 17:42:06 +0100 Subject: net: ip6_gre: do not report erspan_ver for ip6gre or ip6gretap Report erspan version field to userspace in ip6gre_fill_info just for erspan_v6 tunnels. Moreover report IFLA_GRE_ERSPAN_INDEX only for erspan version 1. The issue can be triggered with the following reproducer: $ip link add name gre6 type ip6gre local 2001::1 remote 2002::2 $ip link set gre6 up $ip -d link sh gre6 14: grep6@NONE: mtu 1448 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000 link/gre6 2001::1 peer 2002::2 promiscuity 0 minmtu 0 maxmtu 0 ip6gre remote 2002::2 local 2001::1 hoplimit 64 encaplimit 4 tclass 0x00 flowlabel 0x00000 erspan_index 0 erspan_ver 0 addrgenmode eui64 Fixes: 94d7d8f29287 ("ip6_gre: add erspan v2 support") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 43890898b0b5..0fdd0109d131 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -2104,9 +2104,23 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) struct __ip6_tnl_parm *p = &t->parms; __be16 o_flags = p->o_flags; - if ((p->erspan_ver == 1 || p->erspan_ver == 2) && - !p->collect_md) - o_flags |= TUNNEL_KEY; + if (p->erspan_ver == 1 || p->erspan_ver == 2) { + if (!p->collect_md) + o_flags |= TUNNEL_KEY; + + if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver)) + goto nla_put_failure; + + if (p->erspan_ver == 1) { + if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index)) + goto nla_put_failure; + } else { + if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, p->dir)) + goto nla_put_failure; + if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, p->hwid)) + goto nla_put_failure; + } + } if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || nla_put_be16(skb, IFLA_GRE_IFLAGS, @@ -2121,8 +2135,7 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) || nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) || nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags) || - nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark) || - nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index)) + nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE, @@ -2140,19 +2153,6 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) goto nla_put_failure; } - if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver)) - goto nla_put_failure; - - if (p->erspan_ver == 1) { - if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index)) - goto nla_put_failure; - } else if (p->erspan_ver == 2) { - if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, p->dir)) - goto nla_put_failure; - if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, p->hwid)) - goto nla_put_failure; - } - return 0; nla_put_failure: -- cgit v1.2.3 From 6321aa197547da397753757bd84c6ce64b3e3d89 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 19 Feb 2019 22:53:50 +0100 Subject: phonet: fix building with clang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit clang warns about overflowing the data[] member in the struct pnpipehdr: net/phonet/pep.c:295:8: warning: array index 4 is past the end of the array (which contains 1 element) [-Warray-bounds] if (hdr->data[4] == PEP_IND_READY) ^ ~ include/net/phonet/pep.h:66:3: note: array 'data' declared here u8 data[1]; Using a flexible array member at the end of the struct avoids the warning, but since we cannot have a flexible array member inside of the union, each index now has to be moved back by one, which makes it a little uglier. Signed-off-by: Arnd Bergmann Acked-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/pep.h | 5 +++-- net/phonet/pep.c | 32 ++++++++++++++++---------------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index b669fe6dbc3b..98f31c7ea23d 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -63,10 +63,11 @@ struct pnpipehdr { u8 state_after_reset; /* reset request */ u8 error_code; /* any response */ u8 pep_type; /* status indication */ - u8 data[1]; + u8 data0; /* anything else */ }; + u8 data[]; }; -#define other_pep_type data[1] +#define other_pep_type data[0] static inline struct pnpipehdr *pnp_hdr(struct sk_buff *skb) { diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 9fc76b19cd3c..db3473540303 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -132,7 +132,7 @@ static int pep_indicate(struct sock *sk, u8 id, u8 code, ph->utid = 0; ph->message_id = id; ph->pipe_handle = pn->pipe_handle; - ph->data[0] = code; + ph->error_code = code; return pn_skb_send(sk, skb, NULL); } @@ -153,7 +153,7 @@ static int pipe_handler_request(struct sock *sk, u8 id, u8 code, ph->utid = id; /* whatever */ ph->message_id = id; ph->pipe_handle = pn->pipe_handle; - ph->data[0] = code; + ph->error_code = code; return pn_skb_send(sk, skb, NULL); } @@ -208,7 +208,7 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code, struct pnpipehdr *ph; struct sockaddr_pn dst; u8 data[4] = { - oph->data[0], /* PEP type */ + oph->pep_type, /* PEP type */ code, /* error code, at an unusual offset */ PAD, PAD, }; @@ -221,7 +221,7 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code, ph->utid = oph->utid; ph->message_id = PNS_PEP_CTRL_RESP; ph->pipe_handle = oph->pipe_handle; - ph->data[0] = oph->data[1]; /* CTRL id */ + ph->data0 = oph->data[0]; /* CTRL id */ pn_skb_get_src_sockaddr(oskb, &dst); return pn_skb_send(sk, skb, &dst); @@ -272,17 +272,17 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) return -EINVAL; hdr = pnp_hdr(skb); - if (hdr->data[0] != PN_PEP_TYPE_COMMON) { + if (hdr->pep_type != PN_PEP_TYPE_COMMON) { net_dbg_ratelimited("Phonet unknown PEP type: %u\n", - (unsigned int)hdr->data[0]); + (unsigned int)hdr->pep_type); return -EOPNOTSUPP; } - switch (hdr->data[1]) { + switch (hdr->data[0]) { case PN_PEP_IND_FLOW_CONTROL: switch (pn->tx_fc) { case PN_LEGACY_FLOW_CONTROL: - switch (hdr->data[4]) { + switch (hdr->data[3]) { case PEP_IND_BUSY: atomic_set(&pn->tx_credits, 0); break; @@ -292,7 +292,7 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) } break; case PN_ONE_CREDIT_FLOW_CONTROL: - if (hdr->data[4] == PEP_IND_READY) + if (hdr->data[3] == PEP_IND_READY) atomic_set(&pn->tx_credits, wake = 1); break; } @@ -301,12 +301,12 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) case PN_PEP_IND_ID_MCFC_GRANT_CREDITS: if (pn->tx_fc != PN_MULTI_CREDIT_FLOW_CONTROL) break; - atomic_add(wake = hdr->data[4], &pn->tx_credits); + atomic_add(wake = hdr->data[3], &pn->tx_credits); break; default: net_dbg_ratelimited("Phonet unknown PEP indication: %u\n", - (unsigned int)hdr->data[1]); + (unsigned int)hdr->data[0]); return -EOPNOTSUPP; } if (wake) @@ -318,7 +318,7 @@ static int pipe_rcv_created(struct sock *sk, struct sk_buff *skb) { struct pep_sock *pn = pep_sk(sk); struct pnpipehdr *hdr = pnp_hdr(skb); - u8 n_sb = hdr->data[0]; + u8 n_sb = hdr->data0; pn->rx_fc = pn->tx_fc = PN_LEGACY_FLOW_CONTROL; __skb_pull(skb, sizeof(*hdr)); @@ -506,7 +506,7 @@ static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb) return -ECONNREFUSED; /* Parse sub-blocks */ - n_sb = hdr->data[4]; + n_sb = hdr->data[3]; while (n_sb > 0) { u8 type, buf[6], len = sizeof(buf); const u8 *data = pep_get_sb(skb, &type, &len, buf); @@ -739,7 +739,7 @@ static int pipe_do_remove(struct sock *sk) ph->utid = 0; ph->message_id = PNS_PIPE_REMOVE_REQ; ph->pipe_handle = pn->pipe_handle; - ph->data[0] = PAD; + ph->data0 = PAD; return pn_skb_send(sk, skb, NULL); } @@ -817,7 +817,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, peer_type = hdr->other_pep_type << 8; /* Parse sub-blocks (options) */ - n_sb = hdr->data[4]; + n_sb = hdr->data[3]; while (n_sb > 0) { u8 type, buf[1], len = sizeof(buf); const u8 *data = pep_get_sb(skb, &type, &len, buf); @@ -1109,7 +1109,7 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb) ph->utid = 0; if (pn->aligned) { ph->message_id = PNS_PIPE_ALIGNED_DATA; - ph->data[0] = 0; /* padding */ + ph->data0 = 0; /* padding */ } else ph->message_id = PNS_PIPE_DATA; ph->pipe_handle = pn->pipe_handle; -- cgit v1.2.3 From 17407715240456448e4989bee46ffc93991add83 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 19 Feb 2019 13:12:40 +0800 Subject: mac80211_hwsim: propagate genlmsg_reply return code genlmsg_reply can fail, so propagate its return code Signed-off-by: Li RongQing Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 320edcac4699..6359053bd0c7 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3554,7 +3554,7 @@ static int hwsim_get_radio_nl(struct sk_buff *msg, struct genl_info *info) goto out_err; } - genlmsg_reply(skb, info); + res = genlmsg_reply(skb, info); break; } -- cgit v1.2.3 From 5c14a4d05f68415af9e41a4e667d1748d41d1baf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 21 Feb 2019 18:29:36 +0100 Subject: mac80211: Change default tx_sk_pacing_shift to 7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we did the original tests for the optimal value of sk_pacing_shift, we came up with 6 ms of buffering as the default. Sadly, 6 is not a power of two, so when picking the shift value I erred on the size of less buffering and picked 4 ms instead of 8. This was probably wrong; those 2 ms of extra buffering makes a larger difference than I thought. So, change the default pacing shift to 7, which corresponds to 8 ms of buffering. The point of diminishing returns really kicks in after 8 ms, and so having this as a default should cut down on the need for extensive per-device testing and overrides needed in the drivers. Cc: stable@vger.kernel.org Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- net/mac80211/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 87a729926734..977dea436ee8 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -615,13 +615,13 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, * We need a bit of data queued to build aggregates properly, so * instruct the TCP stack to allow more than a single ms of data * to be queued in the stack. The value is a bit-shift of 1 - * second, so 8 is ~4ms of queued data. Only affects local TCP + * second, so 7 is ~8ms of queued data. Only affects local TCP * sockets. * This is the default, anyhow - drivers may need to override it * for local reasons (longer buffers, longer completion time, or * similar). */ - local->hw.tx_sk_pacing_shift = 8; + local->hw.tx_sk_pacing_shift = 7; /* set up some defaults */ local->hw.queues = 1; -- cgit v1.2.3 From 51d0af222f6fa43134c6187ab4f374630f6e0d96 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 22 Feb 2019 13:21:15 +0100 Subject: mac80211: allocate tailroom for forwarded mesh packets Forwarded packets enter the tx path through ieee80211_add_pending_skb, which skips the ieee80211_skb_resize call. Fixes WARN_ON in ccmp_encrypt_skb and resulting packet loss. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index bb4d71efb6fb..c2a6da5d80da 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2644,6 +2644,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u16 ac, q, hdrlen; + int tailroom = 0; hdr = (struct ieee80211_hdr *) skb->data; hdrlen = ieee80211_hdrlen(hdr->frame_control); @@ -2732,8 +2733,12 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) if (!ifmsh->mshcfg.dot11MeshForwarding) goto out; + if (sdata->crypto_tx_tailroom_needed_cnt) + tailroom = IEEE80211_ENCRYPT_TAILROOM; + fwd_skb = skb_copy_expand(skb, local->tx_headroom + - sdata->encrypt_headroom, 0, GFP_ATOMIC); + sdata->encrypt_headroom, + tailroom, GFP_ATOMIC); if (!fwd_skb) goto out; -- cgit v1.2.3 From 7c0cdf0b3940f63d9777c3fcf250a2f83859ca54 Mon Sep 17 00:00:00 2001 From: Alban Crequy Date: Fri, 22 Feb 2019 14:19:08 +0100 Subject: bpf, lpm: fix lookup bug in map_delete_elem trie_delete_elem() was deleting an entry even though it was not matching if the prefixlen was correct. This patch adds a check on matchlen. Reproducer: $ sudo bpftool map create /sys/fs/bpf/mylpm type lpm_trie key 8 value 1 entries 128 name mylpm flags 1 $ sudo bpftool map update pinned /sys/fs/bpf/mylpm key hex 10 00 00 00 aa bb cc dd value hex 01 $ sudo bpftool map dump pinned /sys/fs/bpf/mylpm key: 10 00 00 00 aa bb cc dd value: 01 Found 1 element $ sudo bpftool map delete pinned /sys/fs/bpf/mylpm key hex 10 00 00 00 ff ff ff ff $ echo $? 0 $ sudo bpftool map dump pinned /sys/fs/bpf/mylpm Found 0 elements A similar reproducer is added in the selftests. Without the patch: $ sudo ./tools/testing/selftests/bpf/test_lpm_map test_lpm_map: test_lpm_map.c:485: test_lpm_delete: Assertion `bpf_map_delete_elem(map_fd, key) == -1 && errno == ENOENT' failed. Aborted With the patch: test_lpm_map runs without errors. Fixes: e454cf595853 ("bpf: Implement map_delete_elem for BPF_MAP_TYPE_LPM_TRIE") Cc: Craig Gallek Signed-off-by: Alban Crequy Acked-by: Craig Gallek Signed-off-by: Daniel Borkmann --- kernel/bpf/lpm_trie.c | 1 + tools/testing/selftests/bpf/test_lpm_map.c | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index abf1002080df..93a5cbbde421 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -471,6 +471,7 @@ static int trie_delete_elem(struct bpf_map *map, void *_key) } if (!node || node->prefixlen != key->prefixlen || + node->prefixlen != matchlen || (node->flags & LPM_TREE_NODE_FLAG_IM)) { ret = -ENOENT; goto out; diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c index 147e34cfceb7..02d7c871862a 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/test_lpm_map.c @@ -474,6 +474,16 @@ static void test_lpm_delete(void) assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 && errno == ENOENT); + key->prefixlen = 30; // unused prefix so far + inet_pton(AF_INET, "192.255.0.0", key->data); + assert(bpf_map_delete_elem(map_fd, key) == -1 && + errno == ENOENT); + + key->prefixlen = 16; // same prefix as the root node + inet_pton(AF_INET, "192.255.0.0", key->data); + assert(bpf_map_delete_elem(map_fd, key) == -1 && + errno == ENOENT); + /* assert initial lookup */ key->prefixlen = 32; inet_pton(AF_INET, "192.168.0.1", key->data); -- cgit v1.2.3 From cc1780fc42c76c705dd07ea123f1143dc5057630 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 20 Feb 2019 13:32:11 +0000 Subject: KEYS: user: Align the payload buffer Align the payload of "user" and "logon" keys so that users of the keyrings service can access it as a struct that requires more than 2-byte alignment. fscrypt currently does this which results in the read of fscrypt_key::size being misaligned as it needs 4-byte alignment. Align to __alignof__(u64) rather than __alignof__(long) since in the future it's conceivable that people would use structs beginning with u64, which on some platforms would require more than 'long' alignment. Reported-by: Aaro Koskinen Fixes: 2aa349f6e37c ("[PATCH] Keys: Export user-defined keyring operations") Fixes: 88bd6ccdcdd6 ("ext4 crypto: add encryption key management facilities") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Tested-by: Aaro Koskinen Signed-off-by: David Howells Signed-off-by: James Morris --- include/keys/user-type.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/keys/user-type.h b/include/keys/user-type.h index e098cbe27db5..12babe991594 100644 --- a/include/keys/user-type.h +++ b/include/keys/user-type.h @@ -31,7 +31,7 @@ struct user_key_payload { struct rcu_head rcu; /* RCU destructor */ unsigned short datalen; /* length of this data */ - char data[0]; /* actual data */ + char data[0] __aligned(__alignof__(u64)); /* actual data */ }; extern struct key_type key_type_user; -- cgit v1.2.3 From ede0fa98a900e657d1fcd80b50920efc896c1a4c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 22 Feb 2019 15:36:18 +0000 Subject: KEYS: always initialize keyring_index_key::desc_len syzbot hit the 'BUG_ON(index_key->desc_len == 0);' in __key_link_begin() called from construct_alloc_key() during sys_request_key(), because the length of the key description was never calculated. The problem is that we rely on ->desc_len being initialized by search_process_keyrings(), specifically by search_nested_keyrings(). But, if the process isn't subscribed to any keyrings that never happens. Fix it by always initializing keyring_index_key::desc_len as soon as the description is set, like we already do in some places. The following program reproduces the BUG_ON() when it's run as root and no session keyring has been installed. If it doesn't work, try removing pam_keyinit.so from /etc/pam.d/login and rebooting. #include #include #include int main(void) { int id = add_key("keyring", "syz", NULL, 0, KEY_SPEC_USER_KEYRING); keyctl_setperm(id, KEY_OTH_WRITE); setreuid(5000, 5000); request_key("user", "desc", "", id); } Reported-by: syzbot+ec24e95ea483de0a24da@syzkaller.appspotmail.com Fixes: b2a4df200d57 ("KEYS: Expand the capacity of a keyring") Signed-off-by: Eric Biggers Signed-off-by: David Howells Cc: stable@vger.kernel.org Signed-off-by: James Morris --- security/keys/keyring.c | 4 +--- security/keys/proc.c | 3 +-- security/keys/request_key.c | 1 + security/keys/request_key_auth.c | 2 +- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/security/keys/keyring.c b/security/keys/keyring.c index eadebb92986a..f81372f53dd7 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -661,9 +661,6 @@ static bool search_nested_keyrings(struct key *keyring, BUG_ON((ctx->flags & STATE_CHECKS) == 0 || (ctx->flags & STATE_CHECKS) == STATE_CHECKS); - if (ctx->index_key.description) - ctx->index_key.desc_len = strlen(ctx->index_key.description); - /* Check to see if this top-level keyring is what we are looking for * and whether it is valid or not. */ @@ -914,6 +911,7 @@ key_ref_t keyring_search(key_ref_t keyring, struct keyring_search_context ctx = { .index_key.type = type, .index_key.description = description, + .index_key.desc_len = strlen(description), .cred = current_cred(), .match_data.cmp = key_default_cmp, .match_data.raw_data = description, diff --git a/security/keys/proc.c b/security/keys/proc.c index d2b802072693..78ac305d715e 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -165,8 +165,7 @@ static int proc_keys_show(struct seq_file *m, void *v) int rc; struct keyring_search_context ctx = { - .index_key.type = key->type, - .index_key.description = key->description, + .index_key = key->index_key, .cred = m->file->f_cred, .match_data.cmp = lookup_user_key_possessed, .match_data.raw_data = key, diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 3f56a312dd35..7a0c6b666ff0 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -531,6 +531,7 @@ struct key *request_key_and_link(struct key_type *type, struct keyring_search_context ctx = { .index_key.type = type, .index_key.description = description, + .index_key.desc_len = strlen(description), .cred = current_cred(), .match_data.cmp = key_default_cmp, .match_data.raw_data = description, diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index afc304e8b61e..bda6201c6c45 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -247,7 +247,7 @@ struct key *key_get_instantiation_authkey(key_serial_t target_id) struct key *authkey; key_ref_t authkey_ref; - sprintf(description, "%x", target_id); + ctx.index_key.desc_len = sprintf(description, "%x", target_id); authkey_ref = search_process_keyrings(&ctx); -- cgit v1.2.3 From ad7dc69aeb23138cc23c406cac25003b97e8ee17 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 22 Feb 2019 17:45:01 +0100 Subject: x86/kvm/mmu: fix switch between root and guest MMUs Commit 14c07ad89f4d ("x86/kvm/mmu: introduce guest_mmu") brought one subtle change: previously, when switching back from L2 to L1, we were resetting MMU hooks (like mmu->get_cr3()) in kvm_init_mmu() called from nested_vmx_load_cr3() and now we do that in nested_ept_uninit_mmu_context() when we re-target vcpu->arch.mmu pointer. The change itself looks logical: if nested_ept_init_mmu_context() changes something than nested_ept_uninit_mmu_context() restores it back. There is, however, one thing: the following call chain: nested_vmx_load_cr3() kvm_mmu_new_cr3() __kvm_mmu_new_cr3() fast_cr3_switch() cached_root_available() now happens with MMU hooks pointing to the new MMU (root MMU in our case) while previously it was happening with the old one. cached_root_available() tries to stash current root but it is incorrect to read current CR3 with mmu->get_cr3(), we need to use old_mmu->get_cr3() which in case we're switching from L2 to L1 is guest_mmu. (BTW, in shadow page tables case this is a non-issue because we don't switch MMU). While we could've tried to guess that we're switching between MMUs and call the right ->get_cr3() from cached_root_available() this seems to be overly complicated. Instead, just stash the corresponding CR3 when setting root_hpa and make cached_root_available() use the stashed value. Fixes: 14c07ad89f4d ("x86/kvm/mmu: introduce guest_mmu") Signed-off-by: Vitaly Kuznetsov Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu.c | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4660ce90de7f..593e17b7797e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -397,6 +397,7 @@ struct kvm_mmu { void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, const void *pte); hpa_t root_hpa; + gpa_t root_cr3; union kvm_mmu_role mmu_role; u8 root_level; u8 shadow_root_level; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index da9c42349b1f..6e62ed3852ac 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3555,6 +3555,7 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, &invalid_list); mmu->root_hpa = INVALID_PAGE; } + mmu->root_cr3 = 0; } kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); @@ -3610,6 +3611,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root); } else BUG(); + vcpu->arch.mmu->root_cr3 = vcpu->arch.mmu->get_cr3(vcpu); return 0; } @@ -3618,10 +3620,11 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) { struct kvm_mmu_page *sp; u64 pdptr, pm_mask; - gfn_t root_gfn; + gfn_t root_gfn, root_cr3; int i; - root_gfn = vcpu->arch.mmu->get_cr3(vcpu) >> PAGE_SHIFT; + root_cr3 = vcpu->arch.mmu->get_cr3(vcpu); + root_gfn = root_cr3 >> PAGE_SHIFT; if (mmu_check_root(vcpu, root_gfn)) return 1; @@ -3646,7 +3649,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); vcpu->arch.mmu->root_hpa = root; - return 0; + goto set_root_cr3; } /* @@ -3712,6 +3715,9 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->lm_root); } +set_root_cr3: + vcpu->arch.mmu->root_cr3 = root_cr3; + return 0; } @@ -4163,7 +4169,7 @@ static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3, struct kvm_mmu_root_info root; struct kvm_mmu *mmu = vcpu->arch.mmu; - root.cr3 = mmu->get_cr3(vcpu); + root.cr3 = mmu->root_cr3; root.hpa = mmu->root_hpa; for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { @@ -4176,6 +4182,7 @@ static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3, } mmu->root_hpa = root.hpa; + mmu->root_cr3 = root.cr3; return i < KVM_MMU_NUM_PREV_ROOTS; } @@ -5516,11 +5523,13 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu) vcpu->arch.walk_mmu = &vcpu->arch.root_mmu; vcpu->arch.root_mmu.root_hpa = INVALID_PAGE; + vcpu->arch.root_mmu.root_cr3 = 0; vcpu->arch.root_mmu.translate_gpa = translate_gpa; for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) vcpu->arch.root_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; vcpu->arch.guest_mmu.root_hpa = INVALID_PAGE; + vcpu->arch.guest_mmu.root_cr3 = 0; vcpu->arch.guest_mmu.translate_gpa = translate_gpa; for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; -- cgit v1.2.3 From 511da98d207d5c0675a10351b01e37cbe50a79e5 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Fri, 1 Feb 2019 00:09:43 +0800 Subject: kvm: x86: Return LA57 feature based on hardware capability Previously, 'commit 372fddf70904 ("x86/mm: Introduce the 'no5lvl' kernel parameter")' cleared X86_FEATURE_LA57 in boot_cpu_data, if Linux chooses to not run in 5-level paging mode. Yet boot_cpu_data is queried by do_cpuid_ent() as the host capability later when creating vcpus, and Qemu will not be able to detect this feature and create VMs with LA57 feature. As discussed earlier, VMs can still benefit from extended linear address width, e.g. to enhance features like ASLR. So we would like to fix this, by return the true hardware capability when Qemu queries. Signed-off-by: Yu Zhang Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index bbffa6c54697..c07958b59f50 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -335,6 +335,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0; unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0; unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0; + unsigned f_la57 = 0; /* cpuid 1.edx */ const u32 kvm_cpuid_1_edx_x86_features = @@ -489,7 +490,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, // TSC_ADJUST is emulated entry->ebx |= F(TSC_ADJUST); entry->ecx &= kvm_cpuid_7_0_ecx_x86_features; + f_la57 = entry->ecx & F(LA57); cpuid_mask(&entry->ecx, CPUID_7_ECX); + /* Set LA57 based on hardware capability. */ + entry->ecx |= f_la57; entry->ecx |= f_umip; /* PKU is not yet implemented for shadow paging. */ if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) -- cgit v1.2.3 From de3ccd26fafc707b09792d9b633c8b5b48865315 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Fri, 1 Feb 2019 00:09:23 +0800 Subject: KVM: MMU: record maximum physical address width in kvm_mmu_extended_role Previously, commit 7dcd57552008 ("x86/kvm/mmu: check if tdp/shadow MMU reconfiguration is needed") offered some optimization to avoid the unnecessary reconfiguration. Yet one scenario is broken - when cpuid changes VM's maximum physical address width, reconfiguration is needed to reset the reserved bits. Also, the TDP may need to reset its shadow_root_level when this value is changed. To fix this, a new field, maxphyaddr, is introduced in the extended role structure to keep track of the configured guest physical address width. Signed-off-by: Yu Zhang Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 593e17b7797e..180373360e34 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -299,6 +299,7 @@ union kvm_mmu_extended_role { unsigned int cr4_smap:1; unsigned int cr4_smep:1; unsigned int cr4_la57:1; + unsigned int maxphyaddr:6; }; }; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 6e62ed3852ac..f2d1d230d5b8 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4777,6 +4777,7 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu) ext.cr4_pse = !!is_pse(vcpu); ext.cr4_pke = !!kvm_read_cr4_bits(vcpu, X86_CR4_PKE); ext.cr4_la57 = !!kvm_read_cr4_bits(vcpu, X86_CR4_LA57); + ext.maxphyaddr = cpuid_maxphyaddr(vcpu); ext.valid = 1; -- cgit v1.2.3 From d1f20c03f48102e52eb98b8651d129b83134cae4 Mon Sep 17 00:00:00 2001 From: Maciej Kwiecien Date: Fri, 22 Feb 2019 09:45:26 +0100 Subject: sctp: don't compare hb_timer expire date before starting it hb_timer might not start at all for a particular transport because its start is conditional. In a result a node is not sending heartbeats. Function sctp_transport_reset_hb_timer has two roles: - initial start of hb_timer for a given transport, - update expire date of hb_timer for a given transport. The function is optimized to update timer's expire only if it is before a new calculated one but this comparison is invalid for a timer which has not yet started. Such a timer has expire == 0 and if a new expire value is bigger than (MAX_JIFFIES / 2 + 2) then "time_before" macro will fail and timer will not start resulting in no heartbeat packets send by the node. This was found when association was initialized within first 5 mins after system boot due to jiffies init value which is near to MAX_JIFFIES. Test kernel version: 4.9.154 (ARCH=arm) hb_timer.expire = 0; //initialized, not started timer new_expire = MAX_JIFFIES / 2 + 2; //or more time_before(hb_timer.expire, new_expire) == false Fixes: ba6f5e33bdbb ("sctp: avoid refreshing heartbeat timer too often") Reported-by: Marcin Stojek Tested-by: Marcin Stojek Signed-off-by: Maciej Kwiecien Reviewed-by: Alexander Sverdlin Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/transport.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 033696e6f74f..ad158d311ffa 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -207,7 +207,8 @@ void sctp_transport_reset_hb_timer(struct sctp_transport *transport) /* When a data chunk is sent, reset the heartbeat interval. */ expires = jiffies + sctp_transport_timeout(transport); - if (time_before(transport->hb_timer.expires, expires) && + if ((time_before(transport->hb_timer.expires, expires) || + !timer_pending(&transport->hb_timer)) && !mod_timer(&transport->hb_timer, expires + prandom_u32_max(transport->rto))) sctp_transport_hold(transport); -- cgit v1.2.3 From 7cc9f7003a969d359f608ebb701d42cafe75b84a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 20 Feb 2019 00:15:30 +0100 Subject: ipvlan: disallow userns cap_net_admin to change global mode/flags When running Docker with userns isolation e.g. --userns-remap="default" and spawning up some containers with CAP_NET_ADMIN under this realm, I noticed that link changes on ipvlan slave device inside that container can affect all devices from this ipvlan group which are in other net namespaces where the container should have no permission to make changes to, such as the init netns, for example. This effectively allows to undo ipvlan private mode and switch globally to bridge mode where slaves can communicate directly without going through hostns, or it allows to switch between global operation mode (l2/l3/l3s) for everyone bound to the given ipvlan master device. libnetwork plugin here is creating an ipvlan master and ipvlan slave in hostns and a slave each that is moved into the container's netns upon creation event. * In hostns: # ip -d a [...] 8: cilium_host@bond0: mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000 link/ether 0c:c4:7a:e1:3d:cc brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 65535 ipvlan mode l3 bridge numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 inet 10.41.0.1/32 scope link cilium_host valid_lft forever preferred_lft forever [...] * Spawn container & change ipvlan mode setting inside of it: # docker run -dt --cap-add=NET_ADMIN --network cilium-net --name client -l app=test cilium/netperf 9fff485d69dcb5ce37c9e33ca20a11ccafc236d690105aadbfb77e4f4170879c # docker exec -ti client ip -d a [...] 10: cilium0@if4: mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000 link/ether 0c:c4:7a:e1:3d:cc brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 65535 ipvlan mode l3 bridge numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 inet 10.41.197.43/32 brd 10.41.197.43 scope global cilium0 valid_lft forever preferred_lft forever # docker exec -ti client ip link change link cilium0 name cilium0 type ipvlan mode l2 # docker exec -ti client ip -d a [...] 10: cilium0@if4: mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000 link/ether 0c:c4:7a:e1:3d:cc brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 65535 ipvlan mode l2 bridge numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 inet 10.41.197.43/32 brd 10.41.197.43 scope global cilium0 valid_lft forever preferred_lft forever * In hostns (mode switched to l2): # ip -d a [...] 8: cilium_host@bond0: mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000 link/ether 0c:c4:7a:e1:3d:cc brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 65535 ipvlan mode l2 bridge numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 inet 10.41.0.1/32 scope link cilium_host valid_lft forever preferred_lft forever [...] Same l3 -> l2 switch would also happen by creating another slave inside the container's network namespace when specifying the existing cilium0 link to derive the actual (bond0) master: # docker exec -ti client ip link add link cilium0 name cilium1 type ipvlan mode l2 # docker exec -ti client ip -d a [...] 2: cilium1@if4: mtu 1500 qdisc noop state DOWN group default qlen 1000 link/ether 0c:c4:7a:e1:3d:cc brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 65535 ipvlan mode l2 bridge numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 10: cilium0@if4: mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000 link/ether 0c:c4:7a:e1:3d:cc brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 65535 ipvlan mode l2 bridge numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 inet 10.41.197.43/32 brd 10.41.197.43 scope global cilium0 valid_lft forever preferred_lft forever * In hostns: # ip -d a [...] 8: cilium_host@bond0: mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000 link/ether 0c:c4:7a:e1:3d:cc brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 65535 ipvlan mode l2 bridge numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 inet 10.41.0.1/32 scope link cilium_host valid_lft forever preferred_lft forever [...] One way to mitigate it is to check CAP_NET_ADMIN permissions of the ipvlan master device's ns, and only then allow to change mode or flags for all devices bound to it. Above two cases are then disallowed after the patch. Signed-off-by: Daniel Borkmann Acked-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 7cdac77d0c68..07e41c42bcf5 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -499,6 +499,8 @@ static int ipvlan_nl_changelink(struct net_device *dev, if (!data) return 0; + if (!ns_capable(dev_net(ipvlan->phy_dev)->user_ns, CAP_NET_ADMIN)) + return -EPERM; if (data[IFLA_IPVLAN_MODE]) { u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]); @@ -601,6 +603,8 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev, struct ipvl_dev *tmp = netdev_priv(phy_dev); phy_dev = tmp->phy_dev; + if (!ns_capable(dev_net(phy_dev)->user_ns, CAP_NET_ADMIN)) + return -EPERM; } else if (!netif_is_ipvlan_port(phy_dev)) { /* Exit early if the underlying link is invalid or busy */ if (phy_dev->type != ARPHRD_ETHER || -- cgit v1.2.3 From c286909fe5458f69e533c845b757fd2c35064d26 Mon Sep 17 00:00:00 2001 From: David Chen Date: Wed, 20 Feb 2019 13:47:19 +0800 Subject: r8152: Fix an error on RTL8153-BD MAC Address Passthrough support RTL8153-BD is used in Dell DA300 type-C dongle. Added RTL8153-BD support to activate MAC address pass through on DA300. Apply correction on previously submitted patch in net.git tree. Signed-off-by: David Chen Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index ada6baf8847a..86c8c64fbb0f 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1179,7 +1179,7 @@ static int vendor_mac_passthru_addr_read(struct r8152 *tp, struct sockaddr *sa) } else { /* test for RTL8153-BND and RTL8153-BD */ ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_MISC_1); - if ((ocp_data & BND_MASK) == 0 && (ocp_data & BD_MASK)) { + if ((ocp_data & BND_MASK) == 0 && (ocp_data & BD_MASK) == 0) { netif_dbg(tp, probe, tp->netdev, "Invalid variant for MAC pass through\n"); return -ENODEV; -- cgit v1.2.3 From 8c7a77267eec81dd81af8412f29e50c0b1082548 Mon Sep 17 00:00:00 2001 From: George Wilkie Date: Wed, 20 Feb 2019 08:19:11 +0000 Subject: team: use operstate consistently for linkup When a port is added to a team, its initial state is derived from netif_carrier_ok rather than netif_oper_up. If it is carrier up but operationally down at the time of being added, the port state.linkup will be set prematurely. port state.linkup should be set consistently using netif_oper_up rather than netif_carrier_ok. Fixes: f1d22a1e0595 ("team: account for oper state") Signed-off-by: George Wilkie Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/team/team.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 958f1cf67282..6ce3f666d142 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1256,7 +1256,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev, list_add_tail_rcu(&port->list, &team->port_list); team_port_enable(team, port); __team_compute_features(team); - __team_port_change_port_added(port, !!netif_carrier_ok(port_dev)); + __team_port_change_port_added(port, !!netif_oper_up(port_dev)); __team_options_change_check(team); netdev_info(dev, "Port device %s added\n", portname); @@ -2915,7 +2915,7 @@ static int team_device_event(struct notifier_block *unused, switch (event) { case NETDEV_UP: - if (netif_carrier_ok(dev)) + if (netif_oper_up(dev)) team_port_change_check(port, true); break; case NETDEV_DOWN: -- cgit v1.2.3 From efcc9bcaf77c07df01371a7c34e50424c291f3ac Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 20 Feb 2019 09:23:03 +0100 Subject: net: ip6_gre: fix possible NULL pointer dereference in ip6erspan_set_version Fix a possible NULL pointer dereference in ip6erspan_set_version checking nlattr data pointer kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 7549 Comm: syz-executor432 Not tainted 5.0.0-rc6-next-20190218 #37 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:ip6erspan_set_version+0x5c/0x350 net/ipv6/ip6_gre.c:1726 Code: 07 38 d0 7f 08 84 c0 0f 85 9f 02 00 00 49 8d bc 24 b0 00 00 00 c6 43 54 01 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 9a 02 00 00 4d 8b ac 24 b0 00 00 00 4d 85 ed 0f RSP: 0018:ffff888089ed7168 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: ffff8880869d6e58 RCX: 0000000000000000 RDX: 0000000000000016 RSI: ffffffff862736b4 RDI: 00000000000000b0 RBP: ffff888089ed7180 R08: 1ffff11010d3adcb R09: ffff8880869d6e58 R10: ffffed1010d3add5 R11: ffff8880869d6eaf R12: 0000000000000000 R13: ffffffff8931f8c0 R14: ffffffff862825d0 R15: ffff8880869d6e58 FS: 0000000000b3d880(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000184 CR3: 0000000092cc5000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ip6erspan_newlink+0x66/0x7b0 net/ipv6/ip6_gre.c:2210 __rtnl_newlink+0x107b/0x16c0 net/core/rtnetlink.c:3176 rtnl_newlink+0x69/0xa0 net/core/rtnetlink.c:3234 rtnetlink_rcv_msg+0x465/0xb00 net/core/rtnetlink.c:5192 netlink_rcv_skb+0x17a/0x460 net/netlink/af_netlink.c:2485 rtnetlink_rcv+0x1d/0x30 net/core/rtnetlink.c:5210 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0x536/0x720 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x8ae/0xd70 net/netlink/af_netlink.c:1925 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg+0xdd/0x130 net/socket.c:631 ___sys_sendmsg+0x806/0x930 net/socket.c:2136 __sys_sendmsg+0x105/0x1d0 net/socket.c:2174 __do_sys_sendmsg net/socket.c:2183 [inline] __se_sys_sendmsg net/socket.c:2181 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2181 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x440159 Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 fb 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fffa69156e8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000440159 RDX: 0000000000000000 RSI: 0000000020001340 RDI: 0000000000000003 RBP: 00000000006ca018 R08: 0000000000000001 R09: 00000000004002c8 R10: 0000000000000011 R11: 0000000000000246 R12: 00000000004019e0 R13: 0000000000401a70 R14: 0000000000000000 R15: 0000000000000000 Modules linked in: ---[ end trace 09f8a7d13b4faaa1 ]--- RIP: 0010:ip6erspan_set_version+0x5c/0x350 net/ipv6/ip6_gre.c:1726 Code: 07 38 d0 7f 08 84 c0 0f 85 9f 02 00 00 49 8d bc 24 b0 00 00 00 c6 43 54 01 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 9a 02 00 00 4d 8b ac 24 b0 00 00 00 4d 85 ed 0f RSP: 0018:ffff888089ed7168 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: ffff8880869d6e58 RCX: 0000000000000000 RDX: 0000000000000016 RSI: ffffffff862736b4 RDI: 00000000000000b0 RBP: ffff888089ed7180 R08: 1ffff11010d3adcb R09: ffff8880869d6e58 R10: ffffed1010d3add5 R11: ffff8880869d6eaf R12: 0000000000000000 R13: ffffffff8931f8c0 R14: ffffffff862825d0 R15: ffff8880869d6e58 FS: 0000000000b3d880(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000184 CR3: 0000000092cc5000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fixes: 4974d5f678ab ("net: ip6_gre: initialize erspan_ver just for erspan tunnels") Reported-and-tested-by: syzbot+30191cf1057abd3064af@syzkaller.appspotmail.com Signed-off-by: Lorenzo Bianconi Reviewed-by: Greg Rose Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 0fdd0109d131..26f25b6e2833 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1722,6 +1722,9 @@ static int ip6erspan_tap_validate(struct nlattr *tb[], struct nlattr *data[], static void ip6erspan_set_version(struct nlattr *data[], struct __ip6_tnl_parm *parms) { + if (!data) + return; + parms->erspan_ver = 1; if (data[IFLA_GRE_ERSPAN_VER]) parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]); -- cgit v1.2.3 From f6d25aca1ba3f46b76dabf6023a0dc2062dc792e Mon Sep 17 00:00:00 2001 From: Vadim Lomovtsev Date: Wed, 20 Feb 2019 11:02:43 +0000 Subject: net: thunderx: correct typo in macro name Correct STREERING to STEERING at macro name for BGX steering register. Signed-off-by: Vadim Lomovtsev Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 2 +- drivers/net/ethernet/cavium/thunder/thunder_bgx.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index e337da6ba2a4..673c57b8023f 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1217,7 +1217,7 @@ static void bgx_init_hw(struct bgx *bgx) /* Disable MAC steering (NCSI traffic) */ for (i = 0; i < RX_TRAFFIC_STEER_RULE_COUNT; i++) - bgx_reg_write(bgx, 0, BGX_CMR_RX_STREERING + (i * 8), 0x00); + bgx_reg_write(bgx, 0, BGX_CMR_RX_STEERING + (i * 8), 0x00); } static u8 bgx_get_lane2sds_cfg(struct bgx *bgx, struct lmac *lmac) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index cbdd20b9ee6f..5cbc54e9eb19 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -60,7 +60,7 @@ #define RX_DMACX_CAM_EN BIT_ULL(48) #define RX_DMACX_CAM_LMACID(x) (((u64)x) << 49) #define RX_DMAC_COUNT 32 -#define BGX_CMR_RX_STREERING 0x300 +#define BGX_CMR_RX_STEERING 0x300 #define RX_TRAFFIC_STEER_RULE_COUNT 8 #define BGX_CMR_CHAN_MSK_AND 0x450 #define BGX_CMR_BIST_STATUS 0x460 -- cgit v1.2.3 From 2ecbe4f4a027890a5d74a5100075aa6a373bea2c Mon Sep 17 00:00:00 2001 From: Vadim Lomovtsev Date: Wed, 20 Feb 2019 11:02:43 +0000 Subject: net: thunderx: replace global nicvf_rx_mode_wq work queue for all VFs to private for each of them. Having one work queue for receive mode configuration ndo_set_rx_mode() call for all VFs results in making each of them wait till the set_rx_mode() call completes for another VF if any of close, set receive mode and change flags calls being already invoked. Potentially this could cause device state change before appropriate call of receive mode configuration completes, so the call itself became meaningless, corrupt data or break configuration sequence. We don't need any delays in NIC VF configuration sequence so having delayed work call with 0 delay has no sense. This commit is to implement one work queue for each NIC VF for set_rx_mode task and to let them work independently and replacing delayed_work with work_struct. Signed-off-by: Vadim Lomovtsev Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic.h | 4 +++- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 30 +++++++++++++----------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index f4d81765221e..376a96bce33f 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -271,7 +271,7 @@ struct xcast_addr_list { }; struct nicvf_work { - struct delayed_work work; + struct work_struct work; u8 mode; struct xcast_addr_list *mc; }; @@ -327,6 +327,8 @@ struct nicvf { struct nicvf_work rx_mode_work; /* spinlock to protect workqueue arguments from concurrent access */ spinlock_t rx_mode_wq_lock; + /* workqueue for handling kernel ndo_set_rx_mode() calls */ + struct workqueue_struct *nicvf_rx_mode_wq; /* PTP timestamp */ struct cavium_ptp *ptp_clock; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 88f8a8fa93cd..abf24e7dff2d 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -68,9 +68,6 @@ module_param(cpi_alg, int, 0444); MODULE_PARM_DESC(cpi_alg, "PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)"); -/* workqueue for handling kernel ndo_set_rx_mode() calls */ -static struct workqueue_struct *nicvf_rx_mode_wq; - static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx) { if (nic->sqs_mode) @@ -1311,6 +1308,9 @@ int nicvf_stop(struct net_device *netdev) struct nicvf_cq_poll *cq_poll = NULL; union nic_mbx mbx = {}; + /* wait till all queued set_rx_mode tasks completes */ + drain_workqueue(nic->nicvf_rx_mode_wq); + mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN; nicvf_send_msg_to_pf(nic, &mbx); @@ -1418,6 +1418,9 @@ int nicvf_open(struct net_device *netdev) struct nicvf_cq_poll *cq_poll = NULL; union nic_mbx mbx = {}; + /* wait till all queued set_rx_mode tasks completes if any */ + drain_workqueue(nic->nicvf_rx_mode_wq); + netif_carrier_off(netdev); err = nicvf_register_misc_interrupt(nic); @@ -1973,7 +1976,7 @@ static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs, static void nicvf_set_rx_mode_task(struct work_struct *work_arg) { struct nicvf_work *vf_work = container_of(work_arg, struct nicvf_work, - work.work); + work); struct nicvf *nic = container_of(vf_work, struct nicvf, rx_mode_work); u8 mode; struct xcast_addr_list *mc; @@ -2030,7 +2033,7 @@ static void nicvf_set_rx_mode(struct net_device *netdev) kfree(nic->rx_mode_work.mc); nic->rx_mode_work.mc = mc_list; nic->rx_mode_work.mode = mode; - queue_delayed_work(nicvf_rx_mode_wq, &nic->rx_mode_work.work, 0); + queue_work(nic->nicvf_rx_mode_wq, &nic->rx_mode_work.work); spin_unlock(&nic->rx_mode_wq_lock); } @@ -2187,7 +2190,10 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&nic->reset_task, nicvf_reset_task); - INIT_DELAYED_WORK(&nic->rx_mode_work.work, nicvf_set_rx_mode_task); + nic->nicvf_rx_mode_wq = alloc_ordered_workqueue("nicvf_rx_mode_wq_VF%d", + WQ_MEM_RECLAIM, + nic->vf_id); + INIT_WORK(&nic->rx_mode_work.work, nicvf_set_rx_mode_task); spin_lock_init(&nic->rx_mode_wq_lock); err = register_netdev(netdev); @@ -2228,13 +2234,15 @@ static void nicvf_remove(struct pci_dev *pdev) nic = netdev_priv(netdev); pnetdev = nic->pnicvf->netdev; - cancel_delayed_work_sync(&nic->rx_mode_work.work); - /* Check if this Qset is assigned to different VF. * If yes, clean primary and all secondary Qsets. */ if (pnetdev && (pnetdev->reg_state == NETREG_REGISTERED)) unregister_netdev(pnetdev); + if (nic->nicvf_rx_mode_wq) { + destroy_workqueue(nic->nicvf_rx_mode_wq); + nic->nicvf_rx_mode_wq = NULL; + } nicvf_unregister_interrupts(nic); pci_set_drvdata(pdev, NULL); if (nic->drv_stats) @@ -2261,17 +2269,11 @@ static struct pci_driver nicvf_driver = { static int __init nicvf_init_module(void) { pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION); - nicvf_rx_mode_wq = alloc_ordered_workqueue("nicvf_generic", - WQ_MEM_RECLAIM); return pci_register_driver(&nicvf_driver); } static void __exit nicvf_cleanup_module(void) { - if (nicvf_rx_mode_wq) { - destroy_workqueue(nicvf_rx_mode_wq); - nicvf_rx_mode_wq = NULL; - } pci_unregister_driver(&nicvf_driver); } -- cgit v1.2.3 From 0dd563b9a62c4cbabf5d4fd6596440c2491e72b1 Mon Sep 17 00:00:00 2001 From: Vadim Lomovtsev Date: Wed, 20 Feb 2019 11:02:43 +0000 Subject: net: thunderx: make CFG_DONE message to run through generic send-ack sequence At the end of NIC VF initialization VF sends CFG_DONE message to PF without using nicvf_msg_send_to_pf routine. This potentially could re-write data in mailbox. This commit is to implement common way of sending CFG_DONE message by the same way with other configuration messages by using nicvf_send_msg_to_pf() routine. Signed-off-by: Vadim Lomovtsev Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic_main.c | 2 +- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 6c8dcb65ff03..90497a27df18 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -1039,7 +1039,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) case NIC_MBOX_MSG_CFG_DONE: /* Last message of VF config msg sequence */ nic_enable_vf(nic, vf, true); - goto unlock; + break; case NIC_MBOX_MSG_SHUTDOWN: /* First msg in VF teardown sequence */ if (vf >= nic->num_vf_en) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index abf24e7dff2d..19b58fc3ca41 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -169,6 +169,17 @@ static int nicvf_check_pf_ready(struct nicvf *nic) return 1; } +static void nicvf_send_cfg_done(struct nicvf *nic) +{ + union nic_mbx mbx = {}; + + mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE; + if (nicvf_send_msg_to_pf(nic, &mbx)) { + netdev_err(nic->netdev, + "PF didn't respond to CFG DONE msg\n"); + } +} + static void nicvf_read_bgx_stats(struct nicvf *nic, struct bgx_stats_msg *bgx) { if (bgx->rx) @@ -1416,7 +1427,6 @@ int nicvf_open(struct net_device *netdev) struct nicvf *nic = netdev_priv(netdev); struct queue_set *qs = nic->qs; struct nicvf_cq_poll *cq_poll = NULL; - union nic_mbx mbx = {}; /* wait till all queued set_rx_mode tasks completes if any */ drain_workqueue(nic->nicvf_rx_mode_wq); @@ -1515,8 +1525,7 @@ int nicvf_open(struct net_device *netdev) nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx); /* Send VF config done msg to PF */ - mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE; - nicvf_write_to_mbx(nic, &mbx); + nicvf_send_cfg_done(nic); return 0; cleanup: -- cgit v1.2.3 From 7db730d9d2f7b6af6aeac621b1890ea477a0cb8d Mon Sep 17 00:00:00 2001 From: Vadim Lomovtsev Date: Wed, 20 Feb 2019 11:02:44 +0000 Subject: net: thunderx: add nicvf_send_msg_to_pf result check for set_rx_mode_task The rx_set_mode invokes number of messages to be send to PF for receive mode configuration. In case if there any issues we need to stop sending messages and release allocated memory. This commit is to implement check of nicvf_msg_send_to_pf() result. Signed-off-by: Vadim Lomovtsev Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 19b58fc3ca41..45f06504a61b 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1953,7 +1953,8 @@ static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs, /* flush DMAC filters and reset RX mode */ mbx.xcast.msg = NIC_MBOX_MSG_RESET_XCAST; - nicvf_send_msg_to_pf(nic, &mbx); + if (nicvf_send_msg_to_pf(nic, &mbx) < 0) + goto free_mc; if (mode & BGX_XCAST_MCAST_FILTER) { /* once enabling filtering, we need to signal to PF to add @@ -1961,7 +1962,8 @@ static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs, */ mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST; mbx.xcast.data.mac = 0; - nicvf_send_msg_to_pf(nic, &mbx); + if (nicvf_send_msg_to_pf(nic, &mbx) < 0) + goto free_mc; } /* check if we have any specific MACs to be added to PF DMAC filter */ @@ -1970,9 +1972,9 @@ static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs, for (idx = 0; idx < mc_addrs->count; idx++) { mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST; mbx.xcast.data.mac = mc_addrs->mc[idx]; - nicvf_send_msg_to_pf(nic, &mbx); + if (nicvf_send_msg_to_pf(nic, &mbx) < 0) + goto free_mc; } - kfree(mc_addrs); } /* and finally set rx mode for PF accordingly */ @@ -1980,6 +1982,8 @@ static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs, mbx.xcast.data.mode = mode; nicvf_send_msg_to_pf(nic, &mbx); +free_mc: + kfree(mc_addrs); } static void nicvf_set_rx_mode_task(struct work_struct *work_arg) -- cgit v1.2.3 From 5354439612894033e3f3b934f0bc03afb5f4ddc5 Mon Sep 17 00:00:00 2001 From: Vadim Lomovtsev Date: Wed, 20 Feb 2019 11:02:44 +0000 Subject: net: thunderx: rework xcast message structure to make it fit into 64 bit To communicate to PF each of ThunderX NIC VF uses mailbox which is pair of 64 bit registers available to both VFn and PF. This commit is to change the xcast message structure in order to fit it into 64 bit. Signed-off-by: Vadim Lomovtsev Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic.h | 6 ++---- drivers/net/ethernet/cavium/thunder/nic_main.c | 4 ++-- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 6 +++--- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index 376a96bce33f..227343625e83 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -577,10 +577,8 @@ struct set_ptp { struct xcast { u8 msg; - union { - u8 mode; - u64 mac; - } data; + u8 mode; + u64 mac:48; }; /* 128 bit shared memory between PF and each VF */ diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 90497a27df18..620dbe082ca0 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -1094,7 +1094,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); bgx_set_dmac_cam_filter(nic->node, bgx, lmac, - mbx.xcast.data.mac, + mbx.xcast.mac, vf < NIC_VF_PER_MBX_REG ? vf : vf - NIC_VF_PER_MBX_REG); break; @@ -1106,7 +1106,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) } bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); - bgx_set_xcast_mode(nic->node, bgx, lmac, mbx.xcast.data.mode); + bgx_set_xcast_mode(nic->node, bgx, lmac, mbx.xcast.mode); break; default: dev_err(&nic->pdev->dev, diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 45f06504a61b..da5986ca7bee 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1961,7 +1961,7 @@ static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs, * its' own LMAC to the filter to accept packets for it. */ mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST; - mbx.xcast.data.mac = 0; + mbx.xcast.mac = 0; if (nicvf_send_msg_to_pf(nic, &mbx) < 0) goto free_mc; } @@ -1971,7 +1971,7 @@ static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs, /* now go through kernel list of MACs and add them one by one */ for (idx = 0; idx < mc_addrs->count; idx++) { mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST; - mbx.xcast.data.mac = mc_addrs->mc[idx]; + mbx.xcast.mac = mc_addrs->mc[idx]; if (nicvf_send_msg_to_pf(nic, &mbx) < 0) goto free_mc; } @@ -1979,7 +1979,7 @@ static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs, /* and finally set rx mode for PF accordingly */ mbx.xcast.msg = NIC_MBOX_MSG_SET_XCAST; - mbx.xcast.data.mode = mode; + mbx.xcast.mode = mode; nicvf_send_msg_to_pf(nic, &mbx); free_mc: -- cgit v1.2.3 From 609ea65c65a0f801c285abf524d36d1f4635d942 Mon Sep 17 00:00:00 2001 From: Vadim Lomovtsev Date: Wed, 20 Feb 2019 11:02:44 +0000 Subject: net: thunderx: add mutex to protect mailbox from concurrent calls for same VF In some cases it could happen that nicvf_send_msg_to_pf() could be called concurrently for the same NIC VF, and thus re-writing mailbox contents and breaking messaging sequence with PF by re-writing NICVF data. This commit is to implement mutex for NICVF to protect mailbox registers and NICVF messaging control data from concurrent access. Signed-off-by: Vadim Lomovtsev Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic.h | 2 ++ drivers/net/ethernet/cavium/thunder/nicvf_main.c | 13 ++++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index 227343625e83..86cda3f4b37b 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -329,6 +329,8 @@ struct nicvf { spinlock_t rx_mode_wq_lock; /* workqueue for handling kernel ndo_set_rx_mode() calls */ struct workqueue_struct *nicvf_rx_mode_wq; + /* mutex to protect VF's mailbox contents from concurrent access */ + struct mutex rx_mode_mtx; /* PTP timestamp */ struct cavium_ptp *ptp_clock; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index da5986ca7bee..2332e3e95e0e 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -124,6 +124,9 @@ int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx) { int timeout = NIC_MBOX_MSG_TIMEOUT; int sleep = 10; + int ret = 0; + + mutex_lock(&nic->rx_mode_mtx); nic->pf_acked = false; nic->pf_nacked = false; @@ -136,7 +139,8 @@ int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx) netdev_err(nic->netdev, "PF NACK to mbox msg 0x%02x from VF%d\n", (mbx->msg.msg & 0xFF), nic->vf_id); - return -EINVAL; + ret = -EINVAL; + break; } msleep(sleep); if (nic->pf_acked) @@ -146,10 +150,12 @@ int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx) netdev_err(nic->netdev, "PF didn't ACK to mbox msg 0x%02x from VF%d\n", (mbx->msg.msg & 0xFF), nic->vf_id); - return -EBUSY; + ret = -EBUSY; + break; } } - return 0; + mutex_unlock(&nic->rx_mode_mtx); + return ret; } /* Checks if VF is able to comminicate with PF @@ -2208,6 +2214,7 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) nic->vf_id); INIT_WORK(&nic->rx_mode_work.work, nicvf_set_rx_mode_task); spin_lock_init(&nic->rx_mode_wq_lock); + mutex_init(&nic->rx_mode_mtx); err = register_netdev(netdev); if (err) { -- cgit v1.2.3 From 2c632ad8bc744d2ad59dd381ce56fae143cf1e0a Mon Sep 17 00:00:00 2001 From: Vadim Lomovtsev Date: Wed, 20 Feb 2019 11:02:45 +0000 Subject: net: thunderx: move link state polling function to VF Move the link change polling task to VF side in order to prevent races between VF and PF while sending link change message(s). This commit is to implement link change request to be initiated by VF. Signed-off-by: Vadim Lomovtsev Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic.h | 2 +- drivers/net/ethernet/cavium/thunder/nic_main.c | 39 ++++++++++++++++-- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 52 +++++++++++++++++------- 3 files changed, 74 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index 86cda3f4b37b..62636c1ed141 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -331,7 +331,7 @@ struct nicvf { struct workqueue_struct *nicvf_rx_mode_wq; /* mutex to protect VF's mailbox contents from concurrent access */ struct mutex rx_mode_mtx; - + struct delayed_work link_change_work; /* PTP timestamp */ struct cavium_ptp *ptp_clock; /* Inbound timestamping is on */ diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 620dbe082ca0..8ab71dae3988 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -929,6 +929,35 @@ static void nic_config_timestamp(struct nicpf *nic, int vf, struct set_ptp *ptp) nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG | (pkind_idx << 3), pkind_val); } +static void nic_link_status_get(struct nicpf *nic, u8 vf) +{ + union nic_mbx mbx = {}; + struct bgx_link_status link; + u8 bgx, lmac; + + mbx.link_status.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE; + + /* Get BGX, LMAC indices for the VF */ + bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + + /* Get interface link status */ + bgx_get_lmac_link_state(nic->node, bgx, lmac, &link); + + nic->link[vf] = link.link_up; + nic->duplex[vf] = link.duplex; + nic->speed[vf] = link.speed; + + /* Send a mbox message to VF with current link status */ + mbx.link_status.link_up = link.link_up; + mbx.link_status.duplex = link.duplex; + mbx.link_status.speed = link.speed; + mbx.link_status.mac_type = link.mac_type; + + /* reply with link status */ + nic_send_msg_to_vf(nic, vf, &mbx); +} + /* Interrupt handler to handle mailbox messages from VFs */ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) { @@ -1108,6 +1137,13 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); bgx_set_xcast_mode(nic->node, bgx, lmac, mbx.xcast.mode); break; + case NIC_MBOX_MSG_BGX_LINK_CHANGE: + if (vf >= nic->num_vf_en) { + ret = -1; /* NACK */ + break; + } + nic_link_status_get(nic, vf); + goto unlock; default: dev_err(&nic->pdev->dev, "Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg); @@ -1419,9 +1455,6 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_disable_sriov; } - INIT_DELAYED_WORK(&nic->dwork, nic_poll_for_link); - queue_delayed_work(nic->check_link, &nic->dwork, 0); - return 0; err_disable_sriov: diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 2332e3e95e0e..503cfadff4ac 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -242,21 +242,24 @@ static void nicvf_handle_mbx_intr(struct nicvf *nic) break; case NIC_MBOX_MSG_BGX_LINK_CHANGE: nic->pf_acked = true; - nic->link_up = mbx.link_status.link_up; - nic->duplex = mbx.link_status.duplex; - nic->speed = mbx.link_status.speed; - nic->mac_type = mbx.link_status.mac_type; - if (nic->link_up) { - netdev_info(nic->netdev, "Link is Up %d Mbps %s duplex\n", - nic->speed, - nic->duplex == DUPLEX_FULL ? - "Full" : "Half"); - netif_carrier_on(nic->netdev); - netif_tx_start_all_queues(nic->netdev); - } else { - netdev_info(nic->netdev, "Link is Down\n"); - netif_carrier_off(nic->netdev); - netif_tx_stop_all_queues(nic->netdev); + if (nic->link_up != mbx.link_status.link_up) { + nic->link_up = mbx.link_status.link_up; + nic->duplex = mbx.link_status.duplex; + nic->speed = mbx.link_status.speed; + nic->mac_type = mbx.link_status.mac_type; + if (nic->link_up) { + netdev_info(nic->netdev, + "Link is Up %d Mbps %s duplex\n", + nic->speed, + nic->duplex == DUPLEX_FULL ? + "Full" : "Half"); + netif_carrier_on(nic->netdev); + netif_tx_start_all_queues(nic->netdev); + } else { + netdev_info(nic->netdev, "Link is Down\n"); + netif_carrier_off(nic->netdev); + netif_tx_stop_all_queues(nic->netdev); + } } break; case NIC_MBOX_MSG_ALLOC_SQS: @@ -1325,6 +1328,8 @@ int nicvf_stop(struct net_device *netdev) struct nicvf_cq_poll *cq_poll = NULL; union nic_mbx mbx = {}; + cancel_delayed_work_sync(&nic->link_change_work); + /* wait till all queued set_rx_mode tasks completes */ drain_workqueue(nic->nicvf_rx_mode_wq); @@ -1427,6 +1432,18 @@ static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu) return nicvf_send_msg_to_pf(nic, &mbx); } +static void nicvf_link_status_check_task(struct work_struct *work_arg) +{ + struct nicvf *nic = container_of(work_arg, + struct nicvf, + link_change_work.work); + union nic_mbx mbx = {}; + mbx.msg.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE; + nicvf_send_msg_to_pf(nic, &mbx); + queue_delayed_work(nic->nicvf_rx_mode_wq, + &nic->link_change_work, 2 * HZ); +} + int nicvf_open(struct net_device *netdev) { int cpu, err, qidx; @@ -1533,6 +1550,11 @@ int nicvf_open(struct net_device *netdev) /* Send VF config done msg to PF */ nicvf_send_cfg_done(nic); + INIT_DELAYED_WORK(&nic->link_change_work, + nicvf_link_status_check_task); + queue_delayed_work(nic->nicvf_rx_mode_wq, + &nic->link_change_work, 0); + return 0; cleanup: nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0); -- cgit v1.2.3 From 2e1c3fff5e496621ccbb1a207b775c1dd1d524ac Mon Sep 17 00:00:00 2001 From: Vadim Lomovtsev Date: Wed, 20 Feb 2019 11:02:45 +0000 Subject: net: thunderx: remove link change polling code and info from nicpf Since link change polling routine was moved to nicvf side, we don't need anymore polling function at nicpf side along with link status info for all enabled Vfs as at VF side this info is already tracked. This commit is to remove unnecessary code & fields from nicpf structure. Signed-off-by: Vadim Lomovtsev Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic_main.c | 114 +++---------------------- 1 file changed, 12 insertions(+), 102 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 8ab71dae3988..c90252829ed3 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -57,14 +57,8 @@ struct nicpf { #define NIC_GET_BGX_FROM_VF_LMAC_MAP(map) ((map >> 4) & 0xF) #define NIC_GET_LMAC_FROM_VF_LMAC_MAP(map) (map & 0xF) u8 *vf_lmac_map; - struct delayed_work dwork; - struct workqueue_struct *check_link; - u8 *link; - u8 *duplex; - u32 *speed; u16 cpi_base[MAX_NUM_VFS_SUPPORTED]; u16 rssi_base[MAX_NUM_VFS_SUPPORTED]; - bool mbx_lock[MAX_NUM_VFS_SUPPORTED]; /* MSI-X */ u8 num_vec; @@ -929,6 +923,10 @@ static void nic_config_timestamp(struct nicpf *nic, int vf, struct set_ptp *ptp) nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG | (pkind_idx << 3), pkind_val); } +/* Get BGX LMAC link status and update corresponding VF + * if there is a change, valid only if internal L2 switch + * is not present otherwise VF link is always treated as up + */ static void nic_link_status_get(struct nicpf *nic, u8 vf) { union nic_mbx mbx = {}; @@ -944,10 +942,6 @@ static void nic_link_status_get(struct nicpf *nic, u8 vf) /* Get interface link status */ bgx_get_lmac_link_state(nic->node, bgx, lmac, &link); - nic->link[vf] = link.link_up; - nic->duplex[vf] = link.duplex; - nic->speed[vf] = link.speed; - /* Send a mbox message to VF with current link status */ mbx.link_status.link_up = link.link_up; mbx.link_status.duplex = link.duplex; @@ -970,8 +964,6 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) int i; int ret = 0; - nic->mbx_lock[vf] = true; - mbx_addr = nic_get_mbx_addr(vf); mbx_data = (u64 *)&mbx; @@ -986,12 +978,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) switch (mbx.msg.msg) { case NIC_MBOX_MSG_READY: nic_mbx_send_ready(nic, vf); - if (vf < nic->num_vf_en) { - nic->link[vf] = 0; - nic->duplex[vf] = 0; - nic->speed[vf] = 0; - } - goto unlock; + return; case NIC_MBOX_MSG_QS_CFG: reg_addr = NIC_PF_QSET_0_127_CFG | (mbx.qs.num << NIC_QS_ID_SHIFT); @@ -1060,7 +1047,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) break; case NIC_MBOX_MSG_RSS_SIZE: nic_send_rss_size(nic, vf); - goto unlock; + return; case NIC_MBOX_MSG_RSS_CFG: case NIC_MBOX_MSG_RSS_CFG_CONT: nic_config_rss(nic, &mbx.rss_cfg); @@ -1078,19 +1065,19 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) break; case NIC_MBOX_MSG_ALLOC_SQS: nic_alloc_sqs(nic, &mbx.sqs_alloc); - goto unlock; + return; case NIC_MBOX_MSG_NICVF_PTR: nic->nicvf[vf] = mbx.nicvf.nicvf; break; case NIC_MBOX_MSG_PNICVF_PTR: nic_send_pnicvf(nic, vf); - goto unlock; + return; case NIC_MBOX_MSG_SNICVF_PTR: nic_send_snicvf(nic, &mbx.nicvf); - goto unlock; + return; case NIC_MBOX_MSG_BGX_STATS: nic_get_bgx_stats(nic, &mbx.bgx_stats); - goto unlock; + return; case NIC_MBOX_MSG_LOOPBACK: ret = nic_config_loopback(nic, &mbx.lbk); break; @@ -1099,7 +1086,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) break; case NIC_MBOX_MSG_PFC: nic_pause_frame(nic, vf, &mbx.pfc); - goto unlock; + return; case NIC_MBOX_MSG_PTP_CFG: nic_config_timestamp(nic, vf, &mbx.ptp); break; @@ -1143,7 +1130,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) break; } nic_link_status_get(nic, vf); - goto unlock; + return; default: dev_err(&nic->pdev->dev, "Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg); @@ -1157,8 +1144,6 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) mbx.msg.msg, vf); nic_mbx_send_nack(nic, vf); } -unlock: - nic->mbx_lock[vf] = false; } static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq) @@ -1306,52 +1291,6 @@ static int nic_sriov_init(struct pci_dev *pdev, struct nicpf *nic) return 0; } -/* Poll for BGX LMAC link status and update corresponding VF - * if there is a change, valid only if internal L2 switch - * is not present otherwise VF link is always treated as up - */ -static void nic_poll_for_link(struct work_struct *work) -{ - union nic_mbx mbx = {}; - struct nicpf *nic; - struct bgx_link_status link; - u8 vf, bgx, lmac; - - nic = container_of(work, struct nicpf, dwork.work); - - mbx.link_status.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE; - - for (vf = 0; vf < nic->num_vf_en; vf++) { - /* Poll only if VF is UP */ - if (!nic->vf_enabled[vf]) - continue; - - /* Get BGX, LMAC indices for the VF */ - bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); - lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); - /* Get interface link status */ - bgx_get_lmac_link_state(nic->node, bgx, lmac, &link); - - /* Inform VF only if link status changed */ - if (nic->link[vf] == link.link_up) - continue; - - if (!nic->mbx_lock[vf]) { - nic->link[vf] = link.link_up; - nic->duplex[vf] = link.duplex; - nic->speed[vf] = link.speed; - - /* Send a mbox message to VF with current link status */ - mbx.link_status.link_up = link.link_up; - mbx.link_status.duplex = link.duplex; - mbx.link_status.speed = link.speed; - mbx.link_status.mac_type = link.mac_type; - nic_send_msg_to_vf(nic, vf, &mbx); - } - } - queue_delayed_work(nic->check_link, &nic->dwork, HZ * 2); -} - static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct device *dev = &pdev->dev; @@ -1420,18 +1359,6 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!nic->vf_lmac_map) goto err_release_regions; - nic->link = devm_kmalloc_array(dev, max_lmac, sizeof(u8), GFP_KERNEL); - if (!nic->link) - goto err_release_regions; - - nic->duplex = devm_kmalloc_array(dev, max_lmac, sizeof(u8), GFP_KERNEL); - if (!nic->duplex) - goto err_release_regions; - - nic->speed = devm_kmalloc_array(dev, max_lmac, sizeof(u32), GFP_KERNEL); - if (!nic->speed) - goto err_release_regions; - /* Initialize hardware */ nic_init_hw(nic); @@ -1447,19 +1374,8 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto err_unregister_interrupts; - /* Register a physical link status poll fn() */ - nic->check_link = alloc_workqueue("check_link_status", - WQ_UNBOUND | WQ_MEM_RECLAIM, 1); - if (!nic->check_link) { - err = -ENOMEM; - goto err_disable_sriov; - } - return 0; -err_disable_sriov: - if (nic->flags & NIC_SRIOV_ENABLED) - pci_disable_sriov(pdev); err_unregister_interrupts: nic_unregister_interrupts(nic); err_release_regions: @@ -1480,12 +1396,6 @@ static void nic_remove(struct pci_dev *pdev) if (nic->flags & NIC_SRIOV_ENABLED) pci_disable_sriov(pdev); - if (nic->check_link) { - /* Destroy work Queue */ - cancel_delayed_work_sync(&nic->dwork); - destroy_workqueue(nic->check_link); - } - nic_unregister_interrupts(nic); pci_release_regions(pdev); -- cgit v1.2.3 From f5b51fe804ec2a6edce0f8f6b11ea57283f5857b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 20 Feb 2019 18:18:12 +0100 Subject: ipv6: route: purge exception on removal When a netdevice is unregistered, we flush the relevant exception via rt6_sync_down_dev() -> fib6_ifdown() -> fib6_del() -> fib6_del_route(). Finally, we end-up calling rt6_remove_exception(), where we release the relevant dst, while we keep the references to the related fib6_info and dev. Such references should be released later when the dst will be destroyed. There are a number of caches that can keep the exception around for an unlimited amount of time - namely dst_cache, possibly even socket cache. As a result device registration may hang, as demonstrated by this script: ip netns add cl ip netns add rt ip netns add srv ip netns exec rt sysctl -w net.ipv6.conf.all.forwarding=1 ip link add name cl_veth type veth peer name cl_rt_veth ip link set dev cl_veth netns cl ip -n cl link set dev cl_veth up ip -n cl addr add dev cl_veth 2001::2/64 ip -n cl route add default via 2001::1 ip -n cl link add tunv6 type ip6tnl mode ip6ip6 local 2001::2 remote 2002::1 hoplimit 64 dev cl_veth ip -n cl link set tunv6 up ip -n cl addr add 2013::2/64 dev tunv6 ip link set dev cl_rt_veth netns rt ip -n rt link set dev cl_rt_veth up ip -n rt addr add dev cl_rt_veth 2001::1/64 ip link add name rt_srv_veth type veth peer name srv_veth ip link set dev srv_veth netns srv ip -n srv link set dev srv_veth up ip -n srv addr add dev srv_veth 2002::1/64 ip -n srv route add default via 2002::2 ip -n srv link add tunv6 type ip6tnl mode ip6ip6 local 2002::1 remote 2001::2 hoplimit 64 dev srv_veth ip -n srv link set tunv6 up ip -n srv addr add 2013::1/64 dev tunv6 ip link set dev rt_srv_veth netns rt ip -n rt link set dev rt_srv_veth up ip -n rt addr add dev rt_srv_veth 2002::2/64 ip netns exec srv netserver & sleep 0.1 ip netns exec cl ping6 -c 4 2013::1 ip netns exec cl netperf -H 2013::1 -t TCP_STREAM -l 3 & sleep 1 ip -n rt link set dev rt_srv_veth mtu 1400 wait %2 ip -n cl link del cl_veth This commit addresses the issue purging all the references held by the exception at time, as we currently do for e.g. ipv6 pcpu dst entries. v1 -> v2: - re-order the code to avoid accessing dst and net after dst_dev_put() Fixes: 93531c674315 ("net/ipv6: separate handling of FIB entries from dst based routes") Signed-off-by: Paolo Abeni Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 74b9b6fd4168..047c47224dba 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1274,18 +1274,29 @@ static DEFINE_SPINLOCK(rt6_exception_lock); static void rt6_remove_exception(struct rt6_exception_bucket *bucket, struct rt6_exception *rt6_ex) { + struct fib6_info *from; struct net *net; if (!bucket || !rt6_ex) return; net = dev_net(rt6_ex->rt6i->dst.dev); + net->ipv6.rt6_stats->fib_rt_cache--; + + /* purge completely the exception to allow releasing the held resources: + * some [sk] cache may keep the dst around for unlimited time + */ + from = rcu_dereference_protected(rt6_ex->rt6i->from, + lockdep_is_held(&rt6_exception_lock)); + rcu_assign_pointer(rt6_ex->rt6i->from, NULL); + fib6_info_release(from); + dst_dev_put(&rt6_ex->rt6i->dst); + hlist_del_rcu(&rt6_ex->hlist); dst_release(&rt6_ex->rt6i->dst); kfree_rcu(rt6_ex, rcu); WARN_ON_ONCE(!bucket->depth); bucket->depth--; - net->ipv6.rt6_stats->fib_rt_cache--; } /* Remove oldest rt6_ex in bucket and free the memory -- cgit v1.2.3 From 52baf9878b65872a7fc735d7fae3350ea9f30646 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 20 Feb 2019 22:34:54 +0100 Subject: net: socket: add check for negative optlen in compat setsockopt __sys_setsockopt() already checks for `optlen < 0`. Add an equivalent check to the compat path for robustness. This has to be `> INT_MAX` instead of `< 0` because the signedness of `optlen` is different here. Signed-off-by: Jann Horn Signed-off-by: David S. Miller --- net/compat.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/compat.c b/net/compat.c index 959d1c51826d..3d348198004f 100644 --- a/net/compat.c +++ b/net/compat.c @@ -388,8 +388,12 @@ static int __compat_sys_setsockopt(int fd, int level, int optname, char __user *optval, unsigned int optlen) { int err; - struct socket *sock = sockfd_lookup(fd, &err); + struct socket *sock; + + if (optlen > INT_MAX) + return -EINVAL; + sock = sockfd_lookup(fd, &err); if (sock) { err = security_socket_setsockopt(sock, level, optname); if (err) { -- cgit v1.2.3 From 80d79ad224ba22381e8d26b54674a86433e75d18 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 20 Feb 2019 14:58:50 -0800 Subject: Documentation: networking: switchdev: Update port parent ID section Update the section about switchdev drivers having to implement a switchdev_port_attr_get() function to return SWITCHDEV_ATTR_ID_PORT_PARENT_ID since that is no longer valid after commit bccb30254a4a ("net: Get rid of SWITCHDEV_ATTR_ID_PORT_PARENT_ID"). Fixes: bccb30254a4a ("net: Get rid of SWITCHDEV_ATTR_ID_PORT_PARENT_ID") Reviewed-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/networking/switchdev.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index 82236a17b5e6..97b7ca8b9b86 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -92,11 +92,11 @@ device. Switch ID ^^^^^^^^^ -The switchdev driver must implement the switchdev op switchdev_port_attr_get -for SWITCHDEV_ATTR_ID_PORT_PARENT_ID for each port netdev, returning the same -physical ID for each port of a switch. The ID must be unique between switches -on the same system. The ID does not need to be unique between switches on -different systems. +The switchdev driver must implement the net_device operation +ndo_get_port_parent_id for each port netdev, returning the same physical ID for +each port of a switch. The ID must be unique between switches on the same +system. The ID does not need to be unique between switches on different +systems. The switch ID is used to locate ports on a switch and to know if aggregated ports belong to the same switch. -- cgit v1.2.3 From 71c190249f0ced5b26377ea6bf829ab3af77a40c Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Fri, 22 Feb 2019 22:36:03 +0000 Subject: nfp: bpf: fix code-gen bug on BPF_ALU | BPF_XOR | BPF_K The intended optimization should be A ^ 0 = A, not A ^ -1 = A. Fixes: cd7df56ed3e6 ("nfp: add BPF to NFP code translator") Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index e23ca90289f7..a09696540171 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -2309,7 +2309,7 @@ static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !~meta->insn.imm); + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm); } static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) -- cgit v1.2.3 From f036ebd9bfbe1e91a3d855e85e05fc5ff156b641 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Fri, 22 Feb 2019 22:36:04 +0000 Subject: nfp: bpf: fix ALU32 high bits clearance bug NFP BPF JIT compiler is doing a couple of small optimizations when jitting ALU imm instructions, some of these optimizations could save code-gen, for example: A & -1 = A A | 0 = A A ^ 0 = A However, for ALU32, high 32-bit of the 64-bit register should still be cleared according to ISA semantics. Fixes: cd7df56ed3e6 ("nfp: add BPF to NFP code translator") Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index a09696540171..0a868c829b90 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -1291,15 +1291,10 @@ wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, static int wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, - enum alu_op alu_op, bool skip) + enum alu_op alu_op) { const struct bpf_insn *insn = &meta->insn; - if (skip) { - meta->skip = true; - return 0; - } - wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm); wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); @@ -2309,7 +2304,7 @@ static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm); + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR); } static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) @@ -2319,7 +2314,7 @@ static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm); + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND); } static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) @@ -2329,7 +2324,7 @@ static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm); + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR); } static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) @@ -2339,7 +2334,7 @@ static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD, !meta->insn.imm); + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD); } static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) @@ -2349,7 +2344,7 @@ static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm); + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB); } static int mul_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) -- cgit v1.2.3 From 67681d02aaa1db9044a16df4ca9c77cde1221a3e Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 20 Feb 2019 19:07:31 -0500 Subject: bnxt_en: Fix typo in firmware message timeout logic. The logic that polls for the firmware message response uses a shorter sleep interval for the first few passes. But there was a typo so it was using the wrong counter (larger counter) for these short sleep passes. The result is a slightly shorter timeout period for these firmware messages than intended. Fix it by using the proper counter. Fixes: 9751e8e71487 ("bnxt_en: reduce timeout on initial HWRM calls") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 8bc7e495b027..1ddd6721d7cd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3903,7 +3903,7 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len, if (len) break; /* on first few passes, just barely sleep */ - if (i < DFLT_HWRM_CMD_TIMEOUT) + if (i < HWRM_SHORT_TIMEOUT_COUNTER) usleep_range(HWRM_SHORT_MIN_TIMEOUT, HWRM_SHORT_MAX_TIMEOUT); else -- cgit v1.2.3 From 0000b81a063b5f3ab82fa18041c28327ce72c312 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 20 Feb 2019 19:07:32 -0500 Subject: bnxt_en: Wait longer for the firmware message response to complete. The code waits up to 20 usec for the firmware response to complete once we've seen the valid response header in the buffer. It turns out that in some scenarios, this wait time is not long enough. Extend it to 150 usec and use usleep_range() instead of udelay(). Fixes: 9751e8e71487 ("bnxt_en: reduce timeout on initial HWRM calls") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 1ddd6721d7cd..d95730c6e0f2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3926,7 +3926,7 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len, dma_rmb(); if (*valid) break; - udelay(1); + usleep_range(1, 5); } if (j >= HWRM_VALID_BIT_DELAY_USEC) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index a451796deefe..2fb653e0048d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -582,7 +582,7 @@ struct nqe_cn { (HWRM_SHORT_TIMEOUT_COUNTER * HWRM_SHORT_MIN_TIMEOUT + \ ((n) - HWRM_SHORT_TIMEOUT_COUNTER) * HWRM_MIN_TIMEOUT)) -#define HWRM_VALID_BIT_DELAY_USEC 20 +#define HWRM_VALID_BIT_DELAY_USEC 150 #define BNXT_HWRM_CHNL_CHIMP 0 #define BNXT_HWRM_CHNL_KONG 1 -- cgit v1.2.3 From 97f0082a0592212fc15d4680f5a4d80f79a1687c Mon Sep 17 00:00:00 2001 From: Kalash Nainwal Date: Wed, 20 Feb 2019 16:23:04 -0800 Subject: net: Set rtm_table to RT_TABLE_COMPAT for ipv6 for tables > 255 Set rtm_table to RT_TABLE_COMPAT for ipv6 for tables > 255 to keep legacy software happy. This is similar to what was done for ipv4 in commit 709772e6e065 ("net: Fix routing tables with id > 255 for legacy software"). Signed-off-by: Kalash Nainwal Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 047c47224dba..ce15dc4ccbfa 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4665,7 +4665,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, table = rt->fib6_table->tb6_id; else table = RT6_TABLE_UNSPEC; - rtm->rtm_table = table; + rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT; if (nla_put_u32(skb, RTA_TABLE, table)) goto nla_put_failure; -- cgit v1.2.3 From 6ff7b060535e87c2ae14dd8548512abfdda528fb Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 21 Feb 2019 22:42:01 +0800 Subject: mdio_bus: Fix use-after-free on device_register fails KASAN has found use-after-free in fixed_mdio_bus_init, commit 0c692d07842a ("drivers/net/phy/mdio_bus.c: call put_device on device_register() failure") call put_device() while device_register() fails,give up the last reference to the device and allow mdiobus_release to be executed ,kfreeing the bus. However in most drives, mdiobus_free be called to free the bus while mdiobus_register fails. use-after-free occurs when access bus again, this patch revert it to let mdiobus_free free the bus. KASAN report details as below: BUG: KASAN: use-after-free in mdiobus_free+0x85/0x90 drivers/net/phy/mdio_bus.c:482 Read of size 4 at addr ffff8881dc824d78 by task syz-executor.0/3524 CPU: 1 PID: 3524 Comm: syz-executor.0 Not tainted 5.0.0-rc7+ #45 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xfa/0x1ce lib/dump_stack.c:113 print_address_description+0x65/0x270 mm/kasan/report.c:187 kasan_report+0x149/0x18d mm/kasan/report.c:317 mdiobus_free+0x85/0x90 drivers/net/phy/mdio_bus.c:482 fixed_mdio_bus_init+0x283/0x1000 [fixed_phy] ? 0xffffffffc0e40000 ? 0xffffffffc0e40000 ? 0xffffffffc0e40000 do_one_initcall+0xfa/0x5ca init/main.c:887 do_init_module+0x204/0x5f6 kernel/module.c:3460 load_module+0x66b2/0x8570 kernel/module.c:3808 __do_sys_finit_module+0x238/0x2a0 kernel/module.c:3902 do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f6215c19c58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000020000080 RDI: 0000000000000003 RBP: 00007f6215c19c70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f6215c1a6bc R13: 00000000004bcefb R14: 00000000006f7030 R15: 0000000000000004 Allocated by task 3524: set_track mm/kasan/common.c:85 [inline] __kasan_kmalloc.constprop.3+0xa0/0xd0 mm/kasan/common.c:496 kmalloc include/linux/slab.h:545 [inline] kzalloc include/linux/slab.h:740 [inline] mdiobus_alloc_size+0x54/0x1b0 drivers/net/phy/mdio_bus.c:143 fixed_mdio_bus_init+0x163/0x1000 [fixed_phy] do_one_initcall+0xfa/0x5ca init/main.c:887 do_init_module+0x204/0x5f6 kernel/module.c:3460 load_module+0x66b2/0x8570 kernel/module.c:3808 __do_sys_finit_module+0x238/0x2a0 kernel/module.c:3902 do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 3524: set_track mm/kasan/common.c:85 [inline] __kasan_slab_free+0x130/0x180 mm/kasan/common.c:458 slab_free_hook mm/slub.c:1409 [inline] slab_free_freelist_hook mm/slub.c:1436 [inline] slab_free mm/slub.c:2986 [inline] kfree+0xe1/0x270 mm/slub.c:3938 device_release+0x78/0x200 drivers/base/core.c:919 kobject_cleanup lib/kobject.c:662 [inline] kobject_release lib/kobject.c:691 [inline] kref_put include/linux/kref.h:67 [inline] kobject_put+0x146/0x240 lib/kobject.c:708 put_device+0x1c/0x30 drivers/base/core.c:2060 __mdiobus_register+0x483/0x560 drivers/net/phy/mdio_bus.c:382 fixed_mdio_bus_init+0x26b/0x1000 [fixed_phy] do_one_initcall+0xfa/0x5ca init/main.c:887 do_init_module+0x204/0x5f6 kernel/module.c:3460 load_module+0x66b2/0x8570 kernel/module.c:3808 __do_sys_finit_module+0x238/0x2a0 kernel/module.c:3902 do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff8881dc824c80 which belongs to the cache kmalloc-2k of size 2048 The buggy address is located 248 bytes inside of 2048-byte region [ffff8881dc824c80, ffff8881dc825480) The buggy address belongs to the page: page:ffffea0007720800 count:1 mapcount:0 mapping:ffff8881f6c02800 index:0x0 compound_mapcount: 0 flags: 0x2fffc0000010200(slab|head) raw: 02fffc0000010200 0000000000000000 0000000500000001 ffff8881f6c02800 raw: 0000000000000000 00000000800f000f 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8881dc824c00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8881dc824c80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff8881dc824d00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8881dc824d80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8881dc824e00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: 0c692d07842a ("drivers/net/phy/mdio_bus.c: call put_device on device_register() failure") Signed-off-by: YueHaibing Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 66b9cfe692fc..7368616286ae 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -379,7 +379,6 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) err = device_register(&bus->dev); if (err) { pr_err("mii_bus %s failed to register\n", bus->id); - put_device(&bus->dev); return -EINVAL; } -- cgit v1.2.3 From 543fc3fb41834a7f2e4cfa1dcf8aa9c472a52e9a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 21 Feb 2019 17:43:57 +0100 Subject: udpv6: add the required annotation to mib type In commit 029a37434880 ("udp6: cleanup stats accounting in recvmsg()") I forgot to add the percpu annotation for the mib pointer. Add it, and make sparse happy. Fixes: 029a37434880 ("udp6: cleanup stats accounting in recvmsg()") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv6/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2596ffdeebea..e6c52c27f354 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -288,8 +288,8 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int peeked, peeking, off; int err; int is_udplite = IS_UDPLITE(sk); + struct udp_mib __percpu *mib; bool checksum_valid = false; - struct udp_mib *mib; int is_udp4; if (flags & MSG_ERRQUEUE) -- cgit v1.2.3 From 5de362df44d71fc8f6b153ae4eaa2a1284c84490 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 21 Feb 2019 17:43:58 +0100 Subject: fou6: fix proto error handler argument type Last argument of gue6_err_proto_handler() has a wrong type annotation, fix it and make sparse happy again. Fixes: b8a51b38e4d4 ("fou, fou6: ICMP error handlers for FoU and GUE") Signed-off-by: Paolo Abeni Acked-by: Stefano Brivio Signed-off-by: David S. Miller --- net/ipv6/fou6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c index b858bd5280bf..867474abe269 100644 --- a/net/ipv6/fou6.c +++ b/net/ipv6/fou6.c @@ -72,7 +72,7 @@ static int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, static int gue6_err_proto_handler(int proto, struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, u32 info) + u8 type, u8 code, int offset, __be32 info) { const struct inet6_protocol *ipprot; -- cgit v1.2.3 From 424a7cd078401591fc45587ffb2c012d7f402fb7 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 21 Feb 2019 17:43:59 +0100 Subject: udpv6: fix possible user after free in error handler Before derefencing the encap pointer, commit e7cc082455cb ("udp: Support for error handlers of tunnels with arbitrary destination port") checks for a NULL value, but the two fetch operation can race with removal. Fix the above using a single access. Also fix a couple of type annotations, to make sparse happy. Fixes: e7cc082455cb ("udp: Support for error handlers of tunnels with arbitrary destination port") Signed-off-by: Paolo Abeni Acked-by: Stefano Brivio Signed-off-by: David S. Miller --- net/ipv6/udp.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e6c52c27f354..b444483cdb2b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -420,17 +420,19 @@ EXPORT_SYMBOL(udpv6_encap_enable); */ static int __udp6_lib_err_encap_no_sk(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, u32 info) + u8 type, u8 code, int offset, __be32 info) { int i; for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) { int (*handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, u32 info); + u8 type, u8 code, int offset, __be32 info); + const struct ip6_tnl_encap_ops *encap; - if (!ip6tun_encaps[i]) + encap = rcu_dereference(ip6tun_encaps[i]); + if (!encap) continue; - handler = rcu_dereference(ip6tun_encaps[i]->err_handler); + handler = encap->err_handler; if (handler && !handler(skb, opt, type, code, offset, info)) return 0; } -- cgit v1.2.3 From 92b95364235b6441a36861ff0ca4541a13351d60 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 21 Feb 2019 17:44:00 +0100 Subject: udp: fix possible user after free in error handler Similar to the previous commit, this addresses the same issue for ipv4: use a single fetch operation and use the correct rcu annotation. Fixes: e7cc082455cb ("udp: Support for error handlers of tunnels with arbitrary destination port") Signed-off-by: Paolo Abeni Acked-by: Stefano Brivio Signed-off-by: David S. Miller --- net/ipv4/udp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5c3cd5d84a6f..372fdc5381a9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -562,10 +562,12 @@ static int __udp4_lib_err_encap_no_sk(struct sk_buff *skb, u32 info) for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) { int (*handler)(struct sk_buff *skb, u32 info); + const struct ip_tunnel_encap_ops *encap; - if (!iptun_encaps[i]) + encap = rcu_dereference(iptun_encaps[i]); + if (!encap) continue; - handler = rcu_dereference(iptun_encaps[i]->err_handler); + handler = encap->err_handler; if (handler && !handler(skb, info)) return 0; } -- cgit v1.2.3 From b4b8bb69c104a9345c528692cde5aa520d885360 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 23 Feb 2019 00:03:44 +0100 Subject: bpf, doc: add bpf list as secondary entry to maintainers file We recently created a bpf@vger.kernel.org list (https://lore.kernel.org/bpf/) for BPF related discussions, originally in context of BPF track at LSF/MM for topic discussions. It's *optional* but *desirable* to keep it in Cc for BPF related kernel/loader/llvm/tooling threads, meaning also infrastructure like llvm that sits on top of kernel but is crucial to BPF. In any case, netdev with it's bpf delegate is *as-is* today primary list for patches, so nothing changes in the workflow. Main purpose is to have some more awareness for the bpf@vger.kernel.org list that folks can Cc for BPF specific topics. Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- MAINTAINERS | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 41ce5f4ad838..d78f3714de08 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2852,7 +2852,7 @@ R: Martin KaFai Lau R: Song Liu R: Yonghong Song L: netdev@vger.kernel.org -L: linux-kernel@vger.kernel.org +L: bpf@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git Q: https://patchwork.ozlabs.org/project/netdev/list/?delegate=77147 @@ -2882,6 +2882,7 @@ N: bpf BPF JIT for ARM M: Shubham Bansal L: netdev@vger.kernel.org +L: bpf@vger.kernel.org S: Maintained F: arch/arm/net/ @@ -2890,18 +2891,21 @@ M: Daniel Borkmann M: Alexei Starovoitov M: Zi Shen Lim L: netdev@vger.kernel.org +L: bpf@vger.kernel.org S: Supported F: arch/arm64/net/ BPF JIT for MIPS (32-BIT AND 64-BIT) M: Paul Burton L: netdev@vger.kernel.org +L: bpf@vger.kernel.org S: Maintained F: arch/mips/net/ BPF JIT for NFP NICs M: Jakub Kicinski L: netdev@vger.kernel.org +L: bpf@vger.kernel.org S: Supported F: drivers/net/ethernet/netronome/nfp/bpf/ @@ -2909,6 +2913,7 @@ BPF JIT for POWERPC (32-BIT AND 64-BIT) M: Naveen N. Rao M: Sandipan Das L: netdev@vger.kernel.org +L: bpf@vger.kernel.org S: Maintained F: arch/powerpc/net/ @@ -2916,6 +2921,7 @@ BPF JIT for S390 M: Martin Schwidefsky M: Heiko Carstens L: netdev@vger.kernel.org +L: bpf@vger.kernel.org S: Maintained F: arch/s390/net/ X: arch/s390/net/pnet.c @@ -2923,12 +2929,14 @@ X: arch/s390/net/pnet.c BPF JIT for SPARC (32-BIT AND 64-BIT) M: David S. Miller L: netdev@vger.kernel.org +L: bpf@vger.kernel.org S: Maintained F: arch/sparc/net/ BPF JIT for X86 32-BIT M: Wang YanQing L: netdev@vger.kernel.org +L: bpf@vger.kernel.org S: Maintained F: arch/x86/net/bpf_jit_comp32.c @@ -2936,6 +2944,7 @@ BPF JIT for X86 64-BIT M: Alexei Starovoitov M: Daniel Borkmann L: netdev@vger.kernel.org +L: bpf@vger.kernel.org S: Supported F: arch/x86/net/ X: arch/x86/net/bpf_jit_comp32.c @@ -8487,6 +8496,7 @@ L7 BPF FRAMEWORK M: John Fastabend M: Daniel Borkmann L: netdev@vger.kernel.org +L: bpf@vger.kernel.org S: Maintained F: include/linux/skmsg.h F: net/core/skmsg.c @@ -16714,6 +16724,7 @@ M: Jesper Dangaard Brouer M: John Fastabend L: netdev@vger.kernel.org L: xdp-newbies@vger.kernel.org +L: bpf@vger.kernel.org S: Supported F: net/core/xdp.c F: include/net/xdp.h @@ -16727,6 +16738,7 @@ XDP SOCKETS (AF_XDP) M: Björn Töpel M: Magnus Karlsson L: netdev@vger.kernel.org +L: bpf@vger.kernel.org S: Maintained F: kernel/bpf/xskmap.c F: net/xdp/ -- cgit v1.2.3 From 61a65d32fe91c2b6ea3aed47c5f1efc7acd89ba2 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Thu, 21 Feb 2019 17:54:11 +0100 Subject: net: phy: marvell10g: Fix Multi-G advertisement to only advertise 10G Some Marvell Alaska PHYs support 2.5G, 5G and 10G BaseT links. Their default behaviour is to advertise all of these modes, but at the moment, only 10GBaseT is supported. To prevent link partners from establishing link at that speed, clear these modes upon configuring aneg parameters. Fixes: 20b2af32ff3f ("net: phy: add Marvell Alaska X 88X3310 10Gigabit PHY support") Signed-off-by: Maxime Chevallier Reported-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/marvell10g.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 82ab6ed3b74e..6bac602094bd 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -26,6 +26,8 @@ #include #include +#define MDIO_AN_10GBT_CTRL_ADV_NBT_MASK 0x01e0 + enum { MV_PCS_BASE_T = 0x0000, MV_PCS_BASE_R = 0x1000, @@ -386,8 +388,10 @@ static int mv3310_config_aneg(struct phy_device *phydev) else reg = 0; + /* Make sure we clear unsupported 2.5G/5G advertising */ ret = mv3310_modify(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL, - MDIO_AN_10GBT_CTRL_ADV10G, reg); + MDIO_AN_10GBT_CTRL_ADV10G | + MDIO_AN_10GBT_CTRL_ADV_NBT_MASK, reg); if (ret < 0) return ret; if (ret > 0) -- cgit v1.2.3 From 4593403fa516a5a4cffe6883c5062d60932cbfbe Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Fri, 22 Feb 2019 14:57:23 +0800 Subject: net: set static variable an initial value in atl2_probe() cards_found is a static variable, but when it enters atl2_probe(), cards_found is set to zero, the value is not consistent with last probe, so next behavior is not our expect. Signed-off-by: Mao Wenan Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/atlx/atl2.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index bb41becb6609..31ff1e0d1baa 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -1335,13 +1335,11 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct net_device *netdev; struct atl2_adapter *adapter; - static int cards_found; + static int cards_found = 0; unsigned long mmio_start; int mmio_len; int err; - cards_found = 0; - err = pci_enable_device(pdev); if (err) return err; -- cgit v1.2.3 From af548a27b158d548d41e56255e6eaca1658cc3be Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 22 Feb 2019 07:27:41 -0300 Subject: selftests: fib_tests: sleep after changing carrier. again. Just like commit e2ba732a1681 ("selftests: fib_tests: sleep after changing carrier"), wait one second to allow linkwatch to propagate the carrier change to the stack. There are two sets of carrier tests. The first slept after the carrier was set to off, and when the second set ran, it was likely that the linkwatch would be able to run again without much delay, reducing the likelihood of a race. However, if you run 'fib_tests.sh -t carrier' on a loop, you will quickly notice the failures. Sleeping on the second set of tests make the failures go away. Cc: David Ahern Signed-off-by: Thadeu Lima de Souza Cascardo Reviewed-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fib_tests.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 802b4af18729..1080ff55a788 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -388,6 +388,7 @@ fib_carrier_unicast_test() set -e $IP link set dev dummy0 carrier off + sleep 1 set +e echo " Carrier down" -- cgit v1.2.3 From 278e2148c07559dd4ad8602f22366d61eb2ee7b7 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 22 Feb 2019 21:22:32 +0800 Subject: Revert "bridge: do not add port to router list when receives query with source 0.0.0.0" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 5a2de63fd1a5 ("bridge: do not add port to router list when receives query with source 0.0.0.0") and commit 0fe5119e267f ("net: bridge: remove ipv6 zero address check in mcast queries") The reason is RFC 4541 is not a standard but suggestive. Currently we will elect 0.0.0.0 as Querier if there is no ip address configured on bridge. If we do not add the port which recives query with source 0.0.0.0 to router list, the IGMP reports will not be about to forward to Querier, IGMP data will also not be able to forward to dest. As Nikolay suggested, revert this change first and add a boolopt api to disable none-zero election in future if needed. Reported-by: Linus Lüssing Reported-by: Sebastian Gottschall Fixes: 5a2de63fd1a5 ("bridge: do not add port to router list when receives query with source 0.0.0.0") Fixes: 0fe5119e267f ("net: bridge: remove ipv6 zero address check in mcast queries") Signed-off-by: Hangbin Liu Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 3aeff0895669..ac92b2eb32b1 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1204,14 +1204,7 @@ static void br_multicast_query_received(struct net_bridge *br, return; br_multicast_update_query_timer(br, query, max_delay); - - /* Based on RFC4541, section 2.1.1 IGMP Forwarding Rules, - * the arrival port for IGMP Queries where the source address - * is 0.0.0.0 should not be added to router port list. - */ - if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) || - saddr->proto == htons(ETH_P_IPV6)) - br_multicast_mark_router(br, port); + br_multicast_mark_router(br, port); } static void br_ip4_multicast_query(struct net_bridge *br, -- cgit v1.2.3 From 99407d8fa3abfe41b04d9321a9df0a0e30a57fae Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Fri, 22 Feb 2019 20:07:45 +0100 Subject: net: dsa: Remove documentation for port_fdb_prepare This callback was removed some time ago, also remove the documentation. Fixes: 1b6dd556c304 ("net: dsa: Remove prepare phase for FDB") Signed-off-by: Hauke Mehrtens Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/networking/dsa/dsa.txt | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt index 25170ad7d25b..101f2b2c69ad 100644 --- a/Documentation/networking/dsa/dsa.txt +++ b/Documentation/networking/dsa/dsa.txt @@ -533,16 +533,12 @@ Bridge VLAN filtering function that the driver has to call for each VLAN the given port is a member of. A switchdev object is used to carry the VID and bridge flags. -- port_fdb_prepare: bridge layer function invoked when the bridge prepares the - installation of a Forwarding Database entry. If the operation is not - supported, this function should return -EOPNOTSUPP to inform the bridge code - to fallback to a software implementation. No hardware setup must be done in - this function. See port_fdb_add for this and details. - - port_fdb_add: bridge layer function invoked when the bridge wants to install a Forwarding Database entry, the switch hardware should be programmed with the specified address in the specified VLAN Id in the forwarding database - associated with this VLAN ID + associated with this VLAN ID. If the operation is not supported, this + function should return -EOPNOTSUPP to inform the bridge code to fallback to + a software implementation. Note: VLAN ID 0 corresponds to the port private database, which, in the context of DSA, would be the its port-based VLAN, used by the associated bridge device. -- cgit v1.2.3 From 797a22bd5298c2674d927893f46cadf619dad11d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 23 Feb 2019 13:24:59 -0800 Subject: net/x25: fix a race in x25_bind() syzbot was able to trigger another soft lockup [1] I first thought it was the O(N^2) issue I mentioned in my prior fix (f657d22ee1f "net/x25: do not hold the cpu too long in x25_new_lci()"), but I eventually found that x25_bind() was not checking SOCK_ZAPPED state under socket lock protection. This means that multiple threads can end up calling x25_insert_socket() for the same socket, and corrupt x25_list [1] watchdog: BUG: soft lockup - CPU#0 stuck for 123s! [syz-executor.2:10492] Modules linked in: irq event stamp: 27515 hardirqs last enabled at (27514): [] trace_hardirqs_on_thunk+0x1a/0x1c hardirqs last disabled at (27515): [] trace_hardirqs_off_thunk+0x1a/0x1c softirqs last enabled at (32): [] x25_get_neigh+0xa3/0xd0 net/x25/x25_link.c:336 softirqs last disabled at (34): [] x25_find_socket+0x23/0x140 net/x25/af_x25.c:341 CPU: 0 PID: 10492 Comm: syz-executor.2 Not tainted 5.0.0-rc7+ #88 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__sanitizer_cov_trace_pc+0x4/0x50 kernel/kcov.c:97 Code: f4 ff ff ff e8 11 9f ea ff 48 c7 05 12 fb e5 08 00 00 00 00 e9 c8 e9 ff ff 90 90 90 90 90 90 90 90 90 90 90 90 90 55 48 89 e5 <48> 8b 75 08 65 48 8b 04 25 40 ee 01 00 65 8b 15 38 0c 92 7e 81 e2 RSP: 0018:ffff88806e94fc48 EFLAGS: 00000286 ORIG_RAX: ffffffffffffff13 RAX: 1ffff1100d84dac5 RBX: 0000000000000001 RCX: ffffc90006197000 RDX: 0000000000040000 RSI: ffffffff86324bf3 RDI: ffff88806c26d628 RBP: ffff88806e94fc48 R08: ffff88806c1c6500 R09: fffffbfff1282561 R10: fffffbfff1282560 R11: ffffffff89412b03 R12: ffff88806c26d628 R13: ffff888090455200 R14: dffffc0000000000 R15: 0000000000000000 FS: 00007f3a107e4700(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f3a107e3db8 CR3: 00000000a5544000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __x25_find_socket net/x25/af_x25.c:327 [inline] x25_find_socket+0x7d/0x140 net/x25/af_x25.c:342 x25_new_lci net/x25/af_x25.c:355 [inline] x25_connect+0x380/0xde0 net/x25/af_x25.c:784 __sys_connect+0x266/0x330 net/socket.c:1662 __do_sys_connect net/socket.c:1673 [inline] __se_sys_connect net/socket.c:1670 [inline] __x64_sys_connect+0x73/0xb0 net/socket.c:1670 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457e29 Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f3a107e3c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000457e29 RDX: 0000000000000012 RSI: 0000000020000200 RDI: 0000000000000005 RBP: 000000000073c040 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f3a107e46d4 R13: 00000000004be362 R14: 00000000004ceb98 R15: 00000000ffffffff Sending NMI from CPU 0 to CPUs 1: NMI backtrace for cpu 1 CPU: 1 PID: 10493 Comm: syz-executor.3 Not tainted 5.0.0-rc7+ #88 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__read_once_size include/linux/compiler.h:193 [inline] RIP: 0010:queued_write_lock_slowpath+0x143/0x290 kernel/locking/qrwlock.c:86 Code: 4c 8d 2c 01 41 83 c7 03 41 0f b6 45 00 41 38 c7 7c 08 84 c0 0f 85 0c 01 00 00 8b 03 3d 00 01 00 00 74 1a f3 90 41 0f b6 55 00 <41> 38 d7 7c eb 84 d2 74 e7 48 89 df e8 cc aa 4e 00 eb dd be 04 00 RSP: 0018:ffff888085c47bd8 EFLAGS: 00000206 RAX: 0000000000000300 RBX: ffffffff89412b00 RCX: 1ffffffff1282560 RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffffffff89412b00 RBP: ffff888085c47c70 R08: 1ffffffff1282560 R09: fffffbfff1282561 R10: fffffbfff1282560 R11: ffffffff89412b03 R12: 00000000000000ff R13: fffffbfff1282560 R14: 1ffff11010b88f7d R15: 0000000000000003 FS: 00007fdd04086700(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fdd04064db8 CR3: 0000000090be0000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: queued_write_lock include/asm-generic/qrwlock.h:104 [inline] do_raw_write_lock+0x1d6/0x290 kernel/locking/spinlock_debug.c:203 __raw_write_lock_bh include/linux/rwlock_api_smp.h:204 [inline] _raw_write_lock_bh+0x3b/0x50 kernel/locking/spinlock.c:312 x25_insert_socket+0x21/0xe0 net/x25/af_x25.c:267 x25_bind+0x273/0x340 net/x25/af_x25.c:703 __sys_bind+0x23f/0x290 net/socket.c:1481 __do_sys_bind net/socket.c:1492 [inline] __se_sys_bind net/socket.c:1490 [inline] __x64_sys_bind+0x73/0xb0 net/socket.c:1490 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457e29 Fixes: 90c27297a9bf ("X.25 remove bkl in bind") Signed-off-by: Eric Dumazet Cc: andrew hendry Signed-off-by: David S. Miller --- net/x25/af_x25.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index ec3a828672ef..eff31348e20b 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -679,8 +679,7 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct sockaddr_x25 *addr = (struct sockaddr_x25 *)uaddr; int len, i, rc = 0; - if (!sock_flag(sk, SOCK_ZAPPED) || - addr_len != sizeof(struct sockaddr_x25) || + if (addr_len != sizeof(struct sockaddr_x25) || addr->sx25_family != AF_X25) { rc = -EINVAL; goto out; @@ -699,9 +698,13 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) } lock_sock(sk); - x25_sk(sk)->source_addr = addr->sx25_addr; - x25_insert_socket(sk); - sock_reset_flag(sk, SOCK_ZAPPED); + if (sock_flag(sk, SOCK_ZAPPED)) { + x25_sk(sk)->source_addr = addr->sx25_addr; + x25_insert_socket(sk); + sock_reset_flag(sk, SOCK_ZAPPED); + } else { + rc = -EINVAL; + } release_sock(sk); SOCK_DEBUG(sk, "x25_bind: socket is bound\n"); out: -- cgit v1.2.3 From bf50b606cfd85ac8d3d0adb711f3e22204059848 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 23 Feb 2019 15:51:51 -0800 Subject: tcp: repaired skbs must init their tso_segs syzbot reported a WARN_ON(!tcp_skb_pcount(skb)) in tcp_send_loss_probe() [1] This was caused by TCP_REPAIR sent skbs that inadvertenly were missing a call to tcp_init_tso_segs() [1] WARNING: CPU: 1 PID: 0 at net/ipv4/tcp_output.c:2534 tcp_send_loss_probe+0x771/0x8a0 net/ipv4/tcp_output.c:2534 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.0.0-rc7+ #77 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 panic+0x2cb/0x65c kernel/panic.c:214 __warn.cold+0x20/0x45 kernel/panic.c:571 report_bug+0x263/0x2b0 lib/bug.c:186 fixup_bug arch/x86/kernel/traps.c:178 [inline] fixup_bug arch/x86/kernel/traps.c:173 [inline] do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:271 do_invalid_op+0x37/0x50 arch/x86/kernel/traps.c:290 invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:973 RIP: 0010:tcp_send_loss_probe+0x771/0x8a0 net/ipv4/tcp_output.c:2534 Code: 88 fc ff ff 4c 89 ef e8 ed 75 c8 fb e9 c8 fc ff ff e8 43 76 c8 fb e9 63 fd ff ff e8 d9 75 c8 fb e9 94 f9 ff ff e8 bf 03 91 fb <0f> 0b e9 7d fa ff ff e8 b3 03 91 fb 0f b6 1d 37 43 7a 03 31 ff 89 RSP: 0018:ffff8880ae907c60 EFLAGS: 00010206 RAX: ffff8880a989c340 RBX: 0000000000000000 RCX: ffffffff85dedbdb RDX: 0000000000000100 RSI: ffffffff85dee0b1 RDI: 0000000000000005 RBP: ffff8880ae907c90 R08: ffff8880a989c340 R09: ffffed10147d1ae1 R10: ffffed10147d1ae0 R11: ffff8880a3e8d703 R12: ffff888091b90040 R13: ffff8880a3e8d540 R14: 0000000000008000 R15: ffff888091b90860 tcp_write_timer_handler+0x5c0/0x8a0 net/ipv4/tcp_timer.c:583 tcp_write_timer+0x10e/0x1d0 net/ipv4/tcp_timer.c:607 call_timer_fn+0x190/0x720 kernel/time/timer.c:1325 expire_timers kernel/time/timer.c:1362 [inline] __run_timers kernel/time/timer.c:1681 [inline] __run_timers kernel/time/timer.c:1649 [inline] run_timer_softirq+0x652/0x1700 kernel/time/timer.c:1694 __do_softirq+0x266/0x95a kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0x180/0x1d0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0x14a/0x570 arch/x86/kernel/apic/apic.c:1062 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:807 RIP: 0010:native_safe_halt+0x2/0x10 arch/x86/include/asm/irqflags.h:58 Code: ff ff ff 48 89 c7 48 89 45 d8 e8 59 0c a1 fa 48 8b 45 d8 e9 ce fe ff ff 48 89 df e8 48 0c a1 fa eb 82 90 90 90 90 90 90 fb f4 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 00 f4 c3 90 90 90 90 90 90 RSP: 0018:ffff8880a98afd78 EFLAGS: 00000286 ORIG_RAX: ffffffffffffff13 RAX: 1ffffffff1125061 RBX: ffff8880a989c340 RCX: 0000000000000000 RDX: dffffc0000000000 RSI: 0000000000000001 RDI: ffff8880a989cbbc RBP: ffff8880a98afda8 R08: ffff8880a989c340 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000001 R13: ffffffff889282f8 R14: 0000000000000001 R15: 0000000000000000 arch_cpu_idle+0x10/0x20 arch/x86/kernel/process.c:555 default_idle_call+0x36/0x90 kernel/sched/idle.c:93 cpuidle_idle_call kernel/sched/idle.c:153 [inline] do_idle+0x386/0x570 kernel/sched/idle.c:262 cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:353 start_secondary+0x404/0x5c0 arch/x86/kernel/smpboot.c:271 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:243 Kernel Offset: disabled Rebooting in 86400 seconds.. Fixes: 79861919b889 ("tcp: fix TCP_REPAIR xmit queue setup") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Andrey Vagin Cc: Soheil Hassas Yeganeh Cc: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 730bc44dbad9..ccc78f3a4b60 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2347,6 +2347,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, /* "skb_mstamp_ns" is used as a start point for the retransmit timer */ skb->skb_mstamp_ns = tp->tcp_wstamp_ns = tp->tcp_clock_cache; list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); + tcp_init_tso_segs(skb, mss_now); goto repair; /* Skip network transmission */ } -- cgit v1.2.3 From 4c8e0459b585e2a7b367545be3e102737f1e489f Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 24 Feb 2019 01:11:15 +0100 Subject: net: phy: realtek: Dummy IRQ calls for RTL8366RB This fixes a regression introduced by commit 0d2e778e38e0ddffab4bb2b0e9ed2ad5165c4bf7 "net: phy: replace PHY_HAS_INTERRUPT with a check for config_intr and ack_interrupt". This assumes that a PHY cannot trigger interrupt unless it has .config_intr() or .ack_interrupt() implemented. A later patch makes the code assume both need to be implemented for interrupts to be present. But this PHY (which is inside a DSA) will happily fire interrupts without either callback. Implement dummy callbacks for .config_intr() and .ack_interrupt() in the phy header to fix this. Tested on the RTL8366RB on D-Link DIR-685. Fixes: 0d2e778e38e0 ("net: phy: replace PHY_HAS_INTERRUPT with a check for config_intr and ack_interrupt") Cc: Heiner Kallweit Signed-off-by: Linus Walleij Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/realtek.c | 7 +++++++ include/linux/phy.h | 8 ++++++++ 2 files changed, 15 insertions(+) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index c6010fb1aa0f..cb4a23041a94 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -282,6 +282,13 @@ static struct phy_driver realtek_drvs[] = { .name = "RTL8366RB Gigabit Ethernet", .features = PHY_GBIT_FEATURES, .config_init = &rtl8366rb_config_init, + /* These interrupts are handled by the irq controller + * embedded inside the RTL8366RB, they get unmasked when the + * irq is requested and ACKed by reading the status register, + * which is done by the irqchip code. + */ + .ack_interrupt = genphy_no_ack_interrupt, + .config_intr = genphy_no_config_intr, .suspend = genphy_suspend, .resume = genphy_resume, }, diff --git a/include/linux/phy.h b/include/linux/phy.h index 127fcc9c3778..333b56d8f746 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -992,6 +992,14 @@ static inline int genphy_no_soft_reset(struct phy_device *phydev) { return 0; } +static inline int genphy_no_ack_interrupt(struct phy_device *phydev) +{ + return 0; +} +static inline int genphy_no_config_intr(struct phy_device *phydev) +{ + return 0; +} int genphy_read_mmd_unsupported(struct phy_device *phdev, int devad, u16 regnum); int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum, -- cgit v1.2.3