From 3d071d8da1f586c24863a57349586a1611b9aa67 Mon Sep 17 00:00:00 2001
From: Stephan Mueller <stephan.mueller@atsec.com>
Date: Thu, 15 Dec 2016 12:42:33 +0100
Subject: random: remove stale maybe_reseed_primary_crng

The function maybe_reseed_primary_crng is not used anywhere and thus can
be removed.

Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 drivers/char/random.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 1ef26403bcc8..8e5ab20848c0 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -855,13 +855,6 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
 	spin_unlock_irqrestore(&primary_crng.lock, flags);
 }
 
-static inline void maybe_reseed_primary_crng(void)
-{
-	if (crng_init > 2 &&
-	    time_after(jiffies, primary_crng.init_time + CRNG_RESEED_INTERVAL))
-		crng_reseed(&primary_crng, &input_pool);
-}
-
 static inline void crng_wait_ready(void)
 {
 	wait_event_interruptible(crng_init_wait, crng_ready());
-- 
cgit v1.2.3


From 2e03c36f25ebb52d3358b8baebcdf96895c33a87 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stephan=20M=C3=BCller?= <smueller@chronox.de>
Date: Tue, 27 Dec 2016 23:39:31 +0100
Subject: random: remove stale urandom_init_wait

The urandom_init_wait wait queue is a left over from the pre-ChaCha20
times and can therefore be savely removed.

Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 drivers/char/random.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 8e5ab20848c0..482531d87fb8 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -409,7 +409,6 @@ static struct poolinfo {
  */
 static DECLARE_WAIT_QUEUE_HEAD(random_read_wait);
 static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
-static DECLARE_WAIT_QUEUE_HEAD(urandom_init_wait);
 static struct fasync_struct *fasync;
 
 static DEFINE_SPINLOCK(random_ready_list_lock);
-- 
cgit v1.2.3


From 43d8a72cd985ca5279a9eb84d61fcbb3ee3d3774 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stephan=20M=C3=BCller?= <smueller@chronox.de>
Date: Tue, 27 Dec 2016 23:40:59 +0100
Subject: random: remove variable limit

The variable limit was used to identify the nonblocking pool's unlimited
random number generation. As the nonblocking pool is a thing of the
past, remove the limit variable and any conditions around it (i.e.
preserve the branches for limit == 1).

Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 drivers/char/random.c | 30 +++++++-----------------------
 1 file changed, 7 insertions(+), 23 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 482531d87fb8..92d6dd24c86e 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -466,7 +466,6 @@ struct entropy_store {
 	int entropy_count;
 	int entropy_total;
 	unsigned int initialized:1;
-	unsigned int limit:1;
 	unsigned int last_data_init:1;
 	__u8 last_data[EXTRACT_SIZE];
 };
@@ -484,7 +483,6 @@ static __u32 blocking_pool_data[OUTPUT_POOL_WORDS] __latent_entropy;
 static struct entropy_store input_pool = {
 	.poolinfo = &poolinfo_table[0],
 	.name = "input",
-	.limit = 1,
 	.lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
 	.pool = input_pool_data
 };
@@ -492,7 +490,6 @@ static struct entropy_store input_pool = {
 static struct entropy_store blocking_pool = {
 	.poolinfo = &poolinfo_table[1],
 	.name = "blocking",
-	.limit = 1,
 	.pull = &input_pool,
 	.lock = __SPIN_LOCK_UNLOCKED(blocking_pool.lock),
 	.pool = blocking_pool_data,
@@ -1212,15 +1209,6 @@ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes)
 	    r->entropy_count > r->poolinfo->poolfracbits)
 		return;
 
-	if (r->limit == 0 && random_min_urandom_seed) {
-		unsigned long now = jiffies;
-
-		if (time_before(now,
-				r->last_pulled + random_min_urandom_seed * HZ))
-			return;
-		r->last_pulled = now;
-	}
-
 	_xfer_secondary_pool(r, nbytes);
 }
 
@@ -1228,8 +1216,6 @@ static void _xfer_secondary_pool(struct entropy_store *r, size_t nbytes)
 {
 	__u32	tmp[OUTPUT_POOL_WORDS];
 
-	/* For /dev/random's pool, always leave two wakeups' worth */
-	int rsvd_bytes = r->limit ? 0 : random_read_wakeup_bits / 4;
 	int bytes = nbytes;
 
 	/* pull at least as much as a wakeup */
@@ -1240,7 +1226,7 @@ static void _xfer_secondary_pool(struct entropy_store *r, size_t nbytes)
 	trace_xfer_secondary_pool(r->name, bytes * 8, nbytes * 8,
 				  ENTROPY_BITS(r), ENTROPY_BITS(r->pull));
 	bytes = extract_entropy(r->pull, tmp, bytes,
-				random_read_wakeup_bits / 8, rsvd_bytes);
+				random_read_wakeup_bits / 8, 0);
 	mix_pool_bytes(r, tmp, bytes);
 	credit_entropy_bits(r, bytes*8);
 }
@@ -1268,7 +1254,7 @@ static void push_to_pool(struct work_struct *work)
 static size_t account(struct entropy_store *r, size_t nbytes, int min,
 		      int reserved)
 {
-	int entropy_count, orig;
+	int entropy_count, orig, have_bytes;
 	size_t ibytes, nfrac;
 
 	BUG_ON(r->entropy_count > r->poolinfo->poolfracbits);
@@ -1277,14 +1263,12 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min,
 retry:
 	entropy_count = orig = ACCESS_ONCE(r->entropy_count);
 	ibytes = nbytes;
-	/* If limited, never pull more than available */
-	if (r->limit) {
-		int have_bytes = entropy_count >> (ENTROPY_SHIFT + 3);
+	/* never pull more than available */
+	have_bytes = entropy_count >> (ENTROPY_SHIFT + 3);
 
-		if ((have_bytes -= reserved) < 0)
-			have_bytes = 0;
-		ibytes = min_t(size_t, ibytes, have_bytes);
-	}
+	if ((have_bytes -= reserved) < 0)
+		have_bytes = 0;
+	ibytes = min_t(size_t, ibytes, have_bytes);
 	if (ibytes < min)
 		ibytes = 0;
 
-- 
cgit v1.2.3


From 5d0e5ea343a0f70351428476bcf8715e0731f26a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stephan=20M=C3=BCller?= <smueller@chronox.de>
Date: Tue, 27 Dec 2016 23:41:22 +0100
Subject: random: fix comment for unused random_min_urandom_seed

The variable random_min_urandom_seed is not needed any more as it
defined the reseeding behavior of the nonblocking pool. Though it is not
needed any more, it is left in the code for user space interface
compatibility.

Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 drivers/char/random.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 92d6dd24c86e..9d147d456598 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -313,9 +313,7 @@ static int random_read_wakeup_bits = 64;
 static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS;
 
 /*
- * The minimum number of seconds between urandom pool reseeding.  We
- * do this to limit the amount of entropy that can be drained from the
- * input pool even if there are heavy demands on /dev/urandom.
+ * Variable is currently unused by left for user space compatibility.
  */
 static int random_min_urandom_seed = 60;
 
-- 
cgit v1.2.3


From f5b98461cb8167ba362ad9f74c41d126b7becea7 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 6 Jan 2017 19:32:01 +0100
Subject: random: use chacha20 for get_random_int/long

Now that our crng uses chacha20, we can rely on its speedy
characteristics for replacing MD5, while simultaneously achieving a
higher security guarantee. Before the idea was to use these functions if
you wanted random integers that aren't stupidly insecure but aren't
necessarily secure either, a vague gray zone, that hopefully was "good
enough" for its users. With chacha20, we can strengthen this claim,
since either we're using an rdrand-like instruction, or we're using the
same crng as /dev/urandom. And it's faster than what was before.

We could have chosen to replace this with a SipHash-derived function,
which might be slightly faster, but at the cost of having yet another
RNG construction in the kernel. By moving to chacha20, we have a single
RNG to analyze and verify, and we also already get good performance
improvements on all platforms.

Implementation-wise, rather than use a generic buffer for both
get_random_int/long and memcpy based on the size needs, we use a
specific buffer for 32-bit reads and for 64-bit reads. This way, we're
guaranteed to always have aligned accesses on all platforms. While
slightly more verbose in C, the assembly this generates is a lot
simpler than otherwise.

Finally, on 32-bit platforms where longs and ints are the same size,
we simply alias get_random_int to get_random_long.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Suggested-by: Theodore Ts'o <tytso@mit.edu>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 drivers/char/random.c  | 84 ++++++++++++++++++++++++++------------------------
 include/linux/random.h |  1 -
 init/main.c            |  1 -
 3 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 9d147d456598..b800e5479b7d 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -2016,63 +2016,65 @@ struct ctl_table random_table[] = {
 };
 #endif 	/* CONFIG_SYSCTL */
 
-static u32 random_int_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
-
-int random_int_secret_init(void)
-{
-	get_random_bytes(random_int_secret, sizeof(random_int_secret));
-	return 0;
-}
-
-static DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash)
-		__aligned(sizeof(unsigned long));
+struct batched_entropy {
+	union {
+		unsigned long entropy_long[CHACHA20_BLOCK_SIZE / sizeof(unsigned long)];
+		unsigned int entropy_int[CHACHA20_BLOCK_SIZE / sizeof(unsigned int)];
+	};
+	unsigned int position;
+};
 
 /*
- * Get a random word for internal kernel use only. Similar to urandom but
- * with the goal of minimal entropy pool depletion. As a result, the random
- * value is not cryptographically secure but for several uses the cost of
- * depleting entropy is too high
+ * Get a random word for internal kernel use only. The quality of the random
+ * number is either as good as RDRAND or as good as /dev/urandom, with the
+ * goal of being quite fast and not depleting entropy.
  */
-unsigned int get_random_int(void)
+static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_long);
+unsigned long get_random_long(void)
 {
-	__u32 *hash;
-	unsigned int ret;
+	unsigned long ret;
+	struct batched_entropy *batch;
 
-	if (arch_get_random_int(&ret))
+	if (arch_get_random_long(&ret))
 		return ret;
 
-	hash = get_cpu_var(get_random_int_hash);
-
-	hash[0] += current->pid + jiffies + random_get_entropy();
-	md5_transform(hash, random_int_secret);
-	ret = hash[0];
-	put_cpu_var(get_random_int_hash);
-
+	batch = &get_cpu_var(batched_entropy_long);
+	if (batch->position % ARRAY_SIZE(batch->entropy_long) == 0) {
+		extract_crng((u8 *)batch->entropy_long);
+		batch->position = 0;
+	}
+	ret = batch->entropy_long[batch->position++];
+	put_cpu_var(batched_entropy_long);
 	return ret;
 }
-EXPORT_SYMBOL(get_random_int);
+EXPORT_SYMBOL(get_random_long);
 
-/*
- * Same as get_random_int(), but returns unsigned long.
- */
-unsigned long get_random_long(void)
+#if BITS_PER_LONG == 32
+unsigned int get_random_int(void)
 {
-	__u32 *hash;
-	unsigned long ret;
+	return get_random_long();
+}
+#else
+static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_int);
+unsigned int get_random_int(void)
+{
+	unsigned int ret;
+	struct batched_entropy *batch;
 
-	if (arch_get_random_long(&ret))
+	if (arch_get_random_int(&ret))
 		return ret;
 
-	hash = get_cpu_var(get_random_int_hash);
-
-	hash[0] += current->pid + jiffies + random_get_entropy();
-	md5_transform(hash, random_int_secret);
-	ret = *(unsigned long *)hash;
-	put_cpu_var(get_random_int_hash);
-
+	batch = &get_cpu_var(batched_entropy_int);
+	if (batch->position % ARRAY_SIZE(batch->entropy_int) == 0) {
+		extract_crng((u8 *)batch->entropy_int);
+		batch->position = 0;
+	}
+	ret = batch->entropy_int[batch->position++];
+	put_cpu_var(batched_entropy_int);
 	return ret;
 }
-EXPORT_SYMBOL(get_random_long);
+#endif
+EXPORT_SYMBOL(get_random_int);
 
 /**
  * randomize_page - Generate a random, page aligned address
diff --git a/include/linux/random.h b/include/linux/random.h
index 7bd2403e4fef..16ab429735a7 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -37,7 +37,6 @@ extern void get_random_bytes(void *buf, int nbytes);
 extern int add_random_ready_callback(struct random_ready_callback *rdy);
 extern void del_random_ready_callback(struct random_ready_callback *rdy);
 extern void get_random_bytes_arch(void *buf, int nbytes);
-extern int random_int_secret_init(void);
 
 #ifndef MODULE
 extern const struct file_operations random_fops, urandom_fops;
diff --git a/init/main.c b/init/main.c
index b0c9d6facef9..09beb7fc6e8c 100644
--- a/init/main.c
+++ b/init/main.c
@@ -879,7 +879,6 @@ static void __init do_basic_setup(void)
 	do_ctors();
 	usermodehelper_enable();
 	do_initcalls();
-	random_int_secret_init();
 }
 
 static void __init do_pre_smp_initcalls(void)
-- 
cgit v1.2.3


From c440408cf6901eeb2c09563397e24a9097907078 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sun, 22 Jan 2017 16:34:08 +0100
Subject: random: convert get_random_int/long into get_random_u32/u64

Many times, when a user wants a random number, he wants a random number
of a guaranteed size. So, thinking of get_random_int and get_random_long
in terms of get_random_u32 and get_random_u64 makes it much easier to
achieve this. It also makes the code simpler.

On 32-bit platforms, get_random_int and get_random_long are both aliased
to get_random_u32. On 64-bit platforms, int->u32 and long->u64.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 drivers/char/random.c  | 55 +++++++++++++++++++++++++-------------------------
 include/linux/random.h | 17 ++++++++++++++--
 2 files changed, 42 insertions(+), 30 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index b800e5479b7d..066ae125f2c8 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -2018,8 +2018,8 @@ struct ctl_table random_table[] = {
 
 struct batched_entropy {
 	union {
-		unsigned long entropy_long[CHACHA20_BLOCK_SIZE / sizeof(unsigned long)];
-		unsigned int entropy_int[CHACHA20_BLOCK_SIZE / sizeof(unsigned int)];
+		u64 entropy_u64[CHACHA20_BLOCK_SIZE / sizeof(u64)];
+		u32 entropy_u32[CHACHA20_BLOCK_SIZE / sizeof(u32)];
 	};
 	unsigned int position;
 };
@@ -2029,52 +2029,51 @@ struct batched_entropy {
  * number is either as good as RDRAND or as good as /dev/urandom, with the
  * goal of being quite fast and not depleting entropy.
  */
-static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_long);
-unsigned long get_random_long(void)
+static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64);
+u64 get_random_u64(void)
 {
-	unsigned long ret;
+	u64 ret;
 	struct batched_entropy *batch;
 
-	if (arch_get_random_long(&ret))
+#if BITS_PER_LONG == 64
+	if (arch_get_random_long((unsigned long *)&ret))
 		return ret;
+#else
+	if (arch_get_random_long((unsigned long *)&ret) &&
+	    arch_get_random_long((unsigned long *)&ret + 1))
+	    return ret;
+#endif
 
-	batch = &get_cpu_var(batched_entropy_long);
-	if (batch->position % ARRAY_SIZE(batch->entropy_long) == 0) {
-		extract_crng((u8 *)batch->entropy_long);
+	batch = &get_cpu_var(batched_entropy_u64);
+	if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) {
+		extract_crng((u8 *)batch->entropy_u64);
 		batch->position = 0;
 	}
-	ret = batch->entropy_long[batch->position++];
-	put_cpu_var(batched_entropy_long);
+	ret = batch->entropy_u64[batch->position++];
+	put_cpu_var(batched_entropy_u64);
 	return ret;
 }
-EXPORT_SYMBOL(get_random_long);
+EXPORT_SYMBOL(get_random_u64);
 
-#if BITS_PER_LONG == 32
-unsigned int get_random_int(void)
-{
-	return get_random_long();
-}
-#else
-static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_int);
-unsigned int get_random_int(void)
+static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32);
+u32 get_random_u32(void)
 {
-	unsigned int ret;
+	u32 ret;
 	struct batched_entropy *batch;
 
 	if (arch_get_random_int(&ret))
 		return ret;
 
-	batch = &get_cpu_var(batched_entropy_int);
-	if (batch->position % ARRAY_SIZE(batch->entropy_int) == 0) {
-		extract_crng((u8 *)batch->entropy_int);
+	batch = &get_cpu_var(batched_entropy_u32);
+	if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) {
+		extract_crng((u8 *)batch->entropy_u32);
 		batch->position = 0;
 	}
-	ret = batch->entropy_int[batch->position++];
-	put_cpu_var(batched_entropy_int);
+	ret = batch->entropy_u32[batch->position++];
+	put_cpu_var(batched_entropy_u32);
 	return ret;
 }
-#endif
-EXPORT_SYMBOL(get_random_int);
+EXPORT_SYMBOL(get_random_u32);
 
 /**
  * randomize_page - Generate a random, page aligned address
diff --git a/include/linux/random.h b/include/linux/random.h
index 16ab429735a7..ed5c3838780d 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -42,8 +42,21 @@ extern void get_random_bytes_arch(void *buf, int nbytes);
 extern const struct file_operations random_fops, urandom_fops;
 #endif
 
-unsigned int get_random_int(void);
-unsigned long get_random_long(void);
+u32 get_random_u32(void);
+u64 get_random_u64(void);
+static inline unsigned int get_random_int(void)
+{
+	return get_random_u32();
+}
+static inline unsigned long get_random_long(void)
+{
+#if BITS_PER_LONG == 64
+	return get_random_u64();
+#else
+	return get_random_u32();
+#endif
+}
+
 unsigned long randomize_page(unsigned long start, unsigned long range);
 
 u32 prandom_u32(void);
-- 
cgit v1.2.3


From db61ffe3a71c697aaa91c42c862a5f7557a0e562 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@nxp.com>
Date: Tue, 31 Jan 2017 14:36:07 -0200
Subject: random: move random_min_urandom_seed into CONFIG_SYSCTL ifdef block

Building arm allnodefconfig causes the following build warning:

drivers/char/random.c:318:12: warning: 'random_min_urandom_seed' defined but not used [-Wunused-variable]

Fix the warning by moving 'random_min_urandom_seed' declaration inside
the CONFIG_SYSCTL ifdef block, where it is actually used.

While at it, remove the comment prior to the variable declaration.

Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 drivers/char/random.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 066ae125f2c8..0ab024918907 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -312,11 +312,6 @@ static int random_read_wakeup_bits = 64;
  */
 static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS;
 
-/*
- * Variable is currently unused by left for user space compatibility.
- */
-static int random_min_urandom_seed = 60;
-
 /*
  * Originally, we used a primitive polynomial of degree .poolwords
  * over GF(2).  The taps for various sizes are defined below.  They
@@ -1886,6 +1881,7 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count,
 static int min_read_thresh = 8, min_write_thresh;
 static int max_read_thresh = OUTPUT_POOL_WORDS * 32;
 static int max_write_thresh = INPUT_POOL_WORDS * 32;
+static int random_min_urandom_seed = 60;
 static char sysctl_bootid[16];
 
 /*
-- 
cgit v1.2.3


From c282222a45cb9503cbfbebfdb60491f06ae84b49 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 8 Feb 2017 11:52:29 +0100
Subject: xfrm: policy: init locks early

Dmitry reports following splat:
 INFO: trying to register non-static key.
 the code is fine but needs lockdep annotation.
 turning off the locking correctness validator.
 CPU: 0 PID: 13059 Comm: syz-executor1 Not tainted 4.10.0-rc7-next-20170207 #1
[..]
 spin_lock_bh include/linux/spinlock.h:304 [inline]
 xfrm_policy_flush+0x32/0x470 net/xfrm/xfrm_policy.c:963
 xfrm_policy_fini+0xbf/0x560 net/xfrm/xfrm_policy.c:3041
 xfrm_net_init+0x79f/0x9e0 net/xfrm/xfrm_policy.c:3091
 ops_init+0x10a/0x530 net/core/net_namespace.c:115
 setup_net+0x2ed/0x690 net/core/net_namespace.c:291
 copy_net_ns+0x26c/0x530 net/core/net_namespace.c:396
 create_new_namespaces+0x409/0x860 kernel/nsproxy.c:106
 unshare_nsproxy_namespaces+0xae/0x1e0 kernel/nsproxy.c:205
 SYSC_unshare kernel/fork.c:2281 [inline]

Problem is that when we get error during xfrm_net_init we will call
xfrm_policy_fini which will acquire xfrm_policy_lock before it was
initialized.  Just move it around so locks get set up first.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Fixes: 283bc9f35bbbcb0e9 ("xfrm: Namespacify xfrm state/policy locks")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_policy.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 177e208e8ff5..3c8f5b70abf8 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -3062,6 +3062,11 @@ static int __net_init xfrm_net_init(struct net *net)
 {
 	int rv;
 
+	/* Initialize the per-net locks here */
+	spin_lock_init(&net->xfrm.xfrm_state_lock);
+	spin_lock_init(&net->xfrm.xfrm_policy_lock);
+	mutex_init(&net->xfrm.xfrm_cfg_mutex);
+
 	rv = xfrm_statistics_init(net);
 	if (rv < 0)
 		goto out_statistics;
@@ -3078,11 +3083,6 @@ static int __net_init xfrm_net_init(struct net *net)
 	if (rv < 0)
 		goto out;
 
-	/* Initialize the per-net locks here */
-	spin_lock_init(&net->xfrm.xfrm_state_lock);
-	spin_lock_init(&net->xfrm.xfrm_policy_lock);
-	mutex_init(&net->xfrm.xfrm_cfg_mutex);
-
 	return 0;
 
 out:
-- 
cgit v1.2.3


From 7e91c7df29b5e196de3dc6f086c8937973bd0b88 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Tue, 7 Feb 2017 19:58:02 +0200
Subject: swiotlb-xen: implement xen_swiotlb_dma_mmap callback

This function creates userspace mapping for the DMA-coherent memory.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Oleksandr Dmytryshyn <oleksandr.dmytryshyn@globallogic.com>
Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad@kernel.org>
---
 arch/arm/xen/mm.c         |  1 +
 drivers/xen/swiotlb-xen.c | 19 +++++++++++++++++++
 include/xen/swiotlb-xen.h |  5 +++++
 3 files changed, 25 insertions(+)

diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index bd62d94f8ac5..cd1684e4e864 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -198,6 +198,7 @@ static struct dma_map_ops xen_swiotlb_dma_ops = {
 	.unmap_page = xen_swiotlb_unmap_page,
 	.dma_supported = xen_swiotlb_dma_supported,
 	.set_dma_mask = xen_swiotlb_set_dma_mask,
+	.mmap = xen_swiotlb_dma_mmap,
 };
 
 int __init xen_mm_init(void)
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index f905d6eeb048..c7f61fc0572a 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -680,3 +680,22 @@ xen_swiotlb_set_dma_mask(struct device *dev, u64 dma_mask)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(xen_swiotlb_set_dma_mask);
+
+/*
+ * Create userspace mapping for the DMA-coherent memory.
+ * This function should be called with the pages from the current domain only,
+ * passing pages mapped from other domains would lead to memory corruption.
+ */
+int
+xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+		     void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		     unsigned long attrs)
+{
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+	if (__generic_dma_ops(dev)->mmap)
+		return __generic_dma_ops(dev)->mmap(dev, vma, cpu_addr,
+						    dma_addr, size, attrs);
+#endif
+	return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size);
+}
+EXPORT_SYMBOL_GPL(xen_swiotlb_dma_mmap);
diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h
index a0083be5d529..a315c876aaa9 100644
--- a/include/xen/swiotlb-xen.h
+++ b/include/xen/swiotlb-xen.h
@@ -55,4 +55,9 @@ xen_swiotlb_dma_supported(struct device *hwdev, u64 mask);
 
 extern int
 xen_swiotlb_set_dma_mask(struct device *dev, u64 dma_mask);
+
+extern int
+xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+		     void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		     unsigned long attrs);
 #endif /* __LINUX_SWIOTLB_XEN_H */
-- 
cgit v1.2.3


From 69369f52d28a34c84acb6f2a8a585e743441566a Mon Sep 17 00:00:00 2001
From: Andrii Anisov <andrii_anisov@epam.com>
Date: Tue, 7 Feb 2017 19:58:03 +0200
Subject: swiotlb-xen: implement xen_swiotlb_get_sgtable callback

Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
Signed-off-by: Konrad Rzeszutek Wilk <konrad@kernel.org>
---
 arch/arm/xen/mm.c         |  1 +
 drivers/xen/swiotlb-xen.c | 28 ++++++++++++++++++++++++++++
 include/xen/swiotlb-xen.h |  6 ++++++
 3 files changed, 35 insertions(+)

diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index cd1684e4e864..76ea48a614e1 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -199,6 +199,7 @@ static struct dma_map_ops xen_swiotlb_dma_ops = {
 	.dma_supported = xen_swiotlb_dma_supported,
 	.set_dma_mask = xen_swiotlb_set_dma_mask,
 	.mmap = xen_swiotlb_dma_mmap,
+	.get_sgtable = xen_swiotlb_get_sgtable,
 };
 
 int __init xen_mm_init(void)
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index c7f61fc0572a..23e30b4e1fb6 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -699,3 +699,31 @@ xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 	return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size);
 }
 EXPORT_SYMBOL_GPL(xen_swiotlb_dma_mmap);
+
+/*
+ * This function should be called with the pages from the current domain only,
+ * passing pages mapped from other domains would lead to memory corruption.
+ */
+int
+xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
+			void *cpu_addr, dma_addr_t handle, size_t size,
+			unsigned long attrs)
+{
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+	if (__generic_dma_ops(dev)->get_sgtable) {
+#if 0
+	/*
+	 * This check verifies that the page belongs to the current domain and
+	 * is not one mapped from another domain.
+	 * This check is for debug only, and should not go to production build
+	 */
+		unsigned long bfn = PHYS_PFN(dma_to_phys(dev, handle));
+		BUG_ON (!page_is_ram(bfn));
+#endif
+		return __generic_dma_ops(dev)->get_sgtable(dev, sgt, cpu_addr,
+							   handle, size, attrs);
+	}
+#endif
+	return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size);
+}
+EXPORT_SYMBOL_GPL(xen_swiotlb_get_sgtable);
diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h
index a315c876aaa9..1f6d78f044b6 100644
--- a/include/xen/swiotlb-xen.h
+++ b/include/xen/swiotlb-xen.h
@@ -2,6 +2,7 @@
 #define __LINUX_SWIOTLB_XEN_H
 
 #include <linux/dma-direction.h>
+#include <linux/scatterlist.h>
 #include <linux/swiotlb.h>
 
 extern int xen_swiotlb_init(int verbose, bool early);
@@ -60,4 +61,9 @@ extern int
 xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 		     void *cpu_addr, dma_addr_t dma_addr, size_t size,
 		     unsigned long attrs);
+
+extern int
+xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
+			void *cpu_addr, dma_addr_t handle, size_t size,
+			unsigned long attrs);
 #endif /* __LINUX_SWIOTLB_XEN_H */
-- 
cgit v1.2.3


From 4c86d77743a54fb2d8a4d18a037a074c892bb3be Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 14 Feb 2017 07:43:56 +0100
Subject: xfrm: Don't use sk_family for socket policy lookups

On IPv4-mapped IPv6 addresses sk_family is AF_INET6,
but the flow informations are created based on AF_INET.
So the routing set up 'struct flowi4' but we try to
access 'struct flowi6' what leads to an out of bounds
access. Fix this by using the family we get with the
dst_entry, like we do it for the standard policy lookup.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Tested-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_policy.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 3c8f5b70abf8..a9af17f0fce6 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1248,7 +1248,7 @@ static inline int policy_to_flow_dir(int dir)
 }
 
 static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
-						 const struct flowi *fl)
+						 const struct flowi *fl, u16 family)
 {
 	struct xfrm_policy *pol;
 
@@ -1256,8 +1256,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
  again:
 	pol = rcu_dereference(sk->sk_policy[dir]);
 	if (pol != NULL) {
-		bool match = xfrm_selector_match(&pol->selector, fl,
-						 sk->sk_family);
+		bool match = xfrm_selector_match(&pol->selector, fl, family);
 		int err = 0;
 
 		if (match) {
@@ -2253,7 +2252,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 	sk = sk_const_to_full_sk(sk);
 	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
 		num_pols = 1;
-		pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
+		pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family);
 		err = xfrm_expand_policies(fl, family, pols,
 					   &num_pols, &num_xfrms);
 		if (err < 0)
@@ -2532,7 +2531,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 	pol = NULL;
 	sk = sk_to_full_sk(sk);
 	if (sk && sk->sk_policy[dir]) {
-		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
+		pol = xfrm_sk_policy_lookup(sk, dir, &fl, family);
 		if (IS_ERR(pol)) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
 			return 0;
-- 
cgit v1.2.3


From 179125085bd4ca70e8e028913193a93653bd12f7 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Tue, 14 Feb 2017 10:26:03 -0800
Subject: ARM: OMAP3: Fix smartreflex platform data regression

Commit d9d9cec02835 ("ARM: OMAP2+: Remove legacy data from hwmod for
omap3") dropped platform data that should no longer be used as we're
booting with device tree. It turns out that smartreflex is still
using platform data and produces the following errors during probe:

smartreflex smartreflex.0: invalid resource
smartreflex smartreflex.0: omap_sr_probe: ioremap fail
smartreflex: probe of smartreflex.0 failed with error -22
smartreflex smartreflex.1: invalid resource
smartreflex smartreflex.1: omap_sr_probe: ioremap fail
smartreflex: probe of smartreflex.1 failed with error -22

Let's fix the regression by adding back the smartreflex hwmod data.
The long term is to update the smartreflex driver to use device tree
based probing.

Fixes: d9d9cec02835 ("ARM: OMAP2+: Remove legacy data from hwmod
for omap3")
Reported-by: Adam Ford <aford173@gmail.com>
Tested-by: Adam Ford <aford173@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index 56f917ec8621..507ff0795a8e 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -2112,11 +2112,20 @@ static struct omap_hwmod_ocp_if omap3_l4_core__i2c3 = {
 };
 
 /* L4 CORE -> SR1 interface */
+static struct omap_hwmod_addr_space omap3_sr1_addr_space[] = {
+	{
+		.pa_start	= OMAP34XX_SR1_BASE,
+		.pa_end		= OMAP34XX_SR1_BASE + SZ_1K - 1,
+		.flags		= ADDR_TYPE_RT,
+	},
+	{ },
+};
 
 static struct omap_hwmod_ocp_if omap34xx_l4_core__sr1 = {
 	.master		= &omap3xxx_l4_core_hwmod,
 	.slave		= &omap34xx_sr1_hwmod,
 	.clk		= "sr_l4_ick",
+	.addr		= omap3_sr1_addr_space,
 	.user		= OCP_USER_MPU,
 };
 
@@ -2124,15 +2133,25 @@ static struct omap_hwmod_ocp_if omap36xx_l4_core__sr1 = {
 	.master		= &omap3xxx_l4_core_hwmod,
 	.slave		= &omap36xx_sr1_hwmod,
 	.clk		= "sr_l4_ick",
+	.addr		= omap3_sr1_addr_space,
 	.user		= OCP_USER_MPU,
 };
 
 /* L4 CORE -> SR1 interface */
+static struct omap_hwmod_addr_space omap3_sr2_addr_space[] = {
+	{
+		.pa_start	= OMAP34XX_SR2_BASE,
+		.pa_end		= OMAP34XX_SR2_BASE + SZ_1K - 1,
+		.flags		= ADDR_TYPE_RT,
+	},
+	{ },
+};
 
 static struct omap_hwmod_ocp_if omap34xx_l4_core__sr2 = {
 	.master		= &omap3xxx_l4_core_hwmod,
 	.slave		= &omap34xx_sr2_hwmod,
 	.clk		= "sr_l4_ick",
+	.addr		= omap3_sr2_addr_space,
 	.user		= OCP_USER_MPU,
 };
 
@@ -2140,6 +2159,7 @@ static struct omap_hwmod_ocp_if omap36xx_l4_core__sr2 = {
 	.master		= &omap3xxx_l4_core_hwmod,
 	.slave		= &omap36xx_sr2_hwmod,
 	.clk		= "sr_l4_ick",
+	.addr		= omap3_sr2_addr_space,
 	.user		= OCP_USER_MPU,
 };
 
-- 
cgit v1.2.3


From e3dc847a5f85b43ee2bfc8eae407a7e383483228 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 15 Feb 2017 11:38:58 +0100
Subject: vti6: Don't report path MTU below IPV6_MIN_MTU.

In vti6_xmit(), the check for IPV6_MIN_MTU before we
send a ICMPV6_PKT_TOOBIG message is missing. So we might
report a PMTU below 1280. Fix this by adding the required
check.

Fixes: ccd740cbc6e ("vti6: Add pmtu handling to vti6_xmit.")
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv6/ip6_vti.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index d82042c8d8fd..9156d6b9eb24 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -484,11 +484,15 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
 	if (!skb->ignore_df && skb->len > mtu) {
 		skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu);
 
-		if (skb->protocol == htons(ETH_P_IPV6))
+		if (skb->protocol == htons(ETH_P_IPV6)) {
+			if (mtu < IPV6_MIN_MTU)
+				mtu = IPV6_MIN_MTU;
+
 			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-		else
+		} else {
 			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
 				  htonl(mtu));
+		}
 
 		return -EMSGSIZE;
 	}
-- 
cgit v1.2.3


From 448c077eeb02240c430db2a2c3bf5285a4c65d66 Mon Sep 17 00:00:00 2001
From: Matthijs van Duin <matthijsvanduin@gmail.com>
Date: Thu, 16 Feb 2017 01:05:04 +0100
Subject: ARM: OMAP5 / DRA7: Fix HYP mode boot for thumb2 build

'adr' yields a data-pointer, not a function-pointer.

Fixes: 999f934de195 ("ARM: omap5/dra7xx: Enable booting secondary
CPU in HYP mode")
Signed-off-by: Matthijs van Duin <matthijsvanduin@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/omap-headsmp.S | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-omap2/omap-headsmp.S b/arch/arm/mach-omap2/omap-headsmp.S
index fe36ce2734d4..4c6f14cf92a8 100644
--- a/arch/arm/mach-omap2/omap-headsmp.S
+++ b/arch/arm/mach-omap2/omap-headsmp.S
@@ -17,6 +17,7 @@
 
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <asm/assembler.h>
 
 #include "omap44xx.h"
 
@@ -66,7 +67,7 @@ wait_2:	ldr	r2, =AUX_CORE_BOOT0_PA	@ read from AuxCoreBoot0
 	cmp	r0, r4
 	bne	wait_2
 	ldr	r12, =API_HYP_ENTRY
-	adr	r0, hyp_boot
+	badr	r0, hyp_boot
 	smc	#0
 hyp_boot:
 	b	omap_secondary_startup
-- 
cgit v1.2.3


From f655e67ac8d797425abb0404d0878758f3f71c1a Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Thu, 16 Feb 2017 14:10:01 +0800
Subject: drm/i915/gvt: Fix check error on opregion.c

As we switched to memremap for opregion, shouldn't use any __iomem
for that, and move to use memcpy instead.

This fixed static check errors for:

  CHECK   drivers/gpu/drm/i915//gvt/opregion.c
  drivers/gpu/drm/i915//gvt/opregion.c:142:31: warning: incorrect type in argument 1 (different address spaces)
  drivers/gpu/drm/i915//gvt/opregion.c:142:31:    expected void *addr
  drivers/gpu/drm/i915//gvt/opregion.c:142:31:    got void [noderef] <asn:2>*opregion_va
  drivers/gpu/drm/i915//gvt/opregion.c:160:35: warning: incorrect type in assignment (different address spaces)
  drivers/gpu/drm/i915//gvt/opregion.c:160:35:    expected void [noderef] <asn:2>*opregion_va
  drivers/gpu/drm/i915//gvt/opregion.c:160:35:    got void *

Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/gvt.h      | 2 +-
 drivers/gpu/drm/i915/gvt/opregion.c | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index e227caf5859e..5ee660077b0c 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -203,7 +203,7 @@ struct intel_gvt_firmware {
 };
 
 struct intel_gvt_opregion {
-	void __iomem *opregion_va;
+	void *opregion_va;
 	u32 opregion_pa;
 };
 
diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c
index d9fb41ab7119..5d1caf9daba9 100644
--- a/drivers/gpu/drm/i915/gvt/opregion.c
+++ b/drivers/gpu/drm/i915/gvt/opregion.c
@@ -27,7 +27,6 @@
 
 static int init_vgpu_opregion(struct intel_vgpu *vgpu, u32 gpa)
 {
-	void __iomem *host_va = vgpu->gvt->opregion.opregion_va;
 	u8 *buf;
 	int i;
 
@@ -43,8 +42,8 @@ static int init_vgpu_opregion(struct intel_vgpu *vgpu, u32 gpa)
 	if (!vgpu_opregion(vgpu)->va)
 		return -ENOMEM;
 
-	memcpy_fromio(vgpu_opregion(vgpu)->va, host_va,
-			INTEL_GVT_OPREGION_SIZE);
+	memcpy(vgpu_opregion(vgpu)->va, vgpu->gvt->opregion.opregion_va,
+	       INTEL_GVT_OPREGION_SIZE);
 
 	for (i = 0; i < INTEL_GVT_OPREGION_PAGES; i++)
 		vgpu_opregion(vgpu)->gfn[i] = (gpa >> PAGE_SHIFT) + i;
-- 
cgit v1.2.3


From fd64be636708d808852c4c8c1efce0a0a51c24c5 Mon Sep 17 00:00:00 2001
From: Min He <min.he@intel.com>
Date: Fri, 17 Feb 2017 15:02:36 +0800
Subject: drm/i915/gvt: introduced failsafe mode into vgpu

New failsafe mode is introduced, when we detect guest not supporting
GVT-g.
In failsafe mode, we will ignore all the MMIO and cfg space read/write
from guest.

This patch can fix the issue that when guest kernel or graphics driver
version is too low, there will be a lot of kernel traces in host.

V5: rebased onto latest gvt-staging
V4: changed coding style by Zhenyu and Ping's advice
V3: modified coding style and error messages according to Zhenyu's comment
V2: 1) implemented MMIO/GTT/WP pages read/write logic; 2) used a unified
function to enter failsafe mode

Signed-off-by: Min He <min.he@intel.com>
Signed-off-by: Pei Zhang <pei.zhang@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/cfg_space.c |  3 ++
 drivers/gpu/drm/i915/gvt/gvt.h       |  6 ++++
 drivers/gpu/drm/i915/gvt/handlers.c  | 36 +++++++++++++++++++++
 drivers/gpu/drm/i915/gvt/mmio.c      | 62 ++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/gvt/vgpu.c      |  6 +++-
 5 files changed, 112 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c
index 4a6a2ed65732..a77e050b85a3 100644
--- a/drivers/gpu/drm/i915/gvt/cfg_space.c
+++ b/drivers/gpu/drm/i915/gvt/cfg_space.c
@@ -237,6 +237,9 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset,
 {
 	int ret;
 
+	if (vgpu->failsafe)
+		return 0;
+
 	if (WARN_ON(bytes > 4))
 		return -EINVAL;
 
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 5ee660077b0c..48ef0771d772 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -143,6 +143,8 @@ struct intel_vgpu {
 	int id;
 	unsigned long handle; /* vGPU handle used by hypervisor MPT modules */
 	bool active;
+	bool pv_notified;
+	bool failsafe;
 	bool resetting;
 	void *sched_data;
 
@@ -449,6 +451,10 @@ struct intel_gvt_ops {
 };
 
 
+enum {
+	GVT_FAILSAFE_UNSUPPORTED_GUEST,
+};
+
 #include "mpt.h"
 
 #endif
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 1d450627ff65..6f098bb110bd 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -150,10 +150,34 @@ static int render_mmio_to_ring_id(struct intel_gvt *gvt, unsigned int reg)
 #define fence_num_to_offset(num) \
 	(num * 8 + i915_mmio_reg_offset(FENCE_REG_GEN6_LO(0)))
 
+
+static void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason)
+{
+	switch (reason) {
+	case GVT_FAILSAFE_UNSUPPORTED_GUEST:
+		pr_err("Detected your guest driver doesn't support GVT-g.\n");
+		break;
+	default:
+		break;
+	}
+	pr_err("Now vgpu %d will enter failsafe mode.\n", vgpu->id);
+	vgpu->failsafe = true;
+}
+
 static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu,
 		unsigned int fence_num, void *p_data, unsigned int bytes)
 {
 	if (fence_num >= vgpu_fence_sz(vgpu)) {
+
+		/* When guest access oob fence regs without access
+		 * pv_info first, we treat guest not supporting GVT,
+		 * and we will let vgpu enter failsafe mode.
+		 */
+		if (!vgpu->pv_notified) {
+			enter_failsafe_mode(vgpu,
+					GVT_FAILSAFE_UNSUPPORTED_GUEST);
+			return -EINVAL;
+		}
 		gvt_err("vgpu%d: found oob fence register access\n",
 				vgpu->id);
 		gvt_err("vgpu%d: total fence num %d access fence num %d\n",
@@ -1001,6 +1025,7 @@ static int pvinfo_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
 	if (invalid_read)
 		gvt_err("invalid pvinfo read: [%x:%x] = %x\n",
 				offset, bytes, *(u32 *)p_data);
+	vgpu->pv_notified = true;
 	return 0;
 }
 
@@ -1318,6 +1343,17 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
 	bool enable_execlist;
 
 	write_vreg(vgpu, offset, p_data, bytes);
+
+	/* when PPGTT mode enabled, we will check if guest has called
+	 * pvinfo, if not, we will treat this guest as non-gvtg-aware
+	 * guest, and stop emulating its cfg space, mmio, gtt, etc.
+	 */
+	if (((data & _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)) ||
+			(data & _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)))
+			&& !vgpu->pv_notified) {
+		enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST);
+		return 0;
+	}
 	if ((data & _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE))
 			|| (data & _MASKED_BIT_DISABLE(GFX_RUN_LIST_ENABLE))) {
 		enable_execlist = !!(data & GFX_RUN_LIST_ENABLE);
diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c
index 4df078bc5d04..b2d72dad1537 100644
--- a/drivers/gpu/drm/i915/gvt/mmio.c
+++ b/drivers/gpu/drm/i915/gvt/mmio.c
@@ -57,6 +57,58 @@ int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa)
 	(reg >= gvt->device_info.gtt_start_offset \
 	 && reg < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt))
 
+static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, uint64_t pa,
+		void *p_data, unsigned int bytes, bool read)
+{
+	struct intel_gvt *gvt = NULL;
+	void *pt = NULL;
+	unsigned int offset = 0;
+
+	if (!vgpu || !p_data)
+		return;
+
+	gvt = vgpu->gvt;
+	mutex_lock(&gvt->lock);
+	offset = intel_vgpu_gpa_to_mmio_offset(vgpu, pa);
+	if (reg_is_mmio(gvt, offset)) {
+		if (read)
+			intel_vgpu_default_mmio_read(vgpu, offset, p_data,
+					bytes);
+		else
+			intel_vgpu_default_mmio_write(vgpu, offset, p_data,
+					bytes);
+	} else if (reg_is_gtt(gvt, offset) &&
+			vgpu->gtt.ggtt_mm->virtual_page_table) {
+		offset -= gvt->device_info.gtt_start_offset;
+		pt = vgpu->gtt.ggtt_mm->virtual_page_table + offset;
+		if (read)
+			memcpy(p_data, pt, bytes);
+		else
+			memcpy(pt, p_data, bytes);
+
+	} else if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) {
+		struct intel_vgpu_guest_page *gp;
+
+		/* Since we enter the failsafe mode early during guest boot,
+		 * guest may not have chance to set up its ppgtt table, so
+		 * there should not be any wp pages for guest. Keep the wp
+		 * related code here in case we need to handle it in furture.
+		 */
+		gp = intel_vgpu_find_guest_page(vgpu, pa >> PAGE_SHIFT);
+		if (gp) {
+			/* remove write protection to prevent furture traps */
+			intel_vgpu_clean_guest_page(vgpu, gp);
+			if (read)
+				intel_gvt_hypervisor_read_gpa(vgpu, pa,
+						p_data, bytes);
+			else
+				intel_gvt_hypervisor_write_gpa(vgpu, pa,
+						p_data, bytes);
+		}
+	}
+	mutex_unlock(&gvt->lock);
+}
+
 /**
  * intel_vgpu_emulate_mmio_read - emulate MMIO read
  * @vgpu: a vGPU
@@ -75,6 +127,11 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa,
 	unsigned int offset = 0;
 	int ret = -EINVAL;
 
+
+	if (vgpu->failsafe) {
+		failsafe_emulate_mmio_rw(vgpu, pa, p_data, bytes, true);
+		return 0;
+	}
 	mutex_lock(&gvt->lock);
 
 	if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) {
@@ -188,6 +245,11 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa,
 	u32 old_vreg = 0, old_sreg = 0;
 	int ret = -EINVAL;
 
+	if (vgpu->failsafe) {
+		failsafe_emulate_mmio_rw(vgpu, pa, p_data, bytes, false);
+		return 0;
+	}
+
 	mutex_lock(&gvt->lock);
 
 	if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) {
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index 95a97aa0051e..dcfcce1dc00e 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -387,8 +387,12 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
 		populate_pvinfo_page(vgpu);
 		intel_vgpu_reset_display(vgpu);
 
-		if (dmlr)
+		if (dmlr) {
 			intel_vgpu_reset_cfg_space(vgpu);
+			/* only reset the failsafe mode when dmlr reset */
+			vgpu->failsafe = false;
+			vgpu->pv_notified = false;
+		}
 	}
 
 	vgpu->resetting = false;
-- 
cgit v1.2.3


From d1be371d4f4c12d11023c9fc795e5d460d960680 Mon Sep 17 00:00:00 2001
From: "Zhao, Xinda" <xinda.zhao@intel.com>
Date: Fri, 17 Feb 2017 14:38:33 +0800
Subject: drm/i915/gvt: handle fence reg access during GPU reset

Lots of reduntant log info will be printed out during GPU reset,
including accessing untracked mmio register and fence register,
variable disable_warn_untrack is added previously to handle the
situation, but the accessing of fence register is ignored in the
previously patch, so add it back.

Besides, set the variable disable_warn_untrack to the defalut value
after GPU reset is finished.

Signed-off-by: Zhao, Xinda <xinda.zhao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/handlers.c | 15 +++++++++------
 drivers/gpu/drm/i915/gvt/mmio.c     |  2 ++
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 6f098bb110bd..fd7e789a72c3 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -173,16 +173,19 @@ static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu,
 		 * pv_info first, we treat guest not supporting GVT,
 		 * and we will let vgpu enter failsafe mode.
 		 */
-		if (!vgpu->pv_notified) {
+		if (!vgpu->pv_notified)
 			enter_failsafe_mode(vgpu,
 					GVT_FAILSAFE_UNSUPPORTED_GUEST);
-			return -EINVAL;
+
+		if (!vgpu->mmio.disable_warn_untrack) {
+			gvt_err("vgpu%d: found oob fence register access\n",
+					vgpu->id);
+			gvt_err("vgpu%d: total fence %d, access fence %d\n",
+					vgpu->id, vgpu_fence_sz(vgpu),
+					fence_num);
 		}
-		gvt_err("vgpu%d: found oob fence register access\n",
-				vgpu->id);
-		gvt_err("vgpu%d: total fence num %d access fence num %d\n",
-				vgpu->id, vgpu_fence_sz(vgpu), fence_num);
 		memset(p_data, 0, bytes);
+		return -EINVAL;
 	}
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c
index b2d72dad1537..99abb01fa9eb 100644
--- a/drivers/gpu/drm/i915/gvt/mmio.c
+++ b/drivers/gpu/drm/i915/gvt/mmio.c
@@ -384,6 +384,8 @@ void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu)
 
 	/* set the bit 0:2(Core C-State ) to C0 */
 	vgpu_vreg(vgpu, GEN6_GT_CORE_STATUS) = 0;
+
+	vgpu->mmio.disable_warn_untrack = false;
 }
 
 /**
-- 
cgit v1.2.3


From c74fd80f2f41d05f350bb478151021f88551afe8 Mon Sep 17 00:00:00 2001
From: Dan Streetman <ddstreet@ieee.org>
Date: Fri, 13 Jan 2017 15:07:51 -0500
Subject: xen: do not re-use pirq number cached in pci device msi msg data

Revert the main part of commit:
af42b8d12f8a ("xen: fix MSI setup and teardown for PV on HVM guests")

That commit introduced reading the pci device's msi message data to see
if a pirq was previously configured for the device's msi/msix, and re-use
that pirq.  At the time, that was the correct behavior.  However, a
later change to Qemu caused it to call into the Xen hypervisor to unmap
all pirqs for a pci device, when the pci device disables its MSI/MSIX
vectors; specifically the Qemu commit:
c976437c7dba9c7444fb41df45468968aaa326ad
("qemu-xen: free all the pirqs for msi/msix when driver unload")

Once Qemu added this pirq unmapping, it was no longer correct for the
kernel to re-use the pirq number cached in the pci device msi message
data.  All Qemu releases since 2.1.0 contain the patch that unmaps the
pirqs when the pci device disables its MSI/MSIX vectors.

This bug is causing failures to initialize multiple NVMe controllers
under Xen, because the NVMe driver sets up a single MSIX vector for
each controller (concurrently), and then after using that to talk to
the controller for some configuration data, it disables the single MSIX
vector and re-configures all the MSIX vectors it needs.  So the MSIX
setup code tries to re-use the cached pirq from the first vector
for each controller, but the hypervisor has already given away that
pirq to another controller, and its initialization fails.

This is discussed in more detail at:
https://lists.xen.org/archives/html/xen-devel/2017-01/msg00447.html

Fixes: af42b8d12f8a ("xen: fix MSI setup and teardown for PV on HVM guests")
Signed-off-by: Dan Streetman <dan.streetman@canonical.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 arch/x86/pci/xen.c | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index e1fb269c87af..292ab0364a89 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -234,23 +234,14 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 		return 1;
 
 	for_each_pci_msi_entry(msidesc, dev) {
-		__pci_read_msi_msg(msidesc, &msg);
-		pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) |
-			((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff);
-		if (msg.data != XEN_PIRQ_MSI_DATA ||
-		    xen_irq_from_pirq(pirq) < 0) {
-			pirq = xen_allocate_pirq_msi(dev, msidesc);
-			if (pirq < 0) {
-				irq = -ENODEV;
-				goto error;
-			}
-			xen_msi_compose_msg(dev, pirq, &msg);
-			__pci_write_msi_msg(msidesc, &msg);
-			dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
-		} else {
-			dev_dbg(&dev->dev,
-				"xen: msi already bound to pirq=%d\n", pirq);
+		pirq = xen_allocate_pirq_msi(dev, msidesc);
+		if (pirq < 0) {
+			irq = -ENODEV;
+			goto error;
 		}
+		xen_msi_compose_msg(dev, pirq, &msg);
+		__pci_write_msi_msg(msidesc, &msg);
+		dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
 		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq,
 					       (type == PCI_CAP_ID_MSI) ? nvec : 1,
 					       (type == PCI_CAP_ID_MSIX) ?
-- 
cgit v1.2.3


From a33fc7a0482a40068c022aefcefd50f9f0f44f87 Mon Sep 17 00:00:00 2001
From: Min He <min.he@intel.com>
Date: Fri, 17 Feb 2017 16:42:38 +0800
Subject: drm/i915/gvt: enter failsafe mode when guest requires more resources

Windows guest will notitfy GVT-g to request more resources through g2v
interface, when its resources are not enough.
This patch is to handle this case and let vgpu enter failsafe mode to
avoid too many error messages.

Signed-off-by: Min He <min.he@intel.com>
Signed-off-by: Pei Zhang <pei.zhang@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/gvt.h      | 1 +
 drivers/gpu/drm/i915/gvt/handlers.c | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 48ef0771d772..a41b322d8957 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -453,6 +453,7 @@ struct intel_gvt_ops {
 
 enum {
 	GVT_FAILSAFE_UNSUPPORTED_GUEST,
+	GVT_FAILSAFE_INSUFFICIENT_RESOURCE,
 };
 
 #include "mpt.h"
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index fd7e789a72c3..e1a382645112 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -157,6 +157,8 @@ static void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason)
 	case GVT_FAILSAFE_UNSUPPORTED_GUEST:
 		pr_err("Detected your guest driver doesn't support GVT-g.\n");
 		break;
+	case GVT_FAILSAFE_INSUFFICIENT_RESOURCE:
+		pr_err("Graphics resource is not enough for the guest\n");
 	default:
 		break;
 	}
@@ -1106,6 +1108,9 @@ static int pvinfo_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
 	case _vgtif_reg(execlist_context_descriptor_lo):
 	case _vgtif_reg(execlist_context_descriptor_hi):
 		break;
+	case _vgtif_reg(rsv5[0])..._vgtif_reg(rsv5[3]):
+		enter_failsafe_mode(vgpu, GVT_FAILSAFE_INSUFFICIENT_RESOURCE);
+		break;
 	default:
 		gvt_err("invalid pvinfo write offset %x bytes %x data %x\n",
 				offset, bytes, data);
-- 
cgit v1.2.3


From 9272f73f79bd780502134f227fa52fd280ecda17 Mon Sep 17 00:00:00 2001
From: Chuanxiao Dong <chuanxiao.dong@intel.com>
Date: Fri, 17 Feb 2017 19:29:52 +0800
Subject: drm/i915/gvt: add a NULL pointer check to avoid kernel panic

Due to the request replay, context switch interrupt may come after
gvt free the workload thus can cause a kernel NULL pointer kernel
panic. This patch will add a simple check to avoid this for a short
term.

From long term, gvt workload lifecycle doesn't match with i915 request
and need to find a proper way to manage this.

v4: simplify the NULL pointer check.
v5: add unlikely to optimize.

Signed-off-by: Chuanxiao Dong <chuanxiao.dong@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/scheduler.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index d6b6d0efdd1a..e355a82ccabd 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -139,6 +139,9 @@ static int shadow_context_status_change(struct notifier_block *nb,
 	struct intel_vgpu_workload *workload =
 		scheduler->current_workload[req->engine->id];
 
+	if (unlikely(!workload))
+		return NOTIFY_OK;
+
 	switch (action) {
 	case INTEL_CONTEXT_SCHEDULE_IN:
 		intel_gvt_load_render_mmio(workload->vgpu,
-- 
cgit v1.2.3


From 593e59b4b941910e4f7ba87d3c02d63e38e960d0 Mon Sep 17 00:00:00 2001
From: Zhao Yan <yan.y.zhao@intel.com>
Date: Mon, 20 Feb 2017 15:51:13 +0800
Subject: drm/i915/gvt: fix unhandled mmio warnings

some registers were missing or treated as BDW only. This patch is to fix it
avoid unhandled mmio wanrings

v2: update commit message according to zhenyu's comment

Signed-off-by: Zhao Yan <yan.y.zhao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/handlers.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index e1a382645112..e1abece853d4 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1519,6 +1519,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 	MMIO_DFH(GEN7_GT_MODE, D_ALL, F_MODE_MASK, NULL, NULL);
 	MMIO_DFH(CACHE_MODE_0_GEN7, D_ALL, F_MODE_MASK, NULL, NULL);
 	MMIO_DFH(CACHE_MODE_1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(CACHE_MODE_0, D_ALL, F_MODE_MASK, NULL, NULL);
+	MMIO_DFH(0x2124, D_ALL, F_MODE_MASK, NULL, NULL);
 
 	MMIO_DFH(0x20dc, D_ALL, F_MODE_MASK, NULL, NULL);
 	MMIO_DFH(_3D_CHICKEN3, D_ALL, F_MODE_MASK, NULL, NULL);
@@ -2390,9 +2392,9 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 
 	MMIO_DFH(HDC_CHICKEN0, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
 
-	MMIO_D(CHICKEN_PIPESL_1(PIPE_A), D_BDW);
-	MMIO_D(CHICKEN_PIPESL_1(PIPE_B), D_BDW);
-	MMIO_D(CHICKEN_PIPESL_1(PIPE_C), D_BDW);
+	MMIO_D(CHICKEN_PIPESL_1(PIPE_A), D_BDW_PLUS);
+	MMIO_D(CHICKEN_PIPESL_1(PIPE_B), D_BDW_PLUS);
+	MMIO_D(CHICKEN_PIPESL_1(PIPE_C), D_BDW_PLUS);
 
 	MMIO_D(WM_MISC, D_BDW);
 	MMIO_D(BDW_EDP_PSR_BASE, D_BDW);
@@ -2406,7 +2408,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(GEN8_EU_DISABLE1, D_BDW_PLUS);
 	MMIO_D(GEN8_EU_DISABLE2, D_BDW_PLUS);
 
-	MMIO_D(0xfdc, D_BDW);
+	MMIO_D(0xfdc, D_BDW_PLUS);
 	MMIO_DFH(GEN8_ROW_CHICKEN, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
 	MMIO_D(GEN7_ROW_CHICKEN2, D_BDW_PLUS);
 	MMIO_D(GEN8_UCGCTL6, D_BDW_PLUS);
@@ -2423,6 +2425,10 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 	MMIO_DFH(0x24d8, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(0x24dc, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
 
+	MMIO_D(0x22040, D_BDW_PLUS);
+	MMIO_D(0x44484, D_BDW_PLUS);
+	MMIO_D(0x4448c, D_BDW_PLUS);
+
 	MMIO_D(0x83a4, D_BDW);
 	MMIO_D(GEN8_L3_LRA_1_GPGPU, D_BDW_PLUS);
 
@@ -2668,6 +2674,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(_PLANE_KEYMSK_1(PIPE_C), D_SKL);
 
 	MMIO_D(0x44500, D_SKL);
+	MMIO_D(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS);
 	return 0;
 }
 
-- 
cgit v1.2.3


From d8e9b2b9097c117880dc22933239d05199c60b96 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 20 Feb 2017 14:58:25 +0100
Subject: drm/i915/gvt: Fix superfluous newline in GVT_DISPLAY_READY env var

send_display_send_uevent() sends two environment variable, and the
first one GVT_DISPLAY_READY is set including a new line at the end of
the string; that is obviously superfluous and wrong -- at least, it
*looks* so when you only read the code.

However, it doesn't appear in the actual output by a (supposedly
unexpected) trick.  The code uses snprintf() and truncates the string
in size 20 bytes.  This makes the string as GVT_DISPLAY_READY=0 or
...=1 including the trailing NUL-letter.  That is, the '\n' found in
the format string is always cut off as a result.

Although the code gives the correct result, it is confusing.  This
patch addresses it, just removing the superfluous '\n' from the format
string for avoiding further confusion.  If the argument "ready" were
not a  bool, the size 20 should be corrected as well.  But it's a
bool, so we can leave the magic number 20 as is for now.

FWIW, the bug was spotted by a new GCC7 warning:
  drivers/gpu/drm/i915/gvt/handlers.c: In function 'pvinfo_mmio_write':
  drivers/gpu/drm/i915/gvt/handlers.c:1042:34: error: 'snprintf' output truncated before the last format character [-Werror=format-truncation=]
    snprintf(display_ready_str, 20, "GVT_DISPLAY_READY=%d\n", ready);
                                    ^~~~~~~~~~~~~~~~~~~~~~~~
  drivers/gpu/drm/i915/gvt/handlers.c:1042:2: note: 'snprintf' output 21 bytes into a destination of size 20
    snprintf(display_ready_str, 20, "GVT_DISPLAY_READY=%d\n", ready);
    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Fixes: 04d348ae3f0a ("drm/i915/gvt: vGPU display virtualization")
Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1025903
Reported-by: Richard Biener <rguenther@suse.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/handlers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index e1abece853d4..a1880424ad32 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1069,7 +1069,7 @@ static int send_display_ready_uevent(struct intel_vgpu *vgpu, int ready)
 	char vmid_str[20];
 	char display_ready_str[20];
 
-	snprintf(display_ready_str, 20, "GVT_DISPLAY_READY=%d\n", ready);
+	snprintf(display_ready_str, 20, "GVT_DISPLAY_READY=%d", ready);
 	env[0] = display_ready_str;
 
 	snprintf(vmid_str, 20, "VMID=%d", vgpu->id);
-- 
cgit v1.2.3


From 4c4b22abb3d405c5c194b02255eecc1a9eff42cf Mon Sep 17 00:00:00 2001
From: Zhao Yan <yan.y.zhao@intel.com>
Date: Tue, 21 Feb 2017 09:39:00 +0800
Subject: drm/i915/gvt: add more registers to context save/restore list

the value of those registers should be applied to hardware on
context restoring

Signed-off-by: Zhao Yan <yan.y.zhao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/render.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c
index 2b3a642284b6..73f052a4f424 100644
--- a/drivers/gpu/drm/i915/gvt/render.c
+++ b/drivers/gpu/drm/i915/gvt/render.c
@@ -53,6 +53,14 @@ static struct render_mmio gen8_render_mmio_list[] = {
 	{RCS, _MMIO(0x24d4), 0, false},
 	{RCS, _MMIO(0x24d8), 0, false},
 	{RCS, _MMIO(0x24dc), 0, false},
+	{RCS, _MMIO(0x24e0), 0, false},
+	{RCS, _MMIO(0x24e4), 0, false},
+	{RCS, _MMIO(0x24e8), 0, false},
+	{RCS, _MMIO(0x24ec), 0, false},
+	{RCS, _MMIO(0x24f0), 0, false},
+	{RCS, _MMIO(0x24f4), 0, false},
+	{RCS, _MMIO(0x24f8), 0, false},
+	{RCS, _MMIO(0x24fc), 0, false},
 	{RCS, _MMIO(0x7004), 0xffff, true},
 	{RCS, _MMIO(0x7008), 0xffff, true},
 	{RCS, _MMIO(0x7000), 0xffff, true},
@@ -76,6 +84,14 @@ static struct render_mmio gen9_render_mmio_list[] = {
 	{RCS, _MMIO(0x24d4), 0, false},
 	{RCS, _MMIO(0x24d8), 0, false},
 	{RCS, _MMIO(0x24dc), 0, false},
+	{RCS, _MMIO(0x24e0), 0, false},
+	{RCS, _MMIO(0x24e4), 0, false},
+	{RCS, _MMIO(0x24e8), 0, false},
+	{RCS, _MMIO(0x24ec), 0, false},
+	{RCS, _MMIO(0x24f0), 0, false},
+	{RCS, _MMIO(0x24f4), 0, false},
+	{RCS, _MMIO(0x24f8), 0, false},
+	{RCS, _MMIO(0x24fc), 0, false},
 	{RCS, _MMIO(0x7004), 0xffff, true},
 	{RCS, _MMIO(0x7008), 0xffff, true},
 	{RCS, _MMIO(0x7000), 0xffff, true},
-- 
cgit v1.2.3


From e6cedfea6b8dcb205f75ea632570f52d2ffd1251 Mon Sep 17 00:00:00 2001
From: Zhao Yan <yan.y.zhao@intel.com>
Date: Tue, 21 Feb 2017 10:38:53 +0800
Subject: drm/i915/gvt: force-nopriv register handling

add a whitelist to check the content of force-nonpriv registers

v3:
per He Min's comment, modify in_whitelist()'s return type to bool, and use
negative value as the return value for failure for force_nonpriv_write().

v2:
1. split a big patch into two smaller ones per zhenyu's comment.
this patch is the mmio handling part for force-nopriv registers

2. per zhenyu's comment, combine all non-priv registers into a single
MMIO_DFH entry

Signed-off-by: Zhao Yan <yan.y.zhao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/handlers.c | 74 +++++++++++++++++++++++++++++++++++--
 1 file changed, 70 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index a1880424ad32..af0c0d1ec9a8 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -398,6 +398,74 @@ static int pipeconf_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
 	return 0;
 }
 
+/* ascendingly sorted */
+static i915_reg_t force_nonpriv_white_list[] = {
+	GEN9_CS_DEBUG_MODE1, //_MMIO(0x20ec)
+	GEN9_CTX_PREEMPT_REG,//_MMIO(0x2248)
+	GEN8_CS_CHICKEN1,//_MMIO(0x2580)
+	_MMIO(0x2690),
+	_MMIO(0x2694),
+	_MMIO(0x2698),
+	_MMIO(0x4de0),
+	_MMIO(0x4de4),
+	_MMIO(0x4dfc),
+	GEN7_COMMON_SLICE_CHICKEN1,//_MMIO(0x7010)
+	_MMIO(0x7014),
+	HDC_CHICKEN0,//_MMIO(0x7300)
+	GEN8_HDC_CHICKEN1,//_MMIO(0x7304)
+	_MMIO(0x7700),
+	_MMIO(0x7704),
+	_MMIO(0x7708),
+	_MMIO(0x770c),
+	_MMIO(0xb110),
+	GEN8_L3SQCREG4,//_MMIO(0xb118)
+	_MMIO(0xe100),
+	_MMIO(0xe18c),
+	_MMIO(0xe48c),
+	_MMIO(0xe5f4),
+};
+
+/* a simple bsearch */
+static inline bool in_whitelist(unsigned int reg)
+{
+	int left = 0, right = ARRAY_SIZE(force_nonpriv_white_list);
+	i915_reg_t *array = force_nonpriv_white_list;
+
+	while (left < right) {
+		int mid = (left + right)/2;
+
+		if (reg > array[mid].reg)
+			left = mid + 1;
+		else if (reg < array[mid].reg)
+			right = mid;
+		else
+			return true;
+	}
+	return false;
+}
+
+static int force_nonpriv_write(struct intel_vgpu *vgpu,
+	unsigned int offset, void *p_data, unsigned int bytes)
+{
+	u32 reg_nonpriv = *(u32 *)p_data;
+	int ret = -EINVAL;
+
+	if ((bytes != 4) || ((offset & (bytes - 1)) != 0)) {
+		gvt_err("vgpu(%d) Invalid FORCE_NONPRIV offset %x(%dB)\n",
+			vgpu->id, offset, bytes);
+		return ret;
+	}
+
+	if (in_whitelist(reg_nonpriv)) {
+		ret = intel_vgpu_default_mmio_write(vgpu, offset, p_data,
+			bytes);
+	} else {
+		gvt_err("vgpu(%d) Invalid FORCE_NONPRIV write %x\n",
+			vgpu->id, reg_nonpriv);
+	}
+	return ret;
+}
+
 static int ddi_buf_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
 		void *p_data, unsigned int bytes)
 {
@@ -2420,10 +2488,8 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(0xb10c, D_BDW);
 	MMIO_D(0xb110, D_BDW);
 
-	MMIO_DFH(0x24d0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
-	MMIO_DFH(0x24d4, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
-	MMIO_DFH(0x24d8, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
-	MMIO_DFH(0x24dc, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_F(0x24d0, 48, F_CMD_ACCESS, 0, 0, D_BDW_PLUS,
+		NULL, force_nonpriv_write);
 
 	MMIO_D(0x22040, D_BDW_PLUS);
 	MMIO_D(0x44484, D_BDW_PLUS);
-- 
cgit v1.2.3


From 187447a106fc9caca45f10413845678d3666556c Mon Sep 17 00:00:00 2001
From: Pei Zhang <pei.zhang@intel.com>
Date: Tue, 21 Feb 2017 21:58:14 +0800
Subject: drm/i915/gvt: add cmd_access to GEN7_HALF_SLICE_CHICKEN1

Linux guest is using this MMIO in lri command. Add cmd_access flag
for this mmio in gvt to avoid error log.

v2: change the mmio address to its macro name

Signed-off-by: Pei Zhang <pei.zhang@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/handlers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index af0c0d1ec9a8..bfe12ddb0210 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1607,7 +1607,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(0x243c, D_ALL);
 	MMIO_DFH(0x7018, D_ALL, F_MODE_MASK, NULL, NULL);
 	MMIO_DFH(HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
-	MMIO_DFH(0xe100, D_ALL, F_MODE_MASK, NULL, NULL);
+	MMIO_DFH(GEN7_HALF_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
 
 	/* display */
 	MMIO_F(0x60220, 0x20, 0, 0, 0, D_ALL, NULL, NULL);
-- 
cgit v1.2.3


From ec162aa84c9057e196f3a844c0a8d569f5095e6c Mon Sep 17 00:00:00 2001
From: Zhao Yan <yan.y.zhao@intel.com>
Date: Tue, 21 Feb 2017 14:00:37 +0800
Subject: drm/i915/gvt: set default value to 0 for unhandled mmio regs

for a handled mmio reg,  its default value is read from hardware, while
for an unhandled mmio regs,  its default value would be random if not
explicitly set to 0

Signed-off-by: Zhao Yan <yan.y.zhao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/firmware.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c
index 1cb29b2d7dc6..933a7c211a1c 100644
--- a/drivers/gpu/drm/i915/gvt/firmware.c
+++ b/drivers/gpu/drm/i915/gvt/firmware.c
@@ -80,7 +80,7 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt)
 	int ret;
 
 	size = sizeof(*h) + info->mmio_size + info->cfg_space_size - 1;
-	firmware = vmalloc(size);
+	firmware = vzalloc(size);
 	if (!firmware)
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From bab059304314e127cf4a5330c6bd5a71fd27c022 Mon Sep 17 00:00:00 2001
From: "Zhao, Xinda" <xinda.zhao@intel.com>
Date: Tue, 21 Feb 2017 15:07:31 +0800
Subject: drm/i915/gvt: decrease priority of output msg for untracked mmio

When untracked mmio is visited, too many log info will be printed out,
it may confuse the user, but most of the time, it is not the urgent case,
so use gvt_dbg_mmio() instead.

Signed-off-by: Zhao, Xinda <xinda.zhao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/mmio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c
index 99abb01fa9eb..60b698cb8365 100644
--- a/drivers/gpu/drm/i915/gvt/mmio.c
+++ b/drivers/gpu/drm/i915/gvt/mmio.c
@@ -298,7 +298,7 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa,
 
 	mmio = intel_gvt_find_mmio_info(gvt, rounddown(offset, 4));
 	if (!mmio && !vgpu->mmio.disable_warn_untrack)
-		gvt_err("vgpu%d: write untracked MMIO %x len %d val %x\n",
+		gvt_dbg_mmio("vgpu%d: write untracked MMIO %x len %d val %x\n",
 				vgpu->id, offset, bytes, *(u32 *)p_data);
 
 	if (!intel_gvt_mmio_is_unalign(gvt, offset)) {
-- 
cgit v1.2.3


From da9cc8de22aa6bd6ed51c406432d599ab520a6e3 Mon Sep 17 00:00:00 2001
From: Ping Gao <ping.a.gao@intel.com>
Date: Tue, 21 Feb 2017 15:52:56 +0800
Subject: drm/i915/gvt: clear the vGPU reset logic

Releasing shadow PPGTT pages is not enough when vGPU reset, the
guest page table tracking data should has same life-cycle with
all the shadow PPGTT pages; Otherwise there is no chance to
re-shadow the PPGTT pages without free the guest page table
tracking data.

This patch clear the PPGTT reset logic and make the vGPU reset in
working order.

v2: refactor some logic to avoid code duplicated.
v3: remove useless macro and add comments from Christophe.
v4: keep reset logic in reset function.

Signed-off-by: Ping Gao <ping.a.gao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/gtt.c | 35 +++++++++++++++++++++++++----------
 1 file changed, 25 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 28c92346db0e..b5c833287d39 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -2015,6 +2015,22 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
 	return create_scratch_page_tree(vgpu);
 }
 
+static void intel_vgpu_free_mm(struct intel_vgpu *vgpu, int type)
+{
+	struct list_head *pos, *n;
+	struct intel_vgpu_mm *mm;
+
+	list_for_each_safe(pos, n, &vgpu->gtt.mm_list_head) {
+		mm = container_of(pos, struct intel_vgpu_mm, list);
+		if (mm->type == type) {
+			vgpu->gvt->gtt.mm_free_page_table(mm);
+			list_del(&mm->list);
+			list_del(&mm->lru_list);
+			kfree(mm);
+		}
+	}
+}
+
 /**
  * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virulization
  * @vgpu: a vGPU
@@ -2027,19 +2043,11 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
  */
 void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
 {
-	struct list_head *pos, *n;
-	struct intel_vgpu_mm *mm;
-
 	ppgtt_free_all_shadow_page(vgpu);
 	release_scratch_page_tree(vgpu);
 
-	list_for_each_safe(pos, n, &vgpu->gtt.mm_list_head) {
-		mm = container_of(pos, struct intel_vgpu_mm, list);
-		vgpu->gvt->gtt.mm_free_page_table(mm);
-		list_del(&mm->list);
-		list_del(&mm->lru_list);
-		kfree(mm);
-	}
+	intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_PPGTT);
+	intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_GGTT);
 }
 
 static void clean_spt_oos(struct intel_gvt *gvt)
@@ -2322,6 +2330,13 @@ void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu, bool dmlr)
 	int i;
 
 	ppgtt_free_all_shadow_page(vgpu);
+
+	/* Shadow pages are only created when there is no page
+	 * table tracking data, so remove page tracking data after
+	 * removing the shadow pages.
+	 */
+	intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_PPGTT);
+
 	if (!dmlr)
 		return;
 
-- 
cgit v1.2.3


From d8a355be0b2b5613f7b34aee1394369d45d50586 Mon Sep 17 00:00:00 2001
From: Weinan Li <weinan.z.li@intel.com>
Date: Wed, 22 Feb 2017 11:03:24 +0800
Subject: drm/i915/gvt: refine pcode write emulation

In GVT-g we always emulate as pcode read/write success and ready for access
anytime, since we don't touch real physical registers here.

Add 'SKL_PCODE_CDCLK_CONTROL' write emulation, without it will cause
skl_set_cdclk fail in guest.

Signed-off-by: Weinan Li <weinan.z.li@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/handlers.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index bfe12ddb0210..f89b183488e9 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1315,6 +1315,9 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset,
 		else
 			*data0 = 0x61514b3d;
 		break;
+	case SKL_PCODE_CDCLK_CONTROL:
+		*data0 = SKL_CDCLK_READY_FOR_CHANGE;
+		break;
 	case 0x5:
 		*data0 |= 0x1;
 		break;
@@ -1322,8 +1325,13 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset,
 
 	gvt_dbg_core("VM(%d) write %x to mailbox, return data0 %x\n",
 		     vgpu->id, value, *data0);
-
-	value &= ~(1 << 31);
+	/**
+	 * PCODE_READY clear means ready for pcode read/write,
+	 * PCODE_ERROR_MASK clear means no error happened. In GVT-g we
+	 * always emulate as pcode read/write success and ready for access
+	 * anytime, since we don't touch real physical registers here.
+	 */
+	value &= ~(GEN6_PCODE_READY | GEN6_PCODE_ERROR_MASK);
 	return intel_vgpu_default_mmio_write(vgpu, offset, &value, bytes);
 }
 
-- 
cgit v1.2.3


From 7c28135c77414327523b89bfc3f13096e095f5ac Mon Sep 17 00:00:00 2001
From: "Zhao, Xinda" <xinda.zhao@intel.com>
Date: Tue, 21 Feb 2017 15:54:56 +0800
Subject: drm/i915/gvt: remove unnecessary error msg from gtt write

The guest VM may initialize the whole GTT table during boot up,
so the warning msg in emulate_gtt_mmio_write is not necessary, it is
the expected behavior and it may confuse the user if error msg is
printed out, so remove the msg from emulate_gtt_mmio_write(),

Signed-off-by: Zhao, Xinda <xinda.zhao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/gtt.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index b5c833287d39..6a5ff23ded90 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -1825,11 +1825,8 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
 	gma = g_gtt_index << GTT_PAGE_SHIFT;
 
 	/* the VM may configure the whole GM space when ballooning is used */
-	if (WARN_ONCE(!vgpu_gmadr_is_valid(vgpu, gma),
-				"vgpu%d: found oob ggtt write, offset %x\n",
-				vgpu->id, off)) {
+	if (!vgpu_gmadr_is_valid(vgpu, gma))
 		return 0;
-	}
 
 	ggtt_get_guest_entry(ggtt_mm, &e, g_gtt_index);
 
-- 
cgit v1.2.3


From 191020b670f84f5e2edfaa906c3801df20485610 Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Thu, 23 Feb 2017 14:46:23 +0800
Subject: drm/i915/gvt: adjust to fixed vGPU types

Previous vGPU type create tried to determine vGPU type name e.g _1, _2
based on the number of mdev devices can be created, but different type
might have very different resource size depending on physical device.
We need to split type name vs. actual mdev resource and create fixed
vGPU type with determined size for consistence.

With this we'd like to fix vGPU types for _1, _2, _4 and _8 now, each
type has fixed defined resource size. Available mdev instances that could
be created is determined by physical resource, and user should query
for that before creating.

Cc: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/gvt.h  |  1 -
 drivers/gpu/drm/i915/gvt/vgpu.c | 58 ++++++++++++++++++++++++-----------------
 2 files changed, 34 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index a41b322d8957..faff044c6434 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -212,7 +212,6 @@ struct intel_gvt_opregion {
 #define NR_MAX_INTEL_VGPU_TYPES 20
 struct intel_vgpu_type {
 	char name[16];
-	unsigned int max_instance;
 	unsigned int avail_instance;
 	unsigned int low_gm_size;
 	unsigned int high_gm_size;
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index dcfcce1dc00e..c27c13c3cc48 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -64,6 +64,19 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu)
 	WARN_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE);
 }
 
+static struct {
+	unsigned int low_mm;
+	unsigned int high_mm;
+	unsigned int fence;
+	char *name;
+} vgpu_types[] = {
+/* Fixed vGPU type table */
+	{ MB_TO_BYTES(64), MB_TO_BYTES(512), 4, "8" },
+	{ MB_TO_BYTES(128), MB_TO_BYTES(512), 4, "4" },
+	{ MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, "2" },
+	{ MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, "1" },
+};
+
 /**
  * intel_gvt_init_vgpu_types - initialize vGPU type list
  * @gvt : GVT device
@@ -78,9 +91,8 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt)
 	unsigned int min_low;
 
 	/* vGPU type name is defined as GVTg_Vx_y which contains
-	 * physical GPU generation type and 'y' means maximum vGPU
-	 * instances user can create on one physical GPU for this
-	 * type.
+	 * physical GPU generation type (e.g V4 as BDW server, V5 as
+	 * SKL server).
 	 *
 	 * Depend on physical SKU resource, might see vGPU types like
 	 * GVTg_V4_8, GVTg_V4_4, GVTg_V4_2, etc. We can create
@@ -92,7 +104,7 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt)
 	 */
 	low_avail = gvt_aperture_sz(gvt) - HOST_LOW_GM_SIZE;
 	high_avail = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE;
-	num_types = 4;
+	num_types = sizeof(vgpu_types) / sizeof(vgpu_types[0]);
 
 	gvt->types = kzalloc(num_types * sizeof(struct intel_vgpu_type),
 			     GFP_KERNEL);
@@ -101,25 +113,24 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt)
 
 	min_low = MB_TO_BYTES(32);
 	for (i = 0; i < num_types; ++i) {
-		if (low_avail / min_low == 0)
+		if (low_avail / vgpu_types[i].low_mm == 0)
 			break;
-		gvt->types[i].low_gm_size = min_low;
-		gvt->types[i].high_gm_size = max((min_low<<3), MB_TO_BYTES(384U));
-		gvt->types[i].fence = 4;
-		gvt->types[i].max_instance = min(low_avail / min_low,
-						 high_avail / gvt->types[i].high_gm_size);
-		gvt->types[i].avail_instance = gvt->types[i].max_instance;
+
+		gvt->types[i].low_gm_size = vgpu_types[i].low_mm;
+		gvt->types[i].high_gm_size = vgpu_types[i].high_mm;
+		gvt->types[i].fence = vgpu_types[i].fence;
+		gvt->types[i].avail_instance = min(low_avail / vgpu_types[i].low_mm,
+						   high_avail / vgpu_types[i].high_mm);
 
 		if (IS_GEN8(gvt->dev_priv))
-			sprintf(gvt->types[i].name, "GVTg_V4_%u",
-						gvt->types[i].max_instance);
+			sprintf(gvt->types[i].name, "GVTg_V4_%s",
+						vgpu_types[i].name);
 		else if (IS_GEN9(gvt->dev_priv))
-			sprintf(gvt->types[i].name, "GVTg_V5_%u",
-						gvt->types[i].max_instance);
+			sprintf(gvt->types[i].name, "GVTg_V5_%s",
+						vgpu_types[i].name);
 
-		min_low <<= 1;
-		gvt_dbg_core("type[%d]: %s max %u avail %u low %u high %u fence %u\n",
-			     i, gvt->types[i].name, gvt->types[i].max_instance,
+		gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u\n",
+			     i, gvt->types[i].name,
 			     gvt->types[i].avail_instance,
 			     gvt->types[i].low_gm_size,
 			     gvt->types[i].high_gm_size, gvt->types[i].fence);
@@ -138,7 +149,7 @@ static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt)
 {
 	int i;
 	unsigned int low_gm_avail, high_gm_avail, fence_avail;
-	unsigned int low_gm_min, high_gm_min, fence_min, total_min;
+	unsigned int low_gm_min, high_gm_min, fence_min;
 
 	/* Need to depend on maxium hw resource size but keep on
 	 * static config for now.
@@ -154,12 +165,11 @@ static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt)
 		low_gm_min = low_gm_avail / gvt->types[i].low_gm_size;
 		high_gm_min = high_gm_avail / gvt->types[i].high_gm_size;
 		fence_min = fence_avail / gvt->types[i].fence;
-		total_min = min(min(low_gm_min, high_gm_min), fence_min);
-		gvt->types[i].avail_instance = min(gvt->types[i].max_instance,
-						   total_min);
+		gvt->types[i].avail_instance = min(min(low_gm_min, high_gm_min),
+						   fence_min);
 
-		gvt_dbg_core("update type[%d]: %s max %u avail %u low %u high %u fence %u\n",
-		       i, gvt->types[i].name, gvt->types[i].max_instance,
+		gvt_dbg_core("update type[%d]: %s avail %u low %u high %u fence %u\n",
+		       i, gvt->types[i].name,
 		       gvt->types[i].avail_instance, gvt->types[i].low_gm_size,
 		       gvt->types[i].high_gm_size, gvt->types[i].fence);
 	}
-- 
cgit v1.2.3


From bca5609fcc401b0055a3c01ecc33c5f6fb67af7b Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Fri, 24 Feb 2017 10:58:20 +0800
Subject: drm/i915/gvt: Add more edid definition support

We'll need to apply different resolution for vgpu types, so this
adds more EDID types definition.

v2: fix typo for actual 1920x1200 resolution

Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/display.c | 112 ++++++++++++++++++++++++-------------
 drivers/gpu/drm/i915/gvt/display.h |   6 ++
 2 files changed, 80 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c
index 6d8fde880c39..d346e43656f7 100644
--- a/drivers/gpu/drm/i915/gvt/display.c
+++ b/drivers/gpu/drm/i915/gvt/display.c
@@ -83,44 +83,80 @@ static int pipe_is_enabled(struct intel_vgpu *vgpu, int pipe)
 	return 0;
 }
 
+static unsigned char virtual_dp_monitor_edid[GVT_EDID_NUM][EDID_SIZE] = {
+	{
+/* EDID with 1024x768 as its resolution */
+		/*Header*/
+		0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
+		/* Vendor & Product Identification */
+		0x22, 0xf0, 0x54, 0x29, 0x00, 0x00, 0x00, 0x00, 0x04, 0x17,
+		/* Version & Revision */
+		0x01, 0x04,
+		/* Basic Display Parameters & Features */
+		0xa5, 0x34, 0x20, 0x78, 0x23,
+		/* Color Characteristics */
+		0xfc, 0x81, 0xa4, 0x55, 0x4d, 0x9d, 0x25, 0x12, 0x50, 0x54,
+		/* Established Timings: maximum resolution is 1024x768 */
+		0x21, 0x08, 0x00,
+		/* Standard Timings. All invalid */
+		0x00, 0xc0, 0x00, 0xc0, 0x00, 0x40, 0x00, 0x80, 0x00, 0x00,
+		0x00, 0x40, 0x00, 0x00, 0x00, 0x01,
+		/* 18 Byte Data Blocks 1: invalid */
+		0x00, 0x00, 0x80, 0xa0, 0x70, 0xb0,
+		0x23, 0x40, 0x30, 0x20, 0x36, 0x00, 0x06, 0x44, 0x21, 0x00, 0x00, 0x1a,
+		/* 18 Byte Data Blocks 2: invalid */
+		0x00, 0x00, 0x00, 0xfd, 0x00, 0x18, 0x3c, 0x18, 0x50, 0x11, 0x00, 0x0a,
+		0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+		/* 18 Byte Data Blocks 3: invalid */
+		0x00, 0x00, 0x00, 0xfc, 0x00, 0x48,
+		0x50, 0x20, 0x5a, 0x52, 0x32, 0x34, 0x34, 0x30, 0x77, 0x0a, 0x20, 0x20,
+		/* 18 Byte Data Blocks 4: invalid */
+		0x00, 0x00, 0x00, 0xff, 0x00, 0x43, 0x4e, 0x34, 0x33, 0x30, 0x34, 0x30,
+		0x44, 0x58, 0x51, 0x0a, 0x20, 0x20,
+		/* Extension Block Count */
+		0x00,
+		/* Checksum */
+		0xef,
+	},
+	{
 /* EDID with 1920x1200 as its resolution */
-static unsigned char virtual_dp_monitor_edid[] = {
-	/*Header*/
-	0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
-	/* Vendor & Product Identification */
-	0x22, 0xf0, 0x54, 0x29, 0x00, 0x00, 0x00, 0x00, 0x04, 0x17,
-	/* Version & Revision */
-	0x01, 0x04,
-	/* Basic Display Parameters & Features */
-	0xa5, 0x34, 0x20, 0x78, 0x23,
-	/* Color Characteristics */
-	0xfc, 0x81, 0xa4, 0x55, 0x4d, 0x9d, 0x25, 0x12, 0x50, 0x54,
-	/* Established Timings: maximum resolution is 1024x768 */
-	0x21, 0x08, 0x00,
-	/*
-	 * Standard Timings.
-	 * below new resolutions can be supported:
-	 * 1920x1080, 1280x720, 1280x960, 1280x1024,
-	 * 1440x900, 1600x1200, 1680x1050
-	 */
-	0xd1, 0xc0, 0x81, 0xc0, 0x81, 0x40, 0x81, 0x80, 0x95, 0x00,
-	0xa9, 0x40, 0xb3, 0x00, 0x01, 0x01,
-	/* 18 Byte Data Blocks 1: max resolution is 1920x1200 */
-	0x28, 0x3c, 0x80, 0xa0, 0x70, 0xb0,
-	0x23, 0x40, 0x30, 0x20, 0x36, 0x00, 0x06, 0x44, 0x21, 0x00, 0x00, 0x1a,
-	/* 18 Byte Data Blocks 2: invalid */
-	0x00, 0x00, 0x00, 0xfd, 0x00, 0x18, 0x3c, 0x18, 0x50, 0x11, 0x00, 0x0a,
-	0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-	/* 18 Byte Data Blocks 3: invalid */
-	0x00, 0x00, 0x00, 0xfc, 0x00, 0x48,
-	0x50, 0x20, 0x5a, 0x52, 0x32, 0x34, 0x34, 0x30, 0x77, 0x0a, 0x20, 0x20,
-	/* 18 Byte Data Blocks 4: invalid */
-	0x00, 0x00, 0x00, 0xff, 0x00, 0x43, 0x4e, 0x34, 0x33, 0x30, 0x34, 0x30,
-	0x44, 0x58, 0x51, 0x0a, 0x20, 0x20,
-	/* Extension Block Count */
-	0x00,
-	/* Checksum */
-	0x45,
+		/*Header*/
+		0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
+		/* Vendor & Product Identification */
+		0x22, 0xf0, 0x54, 0x29, 0x00, 0x00, 0x00, 0x00, 0x04, 0x17,
+		/* Version & Revision */
+		0x01, 0x04,
+		/* Basic Display Parameters & Features */
+		0xa5, 0x34, 0x20, 0x78, 0x23,
+		/* Color Characteristics */
+		0xfc, 0x81, 0xa4, 0x55, 0x4d, 0x9d, 0x25, 0x12, 0x50, 0x54,
+		/* Established Timings: maximum resolution is 1024x768 */
+		0x21, 0x08, 0x00,
+		/*
+		 * Standard Timings.
+		 * below new resolutions can be supported:
+		 * 1920x1080, 1280x720, 1280x960, 1280x1024,
+		 * 1440x900, 1600x1200, 1680x1050
+		 */
+		0xd1, 0xc0, 0x81, 0xc0, 0x81, 0x40, 0x81, 0x80, 0x95, 0x00,
+		0xa9, 0x40, 0xb3, 0x00, 0x01, 0x01,
+		/* 18 Byte Data Blocks 1: max resolution is 1920x1200 */
+		0x28, 0x3c, 0x80, 0xa0, 0x70, 0xb0,
+		0x23, 0x40, 0x30, 0x20, 0x36, 0x00, 0x06, 0x44, 0x21, 0x00, 0x00, 0x1a,
+		/* 18 Byte Data Blocks 2: invalid */
+		0x00, 0x00, 0x00, 0xfd, 0x00, 0x18, 0x3c, 0x18, 0x50, 0x11, 0x00, 0x0a,
+		0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+		/* 18 Byte Data Blocks 3: invalid */
+		0x00, 0x00, 0x00, 0xfc, 0x00, 0x48,
+		0x50, 0x20, 0x5a, 0x52, 0x32, 0x34, 0x34, 0x30, 0x77, 0x0a, 0x20, 0x20,
+		/* 18 Byte Data Blocks 4: invalid */
+		0x00, 0x00, 0x00, 0xff, 0x00, 0x43, 0x4e, 0x34, 0x33, 0x30, 0x34, 0x30,
+		0x44, 0x58, 0x51, 0x0a, 0x20, 0x20,
+		/* Extension Block Count */
+		0x00,
+		/* Checksum */
+		0x45,
+	},
 };
 
 #define DPCD_HEADER_SIZE        0xb
@@ -189,7 +225,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num,
 		return -ENOMEM;
 	}
 
-	memcpy(port->edid->edid_block, virtual_dp_monitor_edid,
+	memcpy(port->edid->edid_block, virtual_dp_monitor_edid[GVT_EDID_1920_1200],
 			EDID_SIZE);
 	port->edid->data_valid = true;
 
diff --git a/drivers/gpu/drm/i915/gvt/display.h b/drivers/gpu/drm/i915/gvt/display.h
index 8b234ea961f6..84f160bb7bc2 100644
--- a/drivers/gpu/drm/i915/gvt/display.h
+++ b/drivers/gpu/drm/i915/gvt/display.h
@@ -154,6 +154,12 @@ struct intel_vgpu_port {
 	int type;
 };
 
+enum intel_vgpu_edid {
+	GVT_EDID_1024_768,
+	GVT_EDID_1920_1200,
+	GVT_EDID_NUM,
+};
+
 void intel_gvt_emulate_vblank(struct intel_gvt *gvt);
 void intel_gvt_check_vblank_emulation(struct intel_gvt *gvt);
 
-- 
cgit v1.2.3


From d1a513be1f0a25f094e1577d059b9aebaa279bb2 Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Fri, 24 Feb 2017 10:58:21 +0800
Subject: drm/i915/gvt: add resolution definition for vGPU type

This assigns resolution definition for each vGPU type. For smaller
resource type we should limit max resolution, so e.g limit to 1024x768
for 64M type, others are still default to 1920x1200.

v2: Fix for actual 1920x1200 resolution

Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/display.c | 15 ++++++++++-----
 drivers/gpu/drm/i915/gvt/display.h | 14 +++++++++++++-
 drivers/gpu/drm/i915/gvt/gvt.h     |  2 ++
 drivers/gpu/drm/i915/gvt/kvmgt.c   |  8 ++++----
 drivers/gpu/drm/i915/gvt/vgpu.c    | 18 +++++++++++-------
 5 files changed, 40 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c
index d346e43656f7..43e02e038375 100644
--- a/drivers/gpu/drm/i915/gvt/display.c
+++ b/drivers/gpu/drm/i915/gvt/display.c
@@ -211,10 +211,13 @@ static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num)
 }
 
 static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num,
-		int type)
+				    int type, unsigned int resolution)
 {
 	struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num);
 
+	if (WARN_ON(resolution >= GVT_EDID_NUM))
+		return -EINVAL;
+
 	port->edid = kzalloc(sizeof(*(port->edid)), GFP_KERNEL);
 	if (!port->edid)
 		return -ENOMEM;
@@ -225,7 +228,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num,
 		return -ENOMEM;
 	}
 
-	memcpy(port->edid->edid_block, virtual_dp_monitor_edid[GVT_EDID_1920_1200],
+	memcpy(port->edid->edid_block, virtual_dp_monitor_edid[resolution],
 			EDID_SIZE);
 	port->edid->data_valid = true;
 
@@ -358,16 +361,18 @@ void intel_vgpu_clean_display(struct intel_vgpu *vgpu)
  * Zero on success, negative error code if failed.
  *
  */
-int intel_vgpu_init_display(struct intel_vgpu *vgpu)
+int intel_vgpu_init_display(struct intel_vgpu *vgpu, u64 resolution)
 {
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 
 	intel_vgpu_init_i2c_edid(vgpu);
 
 	if (IS_SKYLAKE(dev_priv))
-		return setup_virtual_dp_monitor(vgpu, PORT_D, GVT_DP_D);
+		return setup_virtual_dp_monitor(vgpu, PORT_D, GVT_DP_D,
+						resolution);
 	else
-		return setup_virtual_dp_monitor(vgpu, PORT_B, GVT_DP_B);
+		return setup_virtual_dp_monitor(vgpu, PORT_B, GVT_DP_B,
+						resolution);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/gvt/display.h b/drivers/gpu/drm/i915/gvt/display.h
index 84f160bb7bc2..d73de22102e2 100644
--- a/drivers/gpu/drm/i915/gvt/display.h
+++ b/drivers/gpu/drm/i915/gvt/display.h
@@ -160,10 +160,22 @@ enum intel_vgpu_edid {
 	GVT_EDID_NUM,
 };
 
+static inline char *vgpu_edid_str(enum intel_vgpu_edid id)
+{
+	switch (id) {
+	case GVT_EDID_1024_768:
+		return "1024x768";
+	case GVT_EDID_1920_1200:
+		return "1920x1200";
+	default:
+		return "";
+	}
+}
+
 void intel_gvt_emulate_vblank(struct intel_gvt *gvt);
 void intel_gvt_check_vblank_emulation(struct intel_gvt *gvt);
 
-int intel_vgpu_init_display(struct intel_vgpu *vgpu);
+int intel_vgpu_init_display(struct intel_vgpu *vgpu, u64 resolution);
 void intel_vgpu_reset_display(struct intel_vgpu *vgpu);
 void intel_vgpu_clean_display(struct intel_vgpu *vgpu);
 
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index faff044c6434..23791920ced1 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -216,6 +216,7 @@ struct intel_vgpu_type {
 	unsigned int low_gm_size;
 	unsigned int high_gm_size;
 	unsigned int fence;
+	enum intel_vgpu_edid resolution;
 };
 
 struct intel_gvt {
@@ -318,6 +319,7 @@ struct intel_vgpu_creation_params {
 	__u64 low_gm_sz;  /* in MB */
 	__u64 high_gm_sz; /* in MB */
 	__u64 fence_sz;
+	__u64 resolution;
 	__s32 primary;
 	__u64 vgpu_id;
 };
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 10c3a4b95a92..182914c22ac5 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -295,10 +295,10 @@ static ssize_t description_show(struct kobject *kobj, struct device *dev,
 		return 0;
 
 	return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
-				"fence: %d\n",
-				BYTES_TO_MB(type->low_gm_size),
-				BYTES_TO_MB(type->high_gm_size),
-				type->fence);
+		       "fence: %d\nresolution: %s\n",
+		       BYTES_TO_MB(type->low_gm_size),
+		       BYTES_TO_MB(type->high_gm_size),
+		       type->fence, vgpu_edid_str(type->resolution));
 }
 
 static MDEV_TYPE_ATTR_RO(available_instance);
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index c27c13c3cc48..41cfa5ccae84 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -68,13 +68,14 @@ static struct {
 	unsigned int low_mm;
 	unsigned int high_mm;
 	unsigned int fence;
+	enum intel_vgpu_edid edid;
 	char *name;
 } vgpu_types[] = {
 /* Fixed vGPU type table */
-	{ MB_TO_BYTES(64), MB_TO_BYTES(512), 4, "8" },
-	{ MB_TO_BYTES(128), MB_TO_BYTES(512), 4, "4" },
-	{ MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, "2" },
-	{ MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, "1" },
+	{ MB_TO_BYTES(64), MB_TO_BYTES(512), 4, GVT_EDID_1024_768, "8" },
+	{ MB_TO_BYTES(128), MB_TO_BYTES(512), 4, GVT_EDID_1920_1200, "4" },
+	{ MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, GVT_EDID_1920_1200, "2" },
+	{ MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, GVT_EDID_1920_1200, "1" },
 };
 
 /**
@@ -119,6 +120,7 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt)
 		gvt->types[i].low_gm_size = vgpu_types[i].low_mm;
 		gvt->types[i].high_gm_size = vgpu_types[i].high_mm;
 		gvt->types[i].fence = vgpu_types[i].fence;
+		gvt->types[i].resolution = vgpu_types[i].edid;
 		gvt->types[i].avail_instance = min(low_avail / vgpu_types[i].low_mm,
 						   high_avail / vgpu_types[i].high_mm);
 
@@ -129,11 +131,12 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt)
 			sprintf(gvt->types[i].name, "GVTg_V5_%s",
 						vgpu_types[i].name);
 
-		gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u\n",
+		gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u res %s\n",
 			     i, gvt->types[i].name,
 			     gvt->types[i].avail_instance,
 			     gvt->types[i].low_gm_size,
-			     gvt->types[i].high_gm_size, gvt->types[i].fence);
+			     gvt->types[i].high_gm_size, gvt->types[i].fence,
+			     vgpu_edid_str(gvt->types[i].resolution));
 	}
 
 	gvt->num_types = i;
@@ -258,7 +261,7 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,
 	if (ret)
 		goto out_detach_hypervisor_vgpu;
 
-	ret = intel_vgpu_init_display(vgpu);
+	ret = intel_vgpu_init_display(vgpu, param->resolution);
 	if (ret)
 		goto out_clean_gtt;
 
@@ -322,6 +325,7 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt,
 	param.low_gm_sz = type->low_gm_size;
 	param.high_gm_sz = type->high_gm_size;
 	param.fence_sz = type->fence;
+	param.resolution = type->resolution;
 
 	/* XXX current param based on MB */
 	param.low_gm_sz = BYTES_TO_MB(param.low_gm_sz);
-- 
cgit v1.2.3


From 8bcd7c188bc479bea866d286a7dc0af9734c3c64 Mon Sep 17 00:00:00 2001
From: Weinan Li <weinan.z.li@intel.com>
Date: Fri, 24 Feb 2017 17:07:38 +0800
Subject: drm/i915/gvt: fix pcode mailbox write emulation of BDW

Add pcode mailbox write emulation in gvt for BDW, reuse emulation code of
Skylake.

V2: refine comments, remove duplication defination of 0x138124, add
IS_SKYLAKE() check for Skylake only pcode commands.

Signed-off-by: Weinan Li <weinan.z.li@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/handlers.c | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index f89b183488e9..ebbe74072dc3 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1304,21 +1304,24 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset,
 	u32 *data0 = &vgpu_vreg(vgpu, GEN6_PCODE_DATA);
 
 	switch (cmd) {
-	case 0x6:
-		/**
-		 * "Read memory latency" command on gen9.
-		 * Below memory latency values are read
-		 * from skylake platform.
-		 */
-		if (!*data0)
-			*data0 = 0x1e1a1100;
-		else
-			*data0 = 0x61514b3d;
+	case GEN9_PCODE_READ_MEM_LATENCY:
+		if (IS_SKYLAKE(vgpu->gvt->dev_priv)) {
+			/**
+			 * "Read memory latency" command on gen9.
+			 * Below memory latency values are read
+			 * from skylake platform.
+			 */
+			if (!*data0)
+				*data0 = 0x1e1a1100;
+			else
+				*data0 = 0x61514b3d;
+		}
 		break;
 	case SKL_PCODE_CDCLK_CONTROL:
-		*data0 = SKL_CDCLK_READY_FOR_CHANGE;
+		if (IS_SKYLAKE(vgpu->gvt->dev_priv))
+			*data0 = SKL_CDCLK_READY_FOR_CHANGE;
 		break;
-	case 0x5:
+	case GEN6_PCODE_READ_RC6VIDS:
 		*data0 |= 0x1;
 		break;
 	}
@@ -2202,7 +2205,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 
 	MMIO_F(0x4f000, 0x90, 0, 0, 0, D_ALL, NULL, NULL);
 
-	MMIO_D(GEN6_PCODE_MAILBOX, D_PRE_SKL);
+	MMIO_D(GEN6_PCODE_MAILBOX, D_PRE_BDW);
 	MMIO_D(GEN6_PCODE_DATA, D_ALL);
 	MMIO_D(0x13812c, D_ALL);
 	MMIO_DH(GEN7_ERR_INT, D_ALL, NULL, NULL);
@@ -2281,7 +2284,6 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(0x1a054, D_ALL);
 
 	MMIO_D(0x44070, D_ALL);
-
 	MMIO_D(0x215c, D_HSW_PLUS);
 	MMIO_DFH(0x2178, D_ALL, F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(0x217c, D_ALL, F_CMD_ACCESS, NULL, NULL);
@@ -2453,6 +2455,8 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(GEN7_MISCCPCTL, D_BDW_PLUS);
 	MMIO_D(0x1c054, D_BDW_PLUS);
 
+	MMIO_DH(GEN6_PCODE_MAILBOX, D_BDW_PLUS, NULL, mailbox_write);
+
 	MMIO_D(GEN8_PRIVATE_PAT_LO, D_BDW_PLUS);
 	MMIO_D(GEN8_PRIVATE_PAT_HI, D_BDW_PLUS);
 
@@ -2544,7 +2548,6 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(HSW_PWR_WELL_BIOS, D_SKL);
 	MMIO_DH(HSW_PWR_WELL_DRIVER, D_SKL, NULL, skl_power_well_ctl_write);
 
-	MMIO_DH(GEN6_PCODE_MAILBOX, D_SKL, NULL, mailbox_write);
 	MMIO_D(0xa210, D_SKL_PLUS);
 	MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS);
 	MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS);
-- 
cgit v1.2.3


From 41bfab35b3d0e0a85ed0c12d7bafd7484e96ca78 Mon Sep 17 00:00:00 2001
From: Pei Zhang <pei.zhang@intel.com>
Date: Fri, 24 Feb 2017 16:03:28 +0800
Subject: drm/i915/gvt: add some new MMIOs to cmd_access white list

Guest is now acces some MMIOs (0x215c, RING_INSTPM) through command which
is not originally in gvt's white list. This cause huge error log printed in
gvt.
This patch addes these MMIOs to the white list.

V2. change the commit message content.
V3. remove duplicate defination of 0x20c0.
V4. refine code style.

Signed-off-by: Pei Zhang <pei.zhang@intel.com>
Signed-off-by: Weinan Li <weinan.z.li@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/handlers.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index ebbe74072dc3..57b4d538f370 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1589,7 +1589,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 #undef RING_REG
 
 	MMIO_RING_DFH(RING_MI_MODE, D_ALL, F_MODE_MASK, NULL, NULL);
-	MMIO_RING_DFH(RING_INSTPM, D_ALL, F_MODE_MASK, NULL, NULL);
+	MMIO_RING_DFH(RING_INSTPM, D_ALL, F_MODE_MASK | F_CMD_ACCESS,
+			NULL, NULL);
 	MMIO_RING_DFH(RING_TIMESTAMP, D_ALL, F_CMD_ACCESS,
 			ring_timestamp_mmio_read, NULL);
 	MMIO_RING_DFH(RING_TIMESTAMP_UDW, D_ALL, F_CMD_ACCESS,
@@ -2284,7 +2285,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(0x1a054, D_ALL);
 
 	MMIO_D(0x44070, D_ALL);
-	MMIO_D(0x215c, D_HSW_PLUS);
+	MMIO_DFH(0x215c, D_HSW_PLUS, F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(0x2178, D_ALL, F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(0x217c, D_ALL, F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(0x12178, D_ALL, F_CMD_ACCESS, NULL, NULL);
-- 
cgit v1.2.3


From 9fc5cf6e14fc1db40ea8a1c10061bb9586e78585 Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Wed, 8 Feb 2017 14:46:24 +0100
Subject: platform/x86: fujitsu-laptop: clearly denote backlight-related
 symbols
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Unify naming for all backlight-related functions, structures, variables
and constants by using a consistent "_bl"/"_BL" suffix/infix.  Adjust
indentation to make checkpatch happy.

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
[kempniu: rebase patch, rewrite commit message]
Signed-off-by: Michał Kępień <kernel@kempniu.pl>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Jonathan Woithe <jwoithe@just42.net>
---
 drivers/platform/x86/fujitsu-laptop.c | 214 +++++++++++++++++-----------------
 1 file changed, 107 insertions(+), 107 deletions(-)

diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index 2b218b1d13e5..e1737b9d9d95 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -78,10 +78,10 @@
 
 #define FUJITSU_LCD_N_LEVELS 8
 
-#define ACPI_FUJITSU_CLASS              "fujitsu"
-#define ACPI_FUJITSU_HID                "FUJ02B1"
-#define ACPI_FUJITSU_DRIVER_NAME	"Fujitsu laptop FUJ02B1 ACPI brightness driver"
-#define ACPI_FUJITSU_DEVICE_NAME        "Fujitsu FUJ02B1"
+#define ACPI_FUJITSU_CLASS		"fujitsu"
+#define ACPI_FUJITSU_BL_HID		"FUJ02B1"
+#define ACPI_FUJITSU_BL_DRIVER_NAME	"Fujitsu laptop FUJ02B1 ACPI brightness driver"
+#define ACPI_FUJITSU_BL_DEVICE_NAME	"Fujitsu FUJ02B1"
 #define ACPI_FUJITSU_HOTKEY_HID 	"FUJ02E3"
 #define ACPI_FUJITSU_HOTKEY_DRIVER_NAME "Fujitsu laptop FUJ02E3 ACPI hotkeys driver"
 #define ACPI_FUJITSU_HOTKEY_DEVICE_NAME "Fujitsu FUJ02E3"
@@ -136,7 +136,7 @@
 #endif
 
 /* Device controlling the backlight and associated keys */
-struct fujitsu_t {
+struct fujitsu_bl {
 	acpi_handle acpi_handle;
 	struct acpi_device *dev;
 	struct input_dev *input;
@@ -150,7 +150,7 @@ struct fujitsu_t {
 	unsigned int brightness_level;
 };
 
-static struct fujitsu_t *fujitsu;
+static struct fujitsu_bl *fujitsu_bl;
 static int use_alt_lcd_levels = -1;
 static int disable_brightness_adjust = -1;
 
@@ -222,7 +222,7 @@ static struct led_classdev eco_led = {
 static u32 dbg_level = 0x03;
 #endif
 
-static void acpi_fujitsu_notify(struct acpi_device *device, u32 event);
+static void acpi_fujitsu_bl_notify(struct acpi_device *device, u32 event);
 
 /* Fujitsu ACPI interface function */
 
@@ -373,10 +373,10 @@ static int set_lcd_level(int level)
 	vdbg_printk(FUJLAPTOP_DBG_TRACE, "set lcd level via SBLL [%d]\n",
 		    level);
 
-	if (level < 0 || level >= fujitsu->max_brightness)
+	if (level < 0 || level >= fujitsu_bl->max_brightness)
 		return -EINVAL;
 
-	status = acpi_get_handle(fujitsu->acpi_handle, "SBLL", &handle);
+	status = acpi_get_handle(fujitsu_bl->acpi_handle, "SBLL", &handle);
 	if (ACPI_FAILURE(status)) {
 		vdbg_printk(FUJLAPTOP_DBG_ERROR, "SBLL not present\n");
 		return -ENODEV;
@@ -398,10 +398,10 @@ static int set_lcd_level_alt(int level)
 	vdbg_printk(FUJLAPTOP_DBG_TRACE, "set lcd level via SBL2 [%d]\n",
 		    level);
 
-	if (level < 0 || level >= fujitsu->max_brightness)
+	if (level < 0 || level >= fujitsu_bl->max_brightness)
 		return -EINVAL;
 
-	status = acpi_get_handle(fujitsu->acpi_handle, "SBL2", &handle);
+	status = acpi_get_handle(fujitsu_bl->acpi_handle, "SBL2", &handle);
 	if (ACPI_FAILURE(status)) {
 		vdbg_printk(FUJLAPTOP_DBG_ERROR, "SBL2 not present\n");
 		return -ENODEV;
@@ -421,19 +421,19 @@ static int get_lcd_level(void)
 
 	vdbg_printk(FUJLAPTOP_DBG_TRACE, "get lcd level via GBLL\n");
 
-	status =
-	    acpi_evaluate_integer(fujitsu->acpi_handle, "GBLL", NULL, &state);
+	status = acpi_evaluate_integer(fujitsu_bl->acpi_handle, "GBLL", NULL,
+				       &state);
 	if (ACPI_FAILURE(status))
 		return 0;
 
-	fujitsu->brightness_level = state & 0x0fffffff;
+	fujitsu_bl->brightness_level = state & 0x0fffffff;
 
 	if (state & 0x80000000)
-		fujitsu->brightness_changed = 1;
+		fujitsu_bl->brightness_changed = 1;
 	else
-		fujitsu->brightness_changed = 0;
+		fujitsu_bl->brightness_changed = 0;
 
-	return fujitsu->brightness_level;
+	return fujitsu_bl->brightness_level;
 }
 
 static int get_max_brightness(void)
@@ -443,14 +443,14 @@ static int get_max_brightness(void)
 
 	vdbg_printk(FUJLAPTOP_DBG_TRACE, "get max lcd level via RBLL\n");
 
-	status =
-	    acpi_evaluate_integer(fujitsu->acpi_handle, "RBLL", NULL, &state);
+	status = acpi_evaluate_integer(fujitsu_bl->acpi_handle, "RBLL", NULL,
+				       &state);
 	if (ACPI_FAILURE(status))
 		return -1;
 
-	fujitsu->max_brightness = state;
+	fujitsu_bl->max_brightness = state;
 
-	return fujitsu->max_brightness;
+	return fujitsu_bl->max_brightness;
 }
 
 /* Backlight device stuff */
@@ -483,7 +483,7 @@ static int bl_update_status(struct backlight_device *b)
 	return ret;
 }
 
-static const struct backlight_ops fujitsubl_ops = {
+static const struct backlight_ops fujitsu_bl_ops = {
 	.get_brightness = bl_get_brightness,
 	.update_status = bl_update_status,
 };
@@ -511,7 +511,7 @@ show_brightness_changed(struct device *dev,
 
 	int ret;
 
-	ret = fujitsu->brightness_changed;
+	ret = fujitsu_bl->brightness_changed;
 	if (ret < 0)
 		return ret;
 
@@ -539,7 +539,7 @@ static ssize_t store_lcd_level(struct device *dev,
 	int level, ret;
 
 	if (sscanf(buf, "%i", &level) != 1
-	    || (level < 0 || level >= fujitsu->max_brightness))
+	    || (level < 0 || level >= fujitsu_bl->max_brightness))
 		return -EINVAL;
 
 	if (use_alt_lcd_levels)
@@ -644,25 +644,25 @@ static void __init dmi_check_cb_common(const struct dmi_system_id *id)
 static int __init dmi_check_cb_s6410(const struct dmi_system_id *id)
 {
 	dmi_check_cb_common(id);
-	fujitsu->keycode1 = KEY_SCREENLOCK;	/* "Lock" */
-	fujitsu->keycode2 = KEY_HELP;	/* "Mobility Center" */
+	fujitsu_bl->keycode1 = KEY_SCREENLOCK;	/* "Lock" */
+	fujitsu_bl->keycode2 = KEY_HELP;	/* "Mobility Center" */
 	return 1;
 }
 
 static int __init dmi_check_cb_s6420(const struct dmi_system_id *id)
 {
 	dmi_check_cb_common(id);
-	fujitsu->keycode1 = KEY_SCREENLOCK;	/* "Lock" */
-	fujitsu->keycode2 = KEY_HELP;	/* "Mobility Center" */
+	fujitsu_bl->keycode1 = KEY_SCREENLOCK;	/* "Lock" */
+	fujitsu_bl->keycode2 = KEY_HELP;	/* "Mobility Center" */
 	return 1;
 }
 
 static int __init dmi_check_cb_p8010(const struct dmi_system_id *id)
 {
 	dmi_check_cb_common(id);
-	fujitsu->keycode1 = KEY_HELP;	/* "Support" */
-	fujitsu->keycode3 = KEY_SWITCHVIDEOMODE;	/* "Presentation" */
-	fujitsu->keycode4 = KEY_WWW;	/* "Internet" */
+	fujitsu_bl->keycode1 = KEY_HELP;		/* "Support" */
+	fujitsu_bl->keycode3 = KEY_SWITCHVIDEOMODE;	/* "Presentation" */
+	fujitsu_bl->keycode4 = KEY_WWW;			/* "Internet" */
 	return 1;
 }
 
@@ -693,7 +693,7 @@ static const struct dmi_system_id fujitsu_dmi_table[] __initconst = {
 
 /* ACPI device for LCD brightness control */
 
-static int acpi_fujitsu_add(struct acpi_device *device)
+static int acpi_fujitsu_bl_add(struct acpi_device *device)
 {
 	int state = 0;
 	struct input_dev *input;
@@ -702,22 +702,22 @@ static int acpi_fujitsu_add(struct acpi_device *device)
 	if (!device)
 		return -EINVAL;
 
-	fujitsu->acpi_handle = device->handle;
-	sprintf(acpi_device_name(device), "%s", ACPI_FUJITSU_DEVICE_NAME);
+	fujitsu_bl->acpi_handle = device->handle;
+	sprintf(acpi_device_name(device), "%s", ACPI_FUJITSU_BL_DEVICE_NAME);
 	sprintf(acpi_device_class(device), "%s", ACPI_FUJITSU_CLASS);
-	device->driver_data = fujitsu;
+	device->driver_data = fujitsu_bl;
 
-	fujitsu->input = input = input_allocate_device();
+	fujitsu_bl->input = input = input_allocate_device();
 	if (!input) {
 		error = -ENOMEM;
 		goto err_stop;
 	}
 
-	snprintf(fujitsu->phys, sizeof(fujitsu->phys),
+	snprintf(fujitsu_bl->phys, sizeof(fujitsu_bl->phys),
 		 "%s/video/input0", acpi_device_hid(device));
 
 	input->name = acpi_device_name(device);
-	input->phys = fujitsu->phys;
+	input->phys = fujitsu_bl->phys;
 	input->id.bustype = BUS_HOST;
 	input->id.product = 0x06;
 	input->dev.parent = &device->dev;
@@ -730,7 +730,7 @@ static int acpi_fujitsu_add(struct acpi_device *device)
 	if (error)
 		goto err_free_input_dev;
 
-	error = acpi_bus_update_power(fujitsu->acpi_handle, &state);
+	error = acpi_bus_update_power(fujitsu_bl->acpi_handle, &state);
 	if (error) {
 		pr_err("Error reading power state\n");
 		goto err_unregister_input_dev;
@@ -740,7 +740,7 @@ static int acpi_fujitsu_add(struct acpi_device *device)
 	       acpi_device_name(device), acpi_device_bid(device),
 	       !device->power.state ? "on" : "off");
 
-	fujitsu->dev = device;
+	fujitsu_bl->dev = device;
 
 	if (acpi_has_method(device->handle, METHOD_NAME__INI)) {
 		vdbg_printk(FUJLAPTOP_DBG_INFO, "Invoking _INI\n");
@@ -758,7 +758,7 @@ static int acpi_fujitsu_add(struct acpi_device *device)
 		    use_alt_lcd_levels, disable_brightness_adjust);
 
 	if (get_max_brightness() <= 0)
-		fujitsu->max_brightness = FUJITSU_LCD_N_LEVELS;
+		fujitsu_bl->max_brightness = FUJITSU_LCD_N_LEVELS;
 	get_lcd_level();
 
 	return 0;
@@ -772,38 +772,38 @@ err_stop:
 	return error;
 }
 
-static int acpi_fujitsu_remove(struct acpi_device *device)
+static int acpi_fujitsu_bl_remove(struct acpi_device *device)
 {
-	struct fujitsu_t *fujitsu = acpi_driver_data(device);
-	struct input_dev *input = fujitsu->input;
+	struct fujitsu_bl *fujitsu_bl = acpi_driver_data(device);
+	struct input_dev *input = fujitsu_bl->input;
 
 	input_unregister_device(input);
 
-	fujitsu->acpi_handle = NULL;
+	fujitsu_bl->acpi_handle = NULL;
 
 	return 0;
 }
 
 /* Brightness notify */
 
-static void acpi_fujitsu_notify(struct acpi_device *device, u32 event)
+static void acpi_fujitsu_bl_notify(struct acpi_device *device, u32 event)
 {
 	struct input_dev *input;
 	int keycode;
 	int oldb, newb;
 
-	input = fujitsu->input;
+	input = fujitsu_bl->input;
 
 	switch (event) {
 	case ACPI_FUJITSU_NOTIFY_CODE1:
 		keycode = 0;
-		oldb = fujitsu->brightness_level;
+		oldb = fujitsu_bl->brightness_level;
 		get_lcd_level();
-		newb = fujitsu->brightness_level;
+		newb = fujitsu_bl->brightness_level;
 
 		vdbg_printk(FUJLAPTOP_DBG_TRACE,
 			    "brightness button event [%i -> %i (%i)]\n",
-			    oldb, newb, fujitsu->brightness_changed);
+			    oldb, newb, fujitsu_bl->brightness_changed);
 
 		if (oldb < newb) {
 			if (disable_brightness_adjust != 1) {
@@ -882,11 +882,11 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
 	input->dev.parent = &device->dev;
 
 	set_bit(EV_KEY, input->evbit);
-	set_bit(fujitsu->keycode1, input->keybit);
-	set_bit(fujitsu->keycode2, input->keybit);
-	set_bit(fujitsu->keycode3, input->keybit);
-	set_bit(fujitsu->keycode4, input->keybit);
-	set_bit(fujitsu->keycode5, input->keybit);
+	set_bit(fujitsu_bl->keycode1, input->keybit);
+	set_bit(fujitsu_bl->keycode2, input->keybit);
+	set_bit(fujitsu_bl->keycode3, input->keybit);
+	set_bit(fujitsu_bl->keycode4, input->keybit);
+	set_bit(fujitsu_bl->keycode5, input->keybit);
 	set_bit(KEY_TOUCHPAD_TOGGLE, input->keybit);
 	set_bit(KEY_UNKNOWN, input->keybit);
 
@@ -937,7 +937,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
 
 #if IS_ENABLED(CONFIG_LEDS_CLASS)
 	if (call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & LOGOLAMP_POWERON) {
-		result = led_classdev_register(&fujitsu->pf_device->dev,
+		result = led_classdev_register(&fujitsu_bl->pf_device->dev,
 						&logolamp_led);
 		if (result == 0) {
 			fujitsu_hotkey->logolamp_registered = 1;
@@ -949,7 +949,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
 
 	if ((call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & KEYBOARD_LAMPS) &&
 	   (call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0) == 0x0)) {
-		result = led_classdev_register(&fujitsu->pf_device->dev,
+		result = led_classdev_register(&fujitsu_bl->pf_device->dev,
 						&kblamps_led);
 		if (result == 0) {
 			fujitsu_hotkey->kblamps_registered = 1;
@@ -966,7 +966,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
 	 * that an RF LED is present.
 	 */
 	if (call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0) & BIT(24)) {
-		result = led_classdev_register(&fujitsu->pf_device->dev,
+		result = led_classdev_register(&fujitsu_bl->pf_device->dev,
 						&radio_led);
 		if (result == 0) {
 			fujitsu_hotkey->radio_led_registered = 1;
@@ -983,7 +983,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
 	*/
 	if ((call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & BIT(14)) &&
 	   (call_fext_func(FUNC_LEDS, 0x2, ECO_LED, 0x0) != UNSUPPORTED_CMD)) {
-		result = led_classdev_register(&fujitsu->pf_device->dev,
+		result = led_classdev_register(&fujitsu_bl->pf_device->dev,
 						&eco_led);
 		if (result == 0) {
 			fujitsu_hotkey->eco_led_registered = 1;
@@ -1103,19 +1103,19 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
 			&& (i++) < MAX_HOTKEY_RINGBUFFER_SIZE) {
 		switch (irb & 0x4ff) {
 		case KEY1_CODE:
-			keycode = fujitsu->keycode1;
+			keycode = fujitsu_bl->keycode1;
 			break;
 		case KEY2_CODE:
-			keycode = fujitsu->keycode2;
+			keycode = fujitsu_bl->keycode2;
 			break;
 		case KEY3_CODE:
-			keycode = fujitsu->keycode3;
+			keycode = fujitsu_bl->keycode3;
 			break;
 		case KEY4_CODE:
-			keycode = fujitsu->keycode4;
+			keycode = fujitsu_bl->keycode4;
 			break;
 		case KEY5_CODE:
-			keycode = fujitsu->keycode5;
+			keycode = fujitsu_bl->keycode5;
 			break;
 		case 0:
 			keycode = 0;
@@ -1150,19 +1150,19 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
 
 /* Initialization */
 
-static const struct acpi_device_id fujitsu_device_ids[] = {
-	{ACPI_FUJITSU_HID, 0},
+static const struct acpi_device_id fujitsu_bl_device_ids[] = {
+	{ACPI_FUJITSU_BL_HID, 0},
 	{"", 0},
 };
 
-static struct acpi_driver acpi_fujitsu_driver = {
-	.name = ACPI_FUJITSU_DRIVER_NAME,
+static struct acpi_driver acpi_fujitsu_bl_driver = {
+	.name = ACPI_FUJITSU_BL_DRIVER_NAME,
 	.class = ACPI_FUJITSU_CLASS,
-	.ids = fujitsu_device_ids,
+	.ids = fujitsu_bl_device_ids,
 	.ops = {
-		.add = acpi_fujitsu_add,
-		.remove = acpi_fujitsu_remove,
-		.notify = acpi_fujitsu_notify,
+		.add = acpi_fujitsu_bl_add,
+		.remove = acpi_fujitsu_bl_remove,
+		.notify = acpi_fujitsu_bl_notify,
 		},
 };
 
@@ -1183,7 +1183,7 @@ static struct acpi_driver acpi_fujitsu_hotkey_driver = {
 };
 
 static const struct acpi_device_id fujitsu_ids[] __used = {
-	{ACPI_FUJITSU_HID, 0},
+	{ACPI_FUJITSU_BL_HID, 0},
 	{ACPI_FUJITSU_HOTKEY_HID, 0},
 	{"", 0}
 };
@@ -1196,17 +1196,17 @@ static int __init fujitsu_init(void)
 	if (acpi_disabled)
 		return -ENODEV;
 
-	fujitsu = kzalloc(sizeof(struct fujitsu_t), GFP_KERNEL);
-	if (!fujitsu)
+	fujitsu_bl = kzalloc(sizeof(struct fujitsu_bl), GFP_KERNEL);
+	if (!fujitsu_bl)
 		return -ENOMEM;
-	fujitsu->keycode1 = KEY_PROG1;
-	fujitsu->keycode2 = KEY_PROG2;
-	fujitsu->keycode3 = KEY_PROG3;
-	fujitsu->keycode4 = KEY_PROG4;
-	fujitsu->keycode5 = KEY_RFKILL;
+	fujitsu_bl->keycode1 = KEY_PROG1;
+	fujitsu_bl->keycode2 = KEY_PROG2;
+	fujitsu_bl->keycode3 = KEY_PROG3;
+	fujitsu_bl->keycode4 = KEY_PROG4;
+	fujitsu_bl->keycode5 = KEY_RFKILL;
 	dmi_check_system(fujitsu_dmi_table);
 
-	result = acpi_bus_register_driver(&acpi_fujitsu_driver);
+	result = acpi_bus_register_driver(&acpi_fujitsu_bl_driver);
 	if (result < 0) {
 		ret = -ENODEV;
 		goto fail_acpi;
@@ -1214,18 +1214,18 @@ static int __init fujitsu_init(void)
 
 	/* Register platform stuff */
 
-	fujitsu->pf_device = platform_device_alloc("fujitsu-laptop", -1);
-	if (!fujitsu->pf_device) {
+	fujitsu_bl->pf_device = platform_device_alloc("fujitsu-laptop", -1);
+	if (!fujitsu_bl->pf_device) {
 		ret = -ENOMEM;
 		goto fail_platform_driver;
 	}
 
-	ret = platform_device_add(fujitsu->pf_device);
+	ret = platform_device_add(fujitsu_bl->pf_device);
 	if (ret)
 		goto fail_platform_device1;
 
 	ret =
-	    sysfs_create_group(&fujitsu->pf_device->dev.kobj,
+	    sysfs_create_group(&fujitsu_bl->pf_device->dev.kobj,
 			       &fujitsupf_attribute_group);
 	if (ret)
 		goto fail_platform_device2;
@@ -1236,19 +1236,19 @@ static int __init fujitsu_init(void)
 		struct backlight_properties props;
 
 		memset(&props, 0, sizeof(struct backlight_properties));
-		max_brightness = fujitsu->max_brightness;
+		max_brightness = fujitsu_bl->max_brightness;
 		props.type = BACKLIGHT_PLATFORM;
 		props.max_brightness = max_brightness - 1;
-		fujitsu->bl_device = backlight_device_register("fujitsu-laptop",
-							       NULL, NULL,
-							       &fujitsubl_ops,
-							       &props);
-		if (IS_ERR(fujitsu->bl_device)) {
-			ret = PTR_ERR(fujitsu->bl_device);
-			fujitsu->bl_device = NULL;
+		fujitsu_bl->bl_device = backlight_device_register("fujitsu-laptop",
+								  NULL, NULL,
+								  &fujitsu_bl_ops,
+								  &props);
+		if (IS_ERR(fujitsu_bl->bl_device)) {
+			ret = PTR_ERR(fujitsu_bl->bl_device);
+			fujitsu_bl->bl_device = NULL;
 			goto fail_sysfs_group;
 		}
-		fujitsu->bl_device->props.brightness = fujitsu->brightness_level;
+		fujitsu_bl->bl_device->props.brightness = fujitsu_bl->brightness_level;
 	}
 
 	ret = platform_driver_register(&fujitsupf_driver);
@@ -1272,9 +1272,9 @@ static int __init fujitsu_init(void)
 	/* Sync backlight power status (needs FUJ02E3 device, hence deferred) */
 	if (acpi_video_get_backlight_type() == acpi_backlight_vendor) {
 		if (call_fext_func(FUNC_BACKLIGHT, 0x2, 0x4, 0x0) == 3)
-			fujitsu->bl_device->props.power = FB_BLANK_POWERDOWN;
+			fujitsu_bl->bl_device->props.power = FB_BLANK_POWERDOWN;
 		else
-			fujitsu->bl_device->props.power = FB_BLANK_UNBLANK;
+			fujitsu_bl->bl_device->props.power = FB_BLANK_UNBLANK;
 	}
 
 	pr_info("driver " FUJITSU_DRIVER_VERSION " successfully loaded\n");
@@ -1286,18 +1286,18 @@ fail_hotkey1:
 fail_hotkey:
 	platform_driver_unregister(&fujitsupf_driver);
 fail_backlight:
-	backlight_device_unregister(fujitsu->bl_device);
+	backlight_device_unregister(fujitsu_bl->bl_device);
 fail_sysfs_group:
-	sysfs_remove_group(&fujitsu->pf_device->dev.kobj,
+	sysfs_remove_group(&fujitsu_bl->pf_device->dev.kobj,
 			   &fujitsupf_attribute_group);
 fail_platform_device2:
-	platform_device_del(fujitsu->pf_device);
+	platform_device_del(fujitsu_bl->pf_device);
 fail_platform_device1:
-	platform_device_put(fujitsu->pf_device);
+	platform_device_put(fujitsu_bl->pf_device);
 fail_platform_driver:
-	acpi_bus_unregister_driver(&acpi_fujitsu_driver);
+	acpi_bus_unregister_driver(&acpi_fujitsu_bl_driver);
 fail_acpi:
-	kfree(fujitsu);
+	kfree(fujitsu_bl);
 
 	return ret;
 }
@@ -1310,16 +1310,16 @@ static void __exit fujitsu_cleanup(void)
 
 	platform_driver_unregister(&fujitsupf_driver);
 
-	backlight_device_unregister(fujitsu->bl_device);
+	backlight_device_unregister(fujitsu_bl->bl_device);
 
-	sysfs_remove_group(&fujitsu->pf_device->dev.kobj,
+	sysfs_remove_group(&fujitsu_bl->pf_device->dev.kobj,
 			   &fujitsupf_attribute_group);
 
-	platform_device_unregister(fujitsu->pf_device);
+	platform_device_unregister(fujitsu_bl->pf_device);
 
-	acpi_bus_unregister_driver(&acpi_fujitsu_driver);
+	acpi_bus_unregister_driver(&acpi_fujitsu_bl_driver);
 
-	kfree(fujitsu);
+	kfree(fujitsu_bl);
 
 	pr_info("driver unloaded\n");
 }
-- 
cgit v1.2.3


From 6942eabc140eac54d5c0db2695eed63768209c34 Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Wed, 8 Feb 2017 14:46:25 +0100
Subject: platform/x86: fujitsu-laptop: replace "hotkey" with "laptop" in
 symbol names
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Functions, structures, variables and constants whose names currently
contain the "hotkey" keyword are not only responsible for handling
hotkeys, but also other laptop-related features (rfkill, lid, dock,
LEDs).  Fix their naming by using a consistent "_laptop"/"_LAPTOP"
suffix/infix.  Update comments so that they reflect this change.

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
[kempniu: rebase patch, rewrite commit message]
Signed-off-by: Michał Kępień <kernel@kempniu.pl>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Jonathan Woithe <jwoithe@just42.net>
---
 drivers/platform/x86/fujitsu-laptop.c | 158 +++++++++++++++++-----------------
 1 file changed, 79 insertions(+), 79 deletions(-)

diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index e1737b9d9d95..70124004b346 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -82,9 +82,9 @@
 #define ACPI_FUJITSU_BL_HID		"FUJ02B1"
 #define ACPI_FUJITSU_BL_DRIVER_NAME	"Fujitsu laptop FUJ02B1 ACPI brightness driver"
 #define ACPI_FUJITSU_BL_DEVICE_NAME	"Fujitsu FUJ02B1"
-#define ACPI_FUJITSU_HOTKEY_HID 	"FUJ02E3"
-#define ACPI_FUJITSU_HOTKEY_DRIVER_NAME "Fujitsu laptop FUJ02E3 ACPI hotkeys driver"
-#define ACPI_FUJITSU_HOTKEY_DEVICE_NAME "Fujitsu FUJ02E3"
+#define ACPI_FUJITSU_LAPTOP_HID		"FUJ02E3"
+#define ACPI_FUJITSU_LAPTOP_DRIVER_NAME	"Fujitsu laptop FUJ02E3 ACPI hotkeys driver"
+#define ACPI_FUJITSU_LAPTOP_DEVICE_NAME	"Fujitsu FUJ02E3"
 
 #define ACPI_FUJITSU_NOTIFY_CODE1     0x80
 
@@ -154,8 +154,8 @@ static struct fujitsu_bl *fujitsu_bl;
 static int use_alt_lcd_levels = -1;
 static int disable_brightness_adjust = -1;
 
-/* Device used to access other hotkeys on the laptop */
-struct fujitsu_hotkey_t {
+/* Device used to access hotkeys and other features on the laptop */
+struct fujitsu_laptop {
 	acpi_handle acpi_handle;
 	struct acpi_device *dev;
 	struct input_dev *input;
@@ -171,9 +171,9 @@ struct fujitsu_hotkey_t {
 	int eco_led_registered;
 };
 
-static struct fujitsu_hotkey_t *fujitsu_hotkey;
+static struct fujitsu_laptop *fujitsu_laptop;
 
-static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event);
+static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event);
 
 #if IS_ENABLED(CONFIG_LEDS_CLASS)
 static enum led_brightness logolamp_get(struct led_classdev *cdev);
@@ -239,7 +239,7 @@ static int call_fext_func(int cmd, int arg0, int arg1, int arg2)
 	unsigned long long value;
 	acpi_handle handle = NULL;
 
-	status = acpi_get_handle(fujitsu_hotkey->acpi_handle, "FUNC", &handle);
+	status = acpi_get_handle(fujitsu_laptop->acpi_handle, "FUNC", &handle);
 	if (ACPI_FAILURE(status)) {
 		vdbg_printk(FUJLAPTOP_DBG_ERROR,
 				"FUNC interface is not present\n");
@@ -567,9 +567,9 @@ static ssize_t
 show_lid_state(struct device *dev,
 			struct device_attribute *attr, char *buf)
 {
-	if (!(fujitsu_hotkey->rfkill_supported & 0x100))
+	if (!(fujitsu_laptop->rfkill_supported & 0x100))
 		return sprintf(buf, "unknown\n");
-	if (fujitsu_hotkey->rfkill_state & 0x100)
+	if (fujitsu_laptop->rfkill_state & 0x100)
 		return sprintf(buf, "open\n");
 	else
 		return sprintf(buf, "closed\n");
@@ -579,9 +579,9 @@ static ssize_t
 show_dock_state(struct device *dev,
 			struct device_attribute *attr, char *buf)
 {
-	if (!(fujitsu_hotkey->rfkill_supported & 0x200))
+	if (!(fujitsu_laptop->rfkill_supported & 0x200))
 		return sprintf(buf, "unknown\n");
-	if (fujitsu_hotkey->rfkill_state & 0x200)
+	if (fujitsu_laptop->rfkill_state & 0x200)
 		return sprintf(buf, "docked\n");
 	else
 		return sprintf(buf, "undocked\n");
@@ -591,9 +591,9 @@ static ssize_t
 show_radios_state(struct device *dev,
 			struct device_attribute *attr, char *buf)
 {
-	if (!(fujitsu_hotkey->rfkill_supported & 0x20))
+	if (!(fujitsu_laptop->rfkill_supported & 0x20))
 		return sprintf(buf, "unknown\n");
-	if (fujitsu_hotkey->rfkill_state & 0x20)
+	if (fujitsu_laptop->rfkill_state & 0x20)
 		return sprintf(buf, "on\n");
 	else
 		return sprintf(buf, "killed\n");
@@ -840,7 +840,7 @@ static void acpi_fujitsu_bl_notify(struct acpi_device *device, u32 event)
 
 /* ACPI device for hotkey handling */
 
-static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
+static int acpi_fujitsu_laptop_add(struct acpi_device *device)
 {
 	int result = 0;
 	int state = 0;
@@ -851,32 +851,32 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
 	if (!device)
 		return -EINVAL;
 
-	fujitsu_hotkey->acpi_handle = device->handle;
+	fujitsu_laptop->acpi_handle = device->handle;
 	sprintf(acpi_device_name(device), "%s",
-		ACPI_FUJITSU_HOTKEY_DEVICE_NAME);
+		ACPI_FUJITSU_LAPTOP_DEVICE_NAME);
 	sprintf(acpi_device_class(device), "%s", ACPI_FUJITSU_CLASS);
-	device->driver_data = fujitsu_hotkey;
+	device->driver_data = fujitsu_laptop;
 
 	/* kfifo */
-	spin_lock_init(&fujitsu_hotkey->fifo_lock);
-	error = kfifo_alloc(&fujitsu_hotkey->fifo, RINGBUFFERSIZE * sizeof(int),
+	spin_lock_init(&fujitsu_laptop->fifo_lock);
+	error = kfifo_alloc(&fujitsu_laptop->fifo, RINGBUFFERSIZE * sizeof(int),
 			GFP_KERNEL);
 	if (error) {
 		pr_err("kfifo_alloc failed\n");
 		goto err_stop;
 	}
 
-	fujitsu_hotkey->input = input = input_allocate_device();
+	fujitsu_laptop->input = input = input_allocate_device();
 	if (!input) {
 		error = -ENOMEM;
 		goto err_free_fifo;
 	}
 
-	snprintf(fujitsu_hotkey->phys, sizeof(fujitsu_hotkey->phys),
+	snprintf(fujitsu_laptop->phys, sizeof(fujitsu_laptop->phys),
 		 "%s/video/input0", acpi_device_hid(device));
 
 	input->name = acpi_device_name(device);
-	input->phys = fujitsu_hotkey->phys;
+	input->phys = fujitsu_laptop->phys;
 	input->id.bustype = BUS_HOST;
 	input->id.product = 0x06;
 	input->dev.parent = &device->dev;
@@ -894,7 +894,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
 	if (error)
 		goto err_free_input_dev;
 
-	error = acpi_bus_update_power(fujitsu_hotkey->acpi_handle, &state);
+	error = acpi_bus_update_power(fujitsu_laptop->acpi_handle, &state);
 	if (error) {
 		pr_err("Error reading power state\n");
 		goto err_unregister_input_dev;
@@ -904,7 +904,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
 		acpi_device_name(device), acpi_device_bid(device),
 		!device->power.state ? "on" : "off");
 
-	fujitsu_hotkey->dev = device;
+	fujitsu_laptop->dev = device;
 
 	if (acpi_has_method(device->handle, METHOD_NAME__INI)) {
 		vdbg_printk(FUJLAPTOP_DBG_INFO, "Invoking _INI\n");
@@ -920,16 +920,16 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
 		; /* No action, result is discarded */
 	vdbg_printk(FUJLAPTOP_DBG_INFO, "Discarded %i ringbuffer entries\n", i);
 
-	fujitsu_hotkey->rfkill_supported =
+	fujitsu_laptop->rfkill_supported =
 		call_fext_func(FUNC_RFKILL, 0x0, 0x0, 0x0);
 
 	/* Make sure our bitmask of supported functions is cleared if the
 	   RFKILL function block is not implemented, like on the S7020. */
-	if (fujitsu_hotkey->rfkill_supported == UNSUPPORTED_CMD)
-		fujitsu_hotkey->rfkill_supported = 0;
+	if (fujitsu_laptop->rfkill_supported == UNSUPPORTED_CMD)
+		fujitsu_laptop->rfkill_supported = 0;
 
-	if (fujitsu_hotkey->rfkill_supported)
-		fujitsu_hotkey->rfkill_state =
+	if (fujitsu_laptop->rfkill_supported)
+		fujitsu_laptop->rfkill_state =
 			call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0);
 
 	/* Suspect this is a keymap of the application panel, print it */
@@ -940,7 +940,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
 		result = led_classdev_register(&fujitsu_bl->pf_device->dev,
 						&logolamp_led);
 		if (result == 0) {
-			fujitsu_hotkey->logolamp_registered = 1;
+			fujitsu_laptop->logolamp_registered = 1;
 		} else {
 			pr_err("Could not register LED handler for logo lamp, error %i\n",
 			       result);
@@ -952,7 +952,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
 		result = led_classdev_register(&fujitsu_bl->pf_device->dev,
 						&kblamps_led);
 		if (result == 0) {
-			fujitsu_hotkey->kblamps_registered = 1;
+			fujitsu_laptop->kblamps_registered = 1;
 		} else {
 			pr_err("Could not register LED handler for keyboard lamps, error %i\n",
 			       result);
@@ -969,7 +969,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
 		result = led_classdev_register(&fujitsu_bl->pf_device->dev,
 						&radio_led);
 		if (result == 0) {
-			fujitsu_hotkey->radio_led_registered = 1;
+			fujitsu_laptop->radio_led_registered = 1;
 		} else {
 			pr_err("Could not register LED handler for radio LED, error %i\n",
 			       result);
@@ -986,7 +986,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
 		result = led_classdev_register(&fujitsu_bl->pf_device->dev,
 						&eco_led);
 		if (result == 0) {
-			fujitsu_hotkey->eco_led_registered = 1;
+			fujitsu_laptop->eco_led_registered = 1;
 		} else {
 			pr_err("Could not register LED handler for eco LED, error %i\n",
 			       result);
@@ -1002,47 +1002,47 @@ err_unregister_input_dev:
 err_free_input_dev:
 	input_free_device(input);
 err_free_fifo:
-	kfifo_free(&fujitsu_hotkey->fifo);
+	kfifo_free(&fujitsu_laptop->fifo);
 err_stop:
 	return error;
 }
 
-static int acpi_fujitsu_hotkey_remove(struct acpi_device *device)
+static int acpi_fujitsu_laptop_remove(struct acpi_device *device)
 {
-	struct fujitsu_hotkey_t *fujitsu_hotkey = acpi_driver_data(device);
-	struct input_dev *input = fujitsu_hotkey->input;
+	struct fujitsu_laptop *fujitsu_laptop = acpi_driver_data(device);
+	struct input_dev *input = fujitsu_laptop->input;
 
 #if IS_ENABLED(CONFIG_LEDS_CLASS)
-	if (fujitsu_hotkey->logolamp_registered)
+	if (fujitsu_laptop->logolamp_registered)
 		led_classdev_unregister(&logolamp_led);
 
-	if (fujitsu_hotkey->kblamps_registered)
+	if (fujitsu_laptop->kblamps_registered)
 		led_classdev_unregister(&kblamps_led);
 
-	if (fujitsu_hotkey->radio_led_registered)
+	if (fujitsu_laptop->radio_led_registered)
 		led_classdev_unregister(&radio_led);
 
-	if (fujitsu_hotkey->eco_led_registered)
+	if (fujitsu_laptop->eco_led_registered)
 		led_classdev_unregister(&eco_led);
 #endif
 
 	input_unregister_device(input);
 
-	kfifo_free(&fujitsu_hotkey->fifo);
+	kfifo_free(&fujitsu_laptop->fifo);
 
-	fujitsu_hotkey->acpi_handle = NULL;
+	fujitsu_laptop->acpi_handle = NULL;
 
 	return 0;
 }
 
-static void acpi_fujitsu_hotkey_press(int keycode)
+static void acpi_fujitsu_laptop_press(int keycode)
 {
-	struct input_dev *input = fujitsu_hotkey->input;
+	struct input_dev *input = fujitsu_laptop->input;
 	int status;
 
-	status = kfifo_in_locked(&fujitsu_hotkey->fifo,
+	status = kfifo_in_locked(&fujitsu_laptop->fifo,
 				 (unsigned char *)&keycode, sizeof(keycode),
-				 &fujitsu_hotkey->fifo_lock);
+				 &fujitsu_laptop->fifo_lock);
 	if (status != sizeof(keycode)) {
 		vdbg_printk(FUJLAPTOP_DBG_WARN,
 			    "Could not push keycode [0x%x]\n", keycode);
@@ -1054,16 +1054,16 @@ static void acpi_fujitsu_hotkey_press(int keycode)
 		    "Push keycode into ringbuffer [%d]\n", keycode);
 }
 
-static void acpi_fujitsu_hotkey_release(void)
+static void acpi_fujitsu_laptop_release(void)
 {
-	struct input_dev *input = fujitsu_hotkey->input;
+	struct input_dev *input = fujitsu_laptop->input;
 	int keycode, status;
 
 	while (true) {
-		status = kfifo_out_locked(&fujitsu_hotkey->fifo,
+		status = kfifo_out_locked(&fujitsu_laptop->fifo,
 					  (unsigned char *)&keycode,
 					  sizeof(keycode),
-					  &fujitsu_hotkey->fifo_lock);
+					  &fujitsu_laptop->fifo_lock);
 		if (status != sizeof(keycode))
 			return;
 		input_report_key(input, keycode, 0);
@@ -1073,14 +1073,14 @@ static void acpi_fujitsu_hotkey_release(void)
 	}
 }
 
-static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
+static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event)
 {
 	struct input_dev *input;
 	int keycode;
 	unsigned int irb = 1;
 	int i;
 
-	input = fujitsu_hotkey->input;
+	input = fujitsu_laptop->input;
 
 	if (event != ACPI_FUJITSU_NOTIFY_CODE1) {
 		keycode = KEY_UNKNOWN;
@@ -1093,8 +1093,8 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
 		return;
 	}
 
-	if (fujitsu_hotkey->rfkill_supported)
-		fujitsu_hotkey->rfkill_state =
+	if (fujitsu_laptop->rfkill_supported)
+		fujitsu_laptop->rfkill_state =
 			call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0);
 
 	i = 0;
@@ -1128,16 +1128,16 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
 		}
 
 		if (keycode > 0)
-			acpi_fujitsu_hotkey_press(keycode);
+			acpi_fujitsu_laptop_press(keycode);
 		else if (keycode == 0)
-			acpi_fujitsu_hotkey_release();
+			acpi_fujitsu_laptop_release();
 	}
 
 	/* On some models (first seen on the Skylake-based Lifebook
 	 * E736/E746/E756), the touchpad toggle hotkey (Fn+F4) is
 	 * handled in software; its state is queried using FUNC_RFKILL
 	 */
-	if ((fujitsu_hotkey->rfkill_supported & BIT(26)) &&
+	if ((fujitsu_laptop->rfkill_supported & BIT(26)) &&
 	    (call_fext_func(FUNC_RFKILL, 0x1, 0x0, 0x0) & BIT(26))) {
 		keycode = KEY_TOUCHPAD_TOGGLE;
 		input_report_key(input, keycode, 1);
@@ -1166,25 +1166,25 @@ static struct acpi_driver acpi_fujitsu_bl_driver = {
 		},
 };
 
-static const struct acpi_device_id fujitsu_hotkey_device_ids[] = {
-	{ACPI_FUJITSU_HOTKEY_HID, 0},
+static const struct acpi_device_id fujitsu_laptop_device_ids[] = {
+	{ACPI_FUJITSU_LAPTOP_HID, 0},
 	{"", 0},
 };
 
-static struct acpi_driver acpi_fujitsu_hotkey_driver = {
-	.name = ACPI_FUJITSU_HOTKEY_DRIVER_NAME,
+static struct acpi_driver acpi_fujitsu_laptop_driver = {
+	.name = ACPI_FUJITSU_LAPTOP_DRIVER_NAME,
 	.class = ACPI_FUJITSU_CLASS,
-	.ids = fujitsu_hotkey_device_ids,
+	.ids = fujitsu_laptop_device_ids,
 	.ops = {
-		.add = acpi_fujitsu_hotkey_add,
-		.remove = acpi_fujitsu_hotkey_remove,
-		.notify = acpi_fujitsu_hotkey_notify,
+		.add = acpi_fujitsu_laptop_add,
+		.remove = acpi_fujitsu_laptop_remove,
+		.notify = acpi_fujitsu_laptop_notify,
 		},
 };
 
 static const struct acpi_device_id fujitsu_ids[] __used = {
 	{ACPI_FUJITSU_BL_HID, 0},
-	{ACPI_FUJITSU_HOTKEY_HID, 0},
+	{ACPI_FUJITSU_LAPTOP_HID, 0},
 	{"", 0}
 };
 MODULE_DEVICE_TABLE(acpi, fujitsu_ids);
@@ -1255,18 +1255,18 @@ static int __init fujitsu_init(void)
 	if (ret)
 		goto fail_backlight;
 
-	/* Register hotkey driver */
+	/* Register laptop driver */
 
-	fujitsu_hotkey = kzalloc(sizeof(struct fujitsu_hotkey_t), GFP_KERNEL);
-	if (!fujitsu_hotkey) {
+	fujitsu_laptop = kzalloc(sizeof(struct fujitsu_laptop), GFP_KERNEL);
+	if (!fujitsu_laptop) {
 		ret = -ENOMEM;
-		goto fail_hotkey;
+		goto fail_laptop;
 	}
 
-	result = acpi_bus_register_driver(&acpi_fujitsu_hotkey_driver);
+	result = acpi_bus_register_driver(&acpi_fujitsu_laptop_driver);
 	if (result < 0) {
 		ret = -ENODEV;
-		goto fail_hotkey1;
+		goto fail_laptop1;
 	}
 
 	/* Sync backlight power status (needs FUJ02E3 device, hence deferred) */
@@ -1281,9 +1281,9 @@ static int __init fujitsu_init(void)
 
 	return 0;
 
-fail_hotkey1:
-	kfree(fujitsu_hotkey);
-fail_hotkey:
+fail_laptop1:
+	kfree(fujitsu_laptop);
+fail_laptop:
 	platform_driver_unregister(&fujitsupf_driver);
 fail_backlight:
 	backlight_device_unregister(fujitsu_bl->bl_device);
@@ -1304,9 +1304,9 @@ fail_acpi:
 
 static void __exit fujitsu_cleanup(void)
 {
-	acpi_bus_unregister_driver(&acpi_fujitsu_hotkey_driver);
+	acpi_bus_unregister_driver(&acpi_fujitsu_laptop_driver);
 
-	kfree(fujitsu_hotkey);
+	kfree(fujitsu_laptop);
 
 	platform_driver_unregister(&fujitsupf_driver);
 
-- 
cgit v1.2.3


From 1650602691f4e8ea8f6e306452a72ac9a611932a Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Wed, 8 Feb 2017 14:46:26 +0100
Subject: platform/x86: fujitsu-laptop: make platform-related variables match
 naming convention
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace "fujitsupf" with "fujitsu_pf" in all platform-related variable
names to match the module-wide naming convention.

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
[kempniu: rebase patch, rewrite commit message]
Signed-off-by: Michał Kępień <kernel@kempniu.pl>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Jonathan Woithe <jwoithe@just42.net>
---
 drivers/platform/x86/fujitsu-laptop.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index 70124004b346..6cdd1b334e8a 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -607,7 +607,7 @@ static DEVICE_ATTR(lid, 0444, show_lid_state, ignore_store);
 static DEVICE_ATTR(dock, 0444, show_dock_state, ignore_store);
 static DEVICE_ATTR(radios, 0444, show_radios_state, ignore_store);
 
-static struct attribute *fujitsupf_attributes[] = {
+static struct attribute *fujitsu_pf_attributes[] = {
 	&dev_attr_brightness_changed.attr,
 	&dev_attr_max_brightness.attr,
 	&dev_attr_lcd_level.attr,
@@ -617,11 +617,11 @@ static struct attribute *fujitsupf_attributes[] = {
 	NULL
 };
 
-static struct attribute_group fujitsupf_attribute_group = {
-	.attrs = fujitsupf_attributes
+static struct attribute_group fujitsu_pf_attribute_group = {
+	.attrs = fujitsu_pf_attributes
 };
 
-static struct platform_driver fujitsupf_driver = {
+static struct platform_driver fujitsu_pf_driver = {
 	.driver = {
 		   .name = "fujitsu-laptop",
 		   }
@@ -1226,7 +1226,7 @@ static int __init fujitsu_init(void)
 
 	ret =
 	    sysfs_create_group(&fujitsu_bl->pf_device->dev.kobj,
-			       &fujitsupf_attribute_group);
+			       &fujitsu_pf_attribute_group);
 	if (ret)
 		goto fail_platform_device2;
 
@@ -1251,7 +1251,7 @@ static int __init fujitsu_init(void)
 		fujitsu_bl->bl_device->props.brightness = fujitsu_bl->brightness_level;
 	}
 
-	ret = platform_driver_register(&fujitsupf_driver);
+	ret = platform_driver_register(&fujitsu_pf_driver);
 	if (ret)
 		goto fail_backlight;
 
@@ -1284,12 +1284,12 @@ static int __init fujitsu_init(void)
 fail_laptop1:
 	kfree(fujitsu_laptop);
 fail_laptop:
-	platform_driver_unregister(&fujitsupf_driver);
+	platform_driver_unregister(&fujitsu_pf_driver);
 fail_backlight:
 	backlight_device_unregister(fujitsu_bl->bl_device);
 fail_sysfs_group:
 	sysfs_remove_group(&fujitsu_bl->pf_device->dev.kobj,
-			   &fujitsupf_attribute_group);
+			   &fujitsu_pf_attribute_group);
 fail_platform_device2:
 	platform_device_del(fujitsu_bl->pf_device);
 fail_platform_device1:
@@ -1308,12 +1308,12 @@ static void __exit fujitsu_cleanup(void)
 
 	kfree(fujitsu_laptop);
 
-	platform_driver_unregister(&fujitsupf_driver);
+	platform_driver_unregister(&fujitsu_pf_driver);
 
 	backlight_device_unregister(fujitsu_bl->bl_device);
 
 	sysfs_remove_group(&fujitsu_bl->pf_device->dev.kobj,
-			   &fujitsupf_attribute_group);
+			   &fujitsu_pf_attribute_group);
 
 	platform_device_unregister(fujitsu_bl->pf_device);
 
-- 
cgit v1.2.3


From 8ef27bd3410c4e5856bac2315ef51224926020e0 Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Wed, 8 Feb 2017 14:46:27 +0100
Subject: platform/x86: fujitsu-laptop: rename FUNC_RFKILL to FUNC_FLAGS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

FUNC subfunction 0x1000 is currently referred to as FUNC_RFKILL, which
is misleading, because it handles more than just radio devices (also
lid, dock, LEDs).  Rename the FUNC_RFKILL constant to FUNC_FLAGS.
Replace "rfkill" with "flags" in the names of its associated fields
inside struct fujitsu_laptop.

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
[kempniu: rebase patch, rewrite commit message]
Signed-off-by: Michał Kępień <kernel@kempniu.pl>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Jonathan Woithe <jwoithe@just42.net>
---
 drivers/platform/x86/fujitsu-laptop.c | 50 +++++++++++++++++------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index 6cdd1b334e8a..55d696262301 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -89,7 +89,7 @@
 #define ACPI_FUJITSU_NOTIFY_CODE1     0x80
 
 /* FUNC interface - command values */
-#define FUNC_RFKILL	0x1000
+#define FUNC_FLAGS	0x1000
 #define FUNC_LEDS	0x1001
 #define FUNC_BUTTONS	0x1002
 #define FUNC_BACKLIGHT  0x1004
@@ -163,8 +163,8 @@ struct fujitsu_laptop {
 	struct platform_device *pf_device;
 	struct kfifo fifo;
 	spinlock_t fifo_lock;
-	int rfkill_supported;
-	int rfkill_state;
+	int flags_supported;
+	int flags_state;
 	int logolamp_registered;
 	int kblamps_registered;
 	int radio_led_registered;
@@ -300,9 +300,9 @@ static int radio_led_set(struct led_classdev *cdev,
 				enum led_brightness brightness)
 {
 	if (brightness >= LED_FULL)
-		return call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, RADIO_LED_ON);
+		return call_fext_func(FUNC_FLAGS, 0x5, RADIO_LED_ON, RADIO_LED_ON);
 	else
-		return call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, 0x0);
+		return call_fext_func(FUNC_FLAGS, 0x5, RADIO_LED_ON, 0x0);
 }
 
 static int eco_led_set(struct led_classdev *cdev,
@@ -346,7 +346,7 @@ static enum led_brightness radio_led_get(struct led_classdev *cdev)
 {
 	enum led_brightness brightness = LED_OFF;
 
-	if (call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0) & RADIO_LED_ON)
+	if (call_fext_func(FUNC_FLAGS, 0x4, 0x0, 0x0) & RADIO_LED_ON)
 		brightness = LED_FULL;
 
 	return brightness;
@@ -567,9 +567,9 @@ static ssize_t
 show_lid_state(struct device *dev,
 			struct device_attribute *attr, char *buf)
 {
-	if (!(fujitsu_laptop->rfkill_supported & 0x100))
+	if (!(fujitsu_laptop->flags_supported & 0x100))
 		return sprintf(buf, "unknown\n");
-	if (fujitsu_laptop->rfkill_state & 0x100)
+	if (fujitsu_laptop->flags_state & 0x100)
 		return sprintf(buf, "open\n");
 	else
 		return sprintf(buf, "closed\n");
@@ -579,9 +579,9 @@ static ssize_t
 show_dock_state(struct device *dev,
 			struct device_attribute *attr, char *buf)
 {
-	if (!(fujitsu_laptop->rfkill_supported & 0x200))
+	if (!(fujitsu_laptop->flags_supported & 0x200))
 		return sprintf(buf, "unknown\n");
-	if (fujitsu_laptop->rfkill_state & 0x200)
+	if (fujitsu_laptop->flags_state & 0x200)
 		return sprintf(buf, "docked\n");
 	else
 		return sprintf(buf, "undocked\n");
@@ -591,9 +591,9 @@ static ssize_t
 show_radios_state(struct device *dev,
 			struct device_attribute *attr, char *buf)
 {
-	if (!(fujitsu_laptop->rfkill_supported & 0x20))
+	if (!(fujitsu_laptop->flags_supported & 0x20))
 		return sprintf(buf, "unknown\n");
-	if (fujitsu_laptop->rfkill_state & 0x20)
+	if (fujitsu_laptop->flags_state & 0x20)
 		return sprintf(buf, "on\n");
 	else
 		return sprintf(buf, "killed\n");
@@ -920,17 +920,17 @@ static int acpi_fujitsu_laptop_add(struct acpi_device *device)
 		; /* No action, result is discarded */
 	vdbg_printk(FUJLAPTOP_DBG_INFO, "Discarded %i ringbuffer entries\n", i);
 
-	fujitsu_laptop->rfkill_supported =
-		call_fext_func(FUNC_RFKILL, 0x0, 0x0, 0x0);
+	fujitsu_laptop->flags_supported =
+		call_fext_func(FUNC_FLAGS, 0x0, 0x0, 0x0);
 
 	/* Make sure our bitmask of supported functions is cleared if the
 	   RFKILL function block is not implemented, like on the S7020. */
-	if (fujitsu_laptop->rfkill_supported == UNSUPPORTED_CMD)
-		fujitsu_laptop->rfkill_supported = 0;
+	if (fujitsu_laptop->flags_supported == UNSUPPORTED_CMD)
+		fujitsu_laptop->flags_supported = 0;
 
-	if (fujitsu_laptop->rfkill_supported)
-		fujitsu_laptop->rfkill_state =
-			call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0);
+	if (fujitsu_laptop->flags_supported)
+		fujitsu_laptop->flags_state =
+			call_fext_func(FUNC_FLAGS, 0x4, 0x0, 0x0);
 
 	/* Suspect this is a keymap of the application panel, print it */
 	pr_info("BTNI: [0x%x]\n", call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0));
@@ -1093,9 +1093,9 @@ static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event)
 		return;
 	}
 
-	if (fujitsu_laptop->rfkill_supported)
-		fujitsu_laptop->rfkill_state =
-			call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0);
+	if (fujitsu_laptop->flags_supported)
+		fujitsu_laptop->flags_state =
+			call_fext_func(FUNC_FLAGS, 0x4, 0x0, 0x0);
 
 	i = 0;
 	while ((irb =
@@ -1135,10 +1135,10 @@ static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event)
 
 	/* On some models (first seen on the Skylake-based Lifebook
 	 * E736/E746/E756), the touchpad toggle hotkey (Fn+F4) is
-	 * handled in software; its state is queried using FUNC_RFKILL
+	 * handled in software; its state is queried using FUNC_FLAGS
 	 */
-	if ((fujitsu_laptop->rfkill_supported & BIT(26)) &&
-	    (call_fext_func(FUNC_RFKILL, 0x1, 0x0, 0x0) & BIT(26))) {
+	if ((fujitsu_laptop->flags_supported & BIT(26)) &&
+	    (call_fext_func(FUNC_FLAGS, 0x1, 0x0, 0x0) & BIT(26))) {
 		keycode = KEY_TOUCHPAD_TOGGLE;
 		input_report_key(input, keycode, 1);
 		input_sync(input);
-- 
cgit v1.2.3


From d3dd4480f8a911198d85b3cdb97f22db6539d2d1 Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Wed, 8 Feb 2017 14:46:28 +0100
Subject: platform/x86: fujitsu-laptop: replace numeric values with constants
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace three repeating numeric values with constants to improve code
clarity.

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
[kempniu: rebase patch, rewrite commit message]
Signed-off-by: Michał Kępień <kernel@kempniu.pl>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Jonathan Woithe <jwoithe@just42.net>
---
 drivers/platform/x86/fujitsu-laptop.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index 55d696262301..deef40a03b6d 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -97,6 +97,11 @@
 /* FUNC interface - responses */
 #define UNSUPPORTED_CMD 0x80000000
 
+/* FUNC interface - status flags */
+#define FLAG_RFKILL	0x020
+#define FLAG_LID	0x100
+#define FLAG_DOCK	0x200
+
 #if IS_ENABLED(CONFIG_LEDS_CLASS)
 /* FUNC interface - LED control */
 #define FUNC_LED_OFF	0x1
@@ -567,9 +572,9 @@ static ssize_t
 show_lid_state(struct device *dev,
 			struct device_attribute *attr, char *buf)
 {
-	if (!(fujitsu_laptop->flags_supported & 0x100))
+	if (!(fujitsu_laptop->flags_supported & FLAG_LID))
 		return sprintf(buf, "unknown\n");
-	if (fujitsu_laptop->flags_state & 0x100)
+	if (fujitsu_laptop->flags_state & FLAG_LID)
 		return sprintf(buf, "open\n");
 	else
 		return sprintf(buf, "closed\n");
@@ -579,9 +584,9 @@ static ssize_t
 show_dock_state(struct device *dev,
 			struct device_attribute *attr, char *buf)
 {
-	if (!(fujitsu_laptop->flags_supported & 0x200))
+	if (!(fujitsu_laptop->flags_supported & FLAG_DOCK))
 		return sprintf(buf, "unknown\n");
-	if (fujitsu_laptop->flags_state & 0x200)
+	if (fujitsu_laptop->flags_state & FLAG_DOCK)
 		return sprintf(buf, "docked\n");
 	else
 		return sprintf(buf, "undocked\n");
@@ -591,9 +596,9 @@ static ssize_t
 show_radios_state(struct device *dev,
 			struct device_attribute *attr, char *buf)
 {
-	if (!(fujitsu_laptop->flags_supported & 0x20))
+	if (!(fujitsu_laptop->flags_supported & FLAG_RFKILL))
 		return sprintf(buf, "unknown\n");
-	if (fujitsu_laptop->flags_state & 0x20)
+	if (fujitsu_laptop->flags_state & FLAG_RFKILL)
 		return sprintf(buf, "on\n");
 	else
 		return sprintf(buf, "killed\n");
-- 
cgit v1.2.3


From 8c590e339f37022dff209ef16cf699531df1a0ca Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Wed, 8 Feb 2017 14:46:29 +0100
Subject: platform/x86: fujitsu-laptop: remove redundant forward declarations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Both acpi_fujitsu_bl_notify() and acpi_fujitsu_laptop_notify() are
defined before they are first used, so remove their forward declarations
as they are redundant.

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
[kempniu: rebase patch, rewrite commit message]
Signed-off-by: Michał Kępień <kernel@kempniu.pl>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Jonathan Woithe <jwoithe@just42.net>
---
 drivers/platform/x86/fujitsu-laptop.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index deef40a03b6d..ba0c0c211251 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -178,8 +178,6 @@ struct fujitsu_laptop {
 
 static struct fujitsu_laptop *fujitsu_laptop;
 
-static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event);
-
 #if IS_ENABLED(CONFIG_LEDS_CLASS)
 static enum led_brightness logolamp_get(struct led_classdev *cdev);
 static int logolamp_set(struct led_classdev *cdev,
@@ -227,8 +225,6 @@ static struct led_classdev eco_led = {
 static u32 dbg_level = 0x03;
 #endif
 
-static void acpi_fujitsu_bl_notify(struct acpi_device *device, u32 event);
-
 /* Fujitsu ACPI interface function */
 
 static int call_fext_func(int cmd, int arg0, int arg1, int arg2)
-- 
cgit v1.2.3


From c1d1e8a051761fb808308dab32b9351e1d1fbb9b Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Wed, 8 Feb 2017 14:46:30 +0100
Subject: platform/x86: fujitsu-laptop: simplify acpi_bus_register_driver()
 error handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A separate variable is not needed to handle error codes returned by
acpi_bus_register_driver().  If the latter fails, just use the value it
returned as the value returned by fujitsu_init().

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
[kempniu: rebase patch, rewrite commit message]
Signed-off-by: Michał Kępień <kernel@kempniu.pl>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Jonathan Woithe <jwoithe@just42.net>
---
 drivers/platform/x86/fujitsu-laptop.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index ba0c0c211251..a5478a011b90 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -1192,7 +1192,7 @@ MODULE_DEVICE_TABLE(acpi, fujitsu_ids);
 
 static int __init fujitsu_init(void)
 {
-	int ret, result, max_brightness;
+	int ret, max_brightness;
 
 	if (acpi_disabled)
 		return -ENODEV;
@@ -1207,11 +1207,9 @@ static int __init fujitsu_init(void)
 	fujitsu_bl->keycode5 = KEY_RFKILL;
 	dmi_check_system(fujitsu_dmi_table);
 
-	result = acpi_bus_register_driver(&acpi_fujitsu_bl_driver);
-	if (result < 0) {
-		ret = -ENODEV;
+	ret = acpi_bus_register_driver(&acpi_fujitsu_bl_driver);
+	if (ret)
 		goto fail_acpi;
-	}
 
 	/* Register platform stuff */
 
@@ -1264,11 +1262,9 @@ static int __init fujitsu_init(void)
 		goto fail_laptop;
 	}
 
-	result = acpi_bus_register_driver(&acpi_fujitsu_laptop_driver);
-	if (result < 0) {
-		ret = -ENODEV;
+	ret = acpi_bus_register_driver(&acpi_fujitsu_laptop_driver);
+	if (ret)
 		goto fail_laptop1;
-	}
 
 	/* Sync backlight power status (needs FUJ02E3 device, hence deferred) */
 	if (acpi_video_get_backlight_type() == acpi_backlight_vendor) {
-- 
cgit v1.2.3


From 5296a73613814a15e54ebddc2843ec0f84951d60 Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Wed, 8 Feb 2017 14:46:32 +0100
Subject: platform/x86: fujitsu-laptop: autodetect LCD interface on all models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Presence of ACPI method SBL2 should be checked on all models rather than
just the ones with predefined hotkey keycode overrides.  Move most of
dmi_check_cb_common() to acpi_fujitsu_bl_add().  Adjust indentation to
make checkpatch happy.

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
[kempniu: rebase patch, rewrite commit message]
Signed-off-by: Michał Kępień <kernel@kempniu.pl>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Jonathan Woithe <jwoithe@just42.net>
---
 drivers/platform/x86/fujitsu-laptop.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index a5478a011b90..56804c4db33f 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -631,15 +631,6 @@ static struct platform_driver fujitsu_pf_driver = {
 static void __init dmi_check_cb_common(const struct dmi_system_id *id)
 {
 	pr_info("Identified laptop model '%s'\n", id->ident);
-	if (use_alt_lcd_levels == -1) {
-		if (acpi_has_method(NULL,
-				"\\_SB.PCI0.LPCB.FJEX.SBL2"))
-			use_alt_lcd_levels = 1;
-		else
-			use_alt_lcd_levels = 0;
-		vdbg_printk(FUJLAPTOP_DBG_TRACE, "auto-detected usealt as "
-			"%i\n", use_alt_lcd_levels);
-	}
 }
 
 static int __init dmi_check_cb_s6410(const struct dmi_system_id *id)
@@ -751,6 +742,15 @@ static int acpi_fujitsu_bl_add(struct acpi_device *device)
 			pr_err("_INI Method failed\n");
 	}
 
+	if (use_alt_lcd_levels == -1) {
+		if (acpi_has_method(NULL, "\\_SB.PCI0.LPCB.FJEX.SBL2"))
+			use_alt_lcd_levels = 1;
+		else
+			use_alt_lcd_levels = 0;
+		vdbg_printk(FUJLAPTOP_DBG_TRACE, "auto-detected usealt as %i\n",
+			    use_alt_lcd_levels);
+	}
+
 	/* do config (detect defaults) */
 	use_alt_lcd_levels = use_alt_lcd_levels == 1 ? 1 : 0;
 	disable_brightness_adjust = disable_brightness_adjust == 1 ? 1 : 0;
-- 
cgit v1.2.3


From 5802d0bc3fe9f598f0ff3f5fd7fd8c5c935a1b5d Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Wed, 8 Feb 2017 14:46:33 +0100
Subject: platform/x86: fujitsu-laptop: remove redundant MODULE_ALIAS entries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

MODULE_DEVICE_TABLE is all that is needed for fujitsu-laptop to be
properly autoloaded based on presence of its associated ACPI devices, so
remove redundant MODULE_ALIAS entries.

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
[kempniu: rebase patch, rewrite commit message]
Signed-off-by: Michał Kępień <kernel@kempniu.pl>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Jonathan Woithe <jwoithe@just42.net>
---
 drivers/platform/x86/fujitsu-laptop.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index 56804c4db33f..e12cc3504d48 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -1338,7 +1338,3 @@ MODULE_AUTHOR("Jonathan Woithe, Peter Gruber, Tony Vroon");
 MODULE_DESCRIPTION("Fujitsu laptop extras support");
 MODULE_VERSION(FUJITSU_DRIVER_VERSION);
 MODULE_LICENSE("GPL");
-
-MODULE_ALIAS("dmi:*:svnFUJITSUSIEMENS:*:pvr:rvnFUJITSU:rnFJNB1D3:*:cvrS6410:*");
-MODULE_ALIAS("dmi:*:svnFUJITSUSIEMENS:*:pvr:rvnFUJITSU:rnFJNB1E6:*:cvrS6420:*");
-MODULE_ALIAS("dmi:*:svnFUJITSU:*:pvr:rvnFUJITSU:rnFJNB19C:*:cvrS7020:*");
-- 
cgit v1.2.3


From 71050ae7bf83e4d71a859257d11adc5de517073e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Paulo=20Rechi=20Vita?= <jprvita@gmail.com>
Date: Mon, 20 Feb 2017 14:50:22 -0500
Subject: platform/x86: asus-wmi: Detect quirk_no_rfkill from the DSDT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some Asus laptops that have an airplane-mode indicator LED, also have
the WMI WLAN user bit set, and the following bits in their DSDT:

    Scope (_SB)
    {
      (...)
      Device (ATKD)
      {
        (...)
        Method (WMNB, 3, Serialized)
        {
          (...)
          If (LEqual (IIA0, 0x00010002))
          {
            OWGD (IIA1)
            Return (One)
          }
        }
      }
    }

So when asus-wmi uses ASUS_WMI_DEVID_WLAN_LED (0x00010002) to store the
wlan state, it drives the airplane-mode indicator LED (through the call
to OWGD) in an inverted fashion: the LED is ON when airplane mode is OFF
(since wlan is ON), and vice-versa.

This commit skips registering RFKill switches at all for these laptops,
to allow the asus-wireless driver to drive the airplane mode LED
correctly through the ASHS ACPI device. Relying on the presence of ASHS
and ASUS_WMI_DSTS_USER_BIT avoids adding DMI-based quirks for at least
21 different laptops.

Signed-off-by: João Paulo Rechi Vita <jprvita@endlessm.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/platform/x86/asus-wmi.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 43cb680adbb4..8499d3ae4257 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -159,6 +159,8 @@ MODULE_LICENSE("GPL");
 #define USB_INTEL_XUSB2PR		0xD0
 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI	0x9c31
 
+static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL };
+
 struct bios_args {
 	u32 arg0;
 	u32 arg1;
@@ -2051,6 +2053,16 @@ static int asus_wmi_fan_init(struct asus_wmi *asus)
 	return 0;
 }
 
+static bool ashs_present(void)
+{
+	int i = 0;
+	while (ashs_ids[i]) {
+		if (acpi_dev_found(ashs_ids[i++]))
+			return true;
+	}
+	return false;
+}
+
 /*
  * WMI Driver
  */
@@ -2095,6 +2107,13 @@ static int asus_wmi_add(struct platform_device *pdev)
 	if (err)
 		goto fail_leds;
 
+	asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WLAN, &result);
+	if (result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT))
+		asus->driver->wlan_ctrl_by_user = 1;
+
+	if (asus->driver->wlan_ctrl_by_user && ashs_present())
+		asus->driver->quirks->no_rfkill = 1;
+
 	if (!asus->driver->quirks->no_rfkill) {
 		err = asus_wmi_rfkill_init(asus);
 		if (err)
@@ -2134,10 +2153,6 @@ static int asus_wmi_add(struct platform_device *pdev)
 	if (err)
 		goto fail_debugfs;
 
-	asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WLAN, &result);
-	if (result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT))
-		asus->driver->wlan_ctrl_by_user = 1;
-
 	return 0;
 
 fail_debugfs:
-- 
cgit v1.2.3


From d825adb48cf9bf9e3f5cb1d927e2827f8c2abee4 Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Sun, 26 Feb 2017 16:15:21 +0900
Subject: xenbus: Remove duplicate inclusion of linux/init.h

This patch remove duplicate inclusion of linux/init.h in
xenbus_dev_frontend.c.
Confirm successfully compile after remove the line.

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/xenbus/xenbus_dev_frontend.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
index 4d343eed08f5..1f4733b80c87 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -55,7 +55,6 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/miscdevice.h>
-#include <linux/init.h>
 
 #include <xen/xenbus.h>
 #include <xen/xen.h>
-- 
cgit v1.2.3


From 3a150df945b7408c27cad2c01a1638e8b14ac562 Mon Sep 17 00:00:00 2001
From: Chunyu Hu <chuhu@redhat.com>
Date: Wed, 22 Feb 2017 08:29:26 +0800
Subject: tracing: Fix code comment for ftrace_ops_get_func()

There is no function 'ftrace_ops_recurs_func' existing in the current code,
it was renamed to ftrace_ops_assist_func() in commit c68c0fa29341
("ftrace: Have ftrace_ops_get_func() handle RCU and PER_CPU flags too").
Update the comment to the correct function name.

Link: http://lkml.kernel.org/r/1487723366-14463-1-git-send-email-chuhu@redhat.com

Signed-off-by: Chunyu Hu <chuhu@redhat.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 0c0609326391..fd84f2e30b6d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5487,7 +5487,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
  * Normally the mcount trampoline will call the ops->func, but there
  * are times that it should not. For example, if the ops does not
  * have its own recursion protection, then it should call the
- * ftrace_ops_recurs_func() instead.
+ * ftrace_ops_assist_func() instead.
  *
  * Returns the function that the trampoline should call for @ops.
  */
-- 
cgit v1.2.3


From dcc235279a52d49023d4f1af7c3ed468a97015ae Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 27 Feb 2017 21:29:03 +0100
Subject: gcc-plugins: fix sancov_plugin for gcc-5
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The name of the local variable was inadvertantly changed from
sancov_plugin_pass_info to sancov_pass_info:

scripts/gcc-plugins/sancov_plugin.c: In function ‘int plugin_init(plugin_name_args*, plugin_gcc_version*)’:
scripts/gcc-plugins/sancov_plugin.c:136:67: error: ‘sancov_plugin_pass_info’ was not declared in this scope

This changes the conditional reference to this variable as well.

Fixes: 5a45a4c5c3f5 ("gcc-plugins: consolidate on PASS_INFO macro")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 scripts/gcc-plugins/sancov_plugin.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/gcc-plugins/sancov_plugin.c b/scripts/gcc-plugins/sancov_plugin.c
index 9b0b5cbc5b89..0f98634c20a0 100644
--- a/scripts/gcc-plugins/sancov_plugin.c
+++ b/scripts/gcc-plugins/sancov_plugin.c
@@ -133,7 +133,7 @@ __visible int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gc
 #if BUILDING_GCC_VERSION < 6000
 	register_callback(plugin_name, PLUGIN_START_UNIT, &sancov_start_unit, NULL);
 	register_callback(plugin_name, PLUGIN_REGISTER_GGC_ROOTS, NULL, (void *)&gt_ggc_r_gt_sancov);
-	register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &sancov_plugin_pass_info);
+	register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &sancov_pass_info);
 #endif
 
 	return 0;
-- 
cgit v1.2.3


From f5432f01240ef69a391940d623b6a51768aefd65 Mon Sep 17 00:00:00 2001
From: Sekhar Nori <nsekhar@ti.com>
Date: Wed, 15 Feb 2017 20:42:52 +0530
Subject: ARM: dts: am57xx-idk: tpic2810 is on I2C bus, not SPI

commit 50e95b6b854c ("ARM: dts: am57xx-idk: Add Industrial
output support") added the TPIC2810 device-tree node under
SPI bus instead of I2C1.

Fix it. Tested on AM572x IDK by driving on-board LEDs
connected to TPIC2810

Fixes: 50e95b6b854c ("ARM: dts: am57xx-idk: Add Industrial output support")
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
Acked-by: Andrew F. Davis <afd@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/am57xx-idk-common.dtsi | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/arm/boot/dts/am57xx-idk-common.dtsi b/arch/arm/boot/dts/am57xx-idk-common.dtsi
index 814a720d5c3d..d0a55b845690 100644
--- a/arch/arm/boot/dts/am57xx-idk-common.dtsi
+++ b/arch/arm/boot/dts/am57xx-idk-common.dtsi
@@ -311,6 +311,13 @@
 			/* ID & VBUS GPIOs provided in board dts */
 		};
 	};
+
+	tpic2810: tpic2810@60 {
+		compatible = "ti,tpic2810";
+		reg = <0x60>;
+		gpio-controller;
+		#gpio-cells = <2>;
+	};
 };
 
 &mcspi3 {
@@ -326,13 +333,6 @@
 		spi-max-frequency = <1000000>;
 		spi-cpol;
 	};
-
-	tpic2810: tpic2810@60 {
-		compatible = "ti,tpic2810";
-		reg = <0x60>;
-		gpio-controller;
-		#gpio-cells = <2>;
-	};
 };
 
 &uart3 {
-- 
cgit v1.2.3


From 0341735226dcfa9d3727ff001e030f879ab91ca2 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Fri, 17 Feb 2017 06:51:17 -0800
Subject: ARM: omap2plus_defconfig: Enable INPUT_MOUSEDEV as loadable modules

Otherwise mice won't be happy.

Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/configs/omap2plus_defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 195c98b85568..77ffccfd0c3f 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -188,6 +188,7 @@ CONFIG_WL12XX=m
 CONFIG_WL18XX=m
 CONFIG_WLCORE_SPI=m
 CONFIG_WLCORE_SDIO=m
+CONFIG_INPUT_MOUSEDEV=m
 CONFIG_INPUT_JOYDEV=m
 CONFIG_INPUT_EVDEV=m
 CONFIG_KEYBOARD_ATKBD=m
-- 
cgit v1.2.3


From 48385896e9c53e1061f103e1271a6be9a7ea00c4 Mon Sep 17 00:00:00 2001
From: Teresa Remmet <t.remmet@phytec.de>
Date: Fri, 10 Feb 2017 13:29:07 +0100
Subject: ARM: dts: am335x-pcm953: Fix legacy wakeup source binding

Replaced the legacy binding "gpio-key,wakeup" with "wakeup-source" as
noted in the kernel documentation.

Signed-off-by: Teresa Remmet <t.remmet@phytec.de>
Reported-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/am335x-pcm-953.dtsi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/am335x-pcm-953.dtsi b/arch/arm/boot/dts/am335x-pcm-953.dtsi
index 02981eae96b9..1ec8e0d80191 100644
--- a/arch/arm/boot/dts/am335x-pcm-953.dtsi
+++ b/arch/arm/boot/dts/am335x-pcm-953.dtsi
@@ -63,14 +63,14 @@
 			label = "home";
 			linux,code = <KEY_HOME>;
 			gpios = <&gpio3 7 GPIO_ACTIVE_HIGH>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 
 		button@1 {
 			label = "menu";
 			linux,code = <KEY_MENU>;
 			gpios = <&gpio3 8 GPIO_ACTIVE_HIGH>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 
 	};
-- 
cgit v1.2.3


From 7807e086a2d1f69cc1a57958cac04fea79fc2112 Mon Sep 17 00:00:00 2001
From: Ladislav Michl <ladis@linux-mips.org>
Date: Sat, 11 Feb 2017 14:02:49 +0100
Subject: ARM: OMAP2+: gpmc-onenand: propagate error on initialization failure

gpmc_probe_onenand_child returns success even on gpmc_onenand_init
failure. Fix that.

Signed-off-by: Ladislav Michl <ladis@linux-mips.org>
Acked-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/gpmc-onenand.c | 10 ++++++----
 drivers/memory/omap-gpmc.c         |  4 +---
 include/linux/omap-gpmc.h          |  5 +++--
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c
index 8633c703546a..2944af820558 100644
--- a/arch/arm/mach-omap2/gpmc-onenand.c
+++ b/arch/arm/mach-omap2/gpmc-onenand.c
@@ -367,7 +367,7 @@ static int gpmc_onenand_setup(void __iomem *onenand_base, int *freq_ptr)
 	return ret;
 }
 
-void gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data)
+int gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data)
 {
 	int err;
 	struct device *dev = &gpmc_onenand_device.dev;
@@ -393,15 +393,17 @@ void gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data)
 	if (err < 0) {
 		dev_err(dev, "Cannot request GPMC CS %d, error %d\n",
 			gpmc_onenand_data->cs, err);
-		return;
+		return err;
 	}
 
 	gpmc_onenand_resource.end = gpmc_onenand_resource.start +
 							ONENAND_IO_SIZE - 1;
 
-	if (platform_device_register(&gpmc_onenand_device) < 0) {
+	err = platform_device_register(&gpmc_onenand_device);
+	if (err) {
 		dev_err(dev, "Unable to register OneNAND device\n");
 		gpmc_cs_free(gpmc_onenand_data->cs);
-		return;
 	}
+
+	return err;
 }
diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c
index 5457c361ad58..bf0fe0137dfe 100644
--- a/drivers/memory/omap-gpmc.c
+++ b/drivers/memory/omap-gpmc.c
@@ -1947,9 +1947,7 @@ static int gpmc_probe_onenand_child(struct platform_device *pdev,
 	if (!of_property_read_u32(child, "dma-channel", &val))
 		gpmc_onenand_data->dma_channel = val;
 
-	gpmc_onenand_init(gpmc_onenand_data);
-
-	return 0;
+	return gpmc_onenand_init(gpmc_onenand_data);
 }
 #else
 static int gpmc_probe_onenand_child(struct platform_device *pdev,
diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h
index 35d0fd7a4948..e821a3132a3e 100644
--- a/include/linux/omap-gpmc.h
+++ b/include/linux/omap-gpmc.h
@@ -88,10 +88,11 @@ static inline int gpmc_nand_init(struct omap_nand_platform_data *d,
 #endif
 
 #if IS_ENABLED(CONFIG_MTD_ONENAND_OMAP2)
-extern void gpmc_onenand_init(struct omap_onenand_platform_data *d);
+extern int gpmc_onenand_init(struct omap_onenand_platform_data *d);
 #else
 #define board_onenand_data	NULL
-static inline void gpmc_onenand_init(struct omap_onenand_platform_data *d)
+static inline int gpmc_onenand_init(struct omap_onenand_platform_data *d)
 {
+	return 0;
 }
 #endif
-- 
cgit v1.2.3


From ac28e47ccc3ff8dabce1aec6b224760c3e524044 Mon Sep 17 00:00:00 2001
From: Ladislav Michl <ladis@linux-mips.org>
Date: Tue, 21 Feb 2017 10:44:45 +0100
Subject: ARM: OMAP2+: Remove legacy gpmc-nand.c

This code is no longer used and can be removed as we
are using device tree.

Removing this code also removes a dependency between
drivers/mtd and arch/arm/mach-omap2 making furhter driver
changes easier.

Signed-off-by: Ladislav Michl <ladis@linux-mips.org>
[tony@atomide.com: removed from header too, updated comments]
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/Makefile    |   3 -
 arch/arm/mach-omap2/gpmc-nand.c | 154 ----------------------------------------
 include/linux/omap-gpmc.h       |  11 ---
 3 files changed, 168 deletions(-)
 delete mode 100644 arch/arm/mach-omap2/gpmc-nand.c

diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index 093458b62c8d..c89757abb0ae 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -241,6 +241,3 @@ obj-$(CONFIG_MACH_OMAP2_TUSB6010)	+= usb-tusb6010.o
 
 onenand-$(CONFIG_MTD_ONENAND_OMAP2)	:= gpmc-onenand.o
 obj-y					+= $(onenand-m) $(onenand-y)
-
-nand-$(CONFIG_MTD_NAND_OMAP2)		:= gpmc-nand.o
-obj-y					+= $(nand-m) $(nand-y)
diff --git a/arch/arm/mach-omap2/gpmc-nand.c b/arch/arm/mach-omap2/gpmc-nand.c
deleted file mode 100644
index f6ac027f3c3b..000000000000
--- a/arch/arm/mach-omap2/gpmc-nand.c
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * gpmc-nand.c
- *
- * Copyright (C) 2009 Texas Instruments
- * Vimal Singh <vimalsingh@ti.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-#include <linux/omap-gpmc.h>
-#include <linux/mtd/nand.h>
-#include <linux/platform_data/mtd-nand-omap2.h>
-
-#include <asm/mach/flash.h>
-
-#include "soc.h"
-
-/* minimum size for IO mapping */
-#define	NAND_IO_SIZE	4
-
-static bool gpmc_hwecc_bch_capable(enum omap_ecc ecc_opt)
-{
-	/* platforms which support all ECC schemes */
-	if (soc_is_am33xx() || soc_is_am43xx() || cpu_is_omap44xx() ||
-		 soc_is_omap54xx() || soc_is_dra7xx())
-		return 1;
-
-	if (ecc_opt == OMAP_ECC_BCH4_CODE_HW_DETECTION_SW ||
-		 ecc_opt == OMAP_ECC_BCH8_CODE_HW_DETECTION_SW) {
-		if (cpu_is_omap24xx())
-			return 0;
-		else if (cpu_is_omap3630() && (GET_OMAP_REVISION() == 0))
-			return 0;
-		else
-			return 1;
-	}
-
-	/* OMAP3xxx do not have ELM engine, so cannot support ECC schemes
-	 * which require H/W based ECC error detection */
-	if ((cpu_is_omap34xx() || cpu_is_omap3630()) &&
-	    ((ecc_opt == OMAP_ECC_BCH4_CODE_HW) ||
-		 (ecc_opt == OMAP_ECC_BCH8_CODE_HW)))
-		return 0;
-
-	/* legacy platforms support only HAM1 (1-bit Hamming) ECC scheme */
-	if (ecc_opt == OMAP_ECC_HAM1_CODE_HW ||
-	    ecc_opt == OMAP_ECC_HAM1_CODE_SW)
-		return 1;
-	else
-		return 0;
-}
-
-/* This function will go away once the device-tree convertion is complete */
-static void gpmc_set_legacy(struct omap_nand_platform_data *gpmc_nand_data,
-			    struct gpmc_settings *s)
-{
-	/* Enable RD PIN Monitoring Reg */
-	if (gpmc_nand_data->dev_ready) {
-		s->wait_on_read = true;
-		s->wait_on_write = true;
-	}
-
-	if (gpmc_nand_data->devsize == NAND_BUSWIDTH_16)
-		s->device_width = GPMC_DEVWIDTH_16BIT;
-	else
-		s->device_width = GPMC_DEVWIDTH_8BIT;
-}
-
-int gpmc_nand_init(struct omap_nand_platform_data *gpmc_nand_data,
-		   struct gpmc_timings *gpmc_t)
-{
-	int err	= 0;
-	struct gpmc_settings s;
-	struct platform_device *pdev;
-	struct resource gpmc_nand_res[] = {
-		{ .flags = IORESOURCE_MEM, },
-		{ .flags = IORESOURCE_IRQ, },
-		{ .flags = IORESOURCE_IRQ, },
-	};
-
-	BUG_ON(gpmc_nand_data->cs >= GPMC_CS_NUM);
-
-	err = gpmc_cs_request(gpmc_nand_data->cs, NAND_IO_SIZE,
-			      (unsigned long *)&gpmc_nand_res[0].start);
-	if (err < 0) {
-		pr_err("omap2-gpmc: Cannot request GPMC CS %d, error %d\n",
-		       gpmc_nand_data->cs, err);
-		return err;
-	}
-	gpmc_nand_res[0].end = gpmc_nand_res[0].start + NAND_IO_SIZE - 1;
-	gpmc_nand_res[1].start = gpmc_get_client_irq(GPMC_IRQ_FIFOEVENTENABLE);
-	gpmc_nand_res[2].start = gpmc_get_client_irq(GPMC_IRQ_COUNT_EVENT);
-
-	memset(&s, 0, sizeof(struct gpmc_settings));
-	gpmc_set_legacy(gpmc_nand_data, &s);
-
-	s.device_nand = true;
-
-	if (gpmc_t) {
-		err = gpmc_cs_set_timings(gpmc_nand_data->cs, gpmc_t, &s);
-		if (err < 0) {
-			pr_err("omap2-gpmc: Unable to set gpmc timings: %d\n",
-			       err);
-			return err;
-		}
-	}
-
-	err = gpmc_cs_program_settings(gpmc_nand_data->cs, &s);
-	if (err < 0)
-		goto out_free_cs;
-
-	err = gpmc_configure(GPMC_CONFIG_WP, 0);
-	if (err < 0)
-		goto out_free_cs;
-
-	if (!gpmc_hwecc_bch_capable(gpmc_nand_data->ecc_opt)) {
-		pr_err("omap2-nand: Unsupported NAND ECC scheme selected\n");
-		err = -EINVAL;
-		goto out_free_cs;
-	}
-
-
-	pdev = platform_device_alloc("omap2-nand", gpmc_nand_data->cs);
-	if (pdev) {
-		err = platform_device_add_resources(pdev, gpmc_nand_res,
-						    ARRAY_SIZE(gpmc_nand_res));
-		if (!err)
-			pdev->dev.platform_data = gpmc_nand_data;
-	} else {
-		err = -ENOMEM;
-	}
-	if (err)
-		goto out_free_pdev;
-
-	err = platform_device_add(pdev);
-	if (err) {
-		dev_err(&pdev->dev, "Unable to register NAND device\n");
-		goto out_free_pdev;
-	}
-
-	return 0;
-
-out_free_pdev:
-	platform_device_put(pdev);
-out_free_cs:
-	gpmc_cs_free(gpmc_nand_data->cs);
-
-	return err;
-}
diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h
index e821a3132a3e..fd0de00c0d77 100644
--- a/include/linux/omap-gpmc.h
+++ b/include/linux/omap-gpmc.h
@@ -76,17 +76,6 @@ struct gpmc_timings;
 struct omap_nand_platform_data;
 struct omap_onenand_platform_data;
 
-#if IS_ENABLED(CONFIG_MTD_NAND_OMAP2)
-extern int gpmc_nand_init(struct omap_nand_platform_data *d,
-			  struct gpmc_timings *gpmc_t);
-#else
-static inline int gpmc_nand_init(struct omap_nand_platform_data *d,
-				 struct gpmc_timings *gpmc_t)
-{
-	return 0;
-}
-#endif
-
 #if IS_ENABLED(CONFIG_MTD_ONENAND_OMAP2)
 extern int gpmc_onenand_init(struct omap_onenand_platform_data *d);
 #else
-- 
cgit v1.2.3


From 0aa5277c3a6dbe9d5991c490ef23fe900254d931 Mon Sep 17 00:00:00 2001
From: Zhao Yan <yan.y.zhao@intel.com>
Date: Tue, 28 Feb 2017 15:39:25 +0800
Subject: drm/i915/gvt: have more registers with F_CMD_ACCESS flags set

those registers are render registers, should have F_CMD_ACCESS flag set

v4:
rebase to lastest code base

v3:
per zhenyu's comments, move newly added registers to a separate patch

v2:
per Kevin's comments, move newly added registers to the tails of lists.

Signed-off-by: Zhao Yan <yan.y.zhao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/handlers.c | 223 ++++++++++++++++++++----------------
 1 file changed, 123 insertions(+), 100 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 57b4d538f370..37956eee342c 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1523,6 +1523,9 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu,
 #define MMIO_GM(reg, d, r, w) \
 	MMIO_F(reg, 4, F_GMADR, 0xFFFFF000, 0, d, r, w)
 
+#define MMIO_GM_RDR(reg, d, r, w) \
+	MMIO_F(reg, 4, F_GMADR | F_CMD_ACCESS, 0xFFFFF000, 0, d, r, w)
+
 #define MMIO_RO(reg, d, f, rm, r, w) \
 	MMIO_F(reg, 4, F_RO | f, 0, rm, d, r, w)
 
@@ -1542,6 +1545,9 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu,
 #define MMIO_RING_GM(prefix, d, r, w) \
 	MMIO_RING_F(prefix, 4, F_GMADR, 0xFFFF0000, 0, d, r, w)
 
+#define MMIO_RING_GM_RDR(prefix, d, r, w) \
+	MMIO_RING_F(prefix, 4, F_GMADR | F_CMD_ACCESS, 0xFFFF0000, 0, d, r, w)
+
 #define MMIO_RING_RO(prefix, d, f, rm, r, w) \
 	MMIO_RING_F(prefix, 4, F_RO | f, 0, rm, d, r, w)
 
@@ -1550,45 +1556,48 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 	struct drm_i915_private *dev_priv = gvt->dev_priv;
 	int ret;
 
-	MMIO_RING_DFH(RING_IMR, D_ALL, 0, NULL, intel_vgpu_reg_imr_handler);
+	MMIO_RING_DFH(RING_IMR, D_ALL, F_CMD_ACCESS, NULL,
+		intel_vgpu_reg_imr_handler);
 
 	MMIO_DFH(SDEIMR, D_ALL, 0, NULL, intel_vgpu_reg_imr_handler);
 	MMIO_DFH(SDEIER, D_ALL, 0, NULL, intel_vgpu_reg_ier_handler);
 	MMIO_DFH(SDEIIR, D_ALL, 0, NULL, intel_vgpu_reg_iir_handler);
 	MMIO_D(SDEISR, D_ALL);
 
-	MMIO_RING_D(RING_HWSTAM, D_ALL);
+	MMIO_RING_DFH(RING_HWSTAM, D_ALL, F_CMD_ACCESS, NULL, NULL);
 
-	MMIO_GM(RENDER_HWS_PGA_GEN7, D_ALL, NULL, NULL);
-	MMIO_GM(BSD_HWS_PGA_GEN7, D_ALL, NULL, NULL);
-	MMIO_GM(BLT_HWS_PGA_GEN7, D_ALL, NULL, NULL);
-	MMIO_GM(VEBOX_HWS_PGA_GEN7, D_ALL, NULL, NULL);
+	MMIO_GM_RDR(RENDER_HWS_PGA_GEN7, D_ALL, NULL, NULL);
+	MMIO_GM_RDR(BSD_HWS_PGA_GEN7, D_ALL, NULL, NULL);
+	MMIO_GM_RDR(BLT_HWS_PGA_GEN7, D_ALL, NULL, NULL);
+	MMIO_GM_RDR(VEBOX_HWS_PGA_GEN7, D_ALL, NULL, NULL);
 
 #define RING_REG(base) (base + 0x28)
-	MMIO_RING_D(RING_REG, D_ALL);
+	MMIO_RING_DFH(RING_REG, D_ALL, F_CMD_ACCESS, NULL, NULL);
 #undef RING_REG
 
 #define RING_REG(base) (base + 0x134)
-	MMIO_RING_D(RING_REG, D_ALL);
+	MMIO_RING_DFH(RING_REG, D_ALL, F_CMD_ACCESS, NULL, NULL);
 #undef RING_REG
 
-	MMIO_GM(0x2148, D_ALL, NULL, NULL);
-	MMIO_GM(CCID, D_ALL, NULL, NULL);
-	MMIO_GM(0x12198, D_ALL, NULL, NULL);
+	MMIO_GM_RDR(0x2148, D_ALL, NULL, NULL);
+	MMIO_GM_RDR(CCID, D_ALL, NULL, NULL);
+	MMIO_GM_RDR(0x12198, D_ALL, NULL, NULL);
 	MMIO_D(GEN7_CXT_SIZE, D_ALL);
 
-	MMIO_RING_D(RING_TAIL, D_ALL);
-	MMIO_RING_D(RING_HEAD, D_ALL);
-	MMIO_RING_D(RING_CTL, D_ALL);
-	MMIO_RING_D(RING_ACTHD, D_ALL);
-	MMIO_RING_GM(RING_START, D_ALL, NULL, NULL);
+	MMIO_RING_DFH(RING_TAIL, D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_RING_DFH(RING_HEAD, D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_RING_DFH(RING_CTL, D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_RING_DFH(RING_ACTHD, D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_RING_GM_RDR(RING_START, D_ALL, NULL, NULL);
 
 	/* RING MODE */
 #define RING_REG(base) (base + 0x29c)
-	MMIO_RING_DFH(RING_REG, D_ALL, F_MODE_MASK, NULL, ring_mode_mmio_write);
+	MMIO_RING_DFH(RING_REG, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL,
+		ring_mode_mmio_write);
 #undef RING_REG
 
-	MMIO_RING_DFH(RING_MI_MODE, D_ALL, F_MODE_MASK, NULL, NULL);
+	MMIO_RING_DFH(RING_MI_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS,
+		NULL, NULL);
 	MMIO_RING_DFH(RING_INSTPM, D_ALL, F_MODE_MASK | F_CMD_ACCESS,
 			NULL, NULL);
 	MMIO_RING_DFH(RING_TIMESTAMP, D_ALL, F_CMD_ACCESS,
@@ -1596,28 +1605,30 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 	MMIO_RING_DFH(RING_TIMESTAMP_UDW, D_ALL, F_CMD_ACCESS,
 			ring_timestamp_mmio_read, NULL);
 
-	MMIO_DFH(GEN7_GT_MODE, D_ALL, F_MODE_MASK, NULL, NULL);
-	MMIO_DFH(CACHE_MODE_0_GEN7, D_ALL, F_MODE_MASK, NULL, NULL);
+	MMIO_DFH(GEN7_GT_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(CACHE_MODE_0_GEN7, D_ALL, F_MODE_MASK | F_CMD_ACCESS,
+		NULL, NULL);
 	MMIO_DFH(CACHE_MODE_1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
-	MMIO_DFH(CACHE_MODE_0, D_ALL, F_MODE_MASK, NULL, NULL);
-	MMIO_DFH(0x2124, D_ALL, F_MODE_MASK, NULL, NULL);
-
-	MMIO_DFH(0x20dc, D_ALL, F_MODE_MASK, NULL, NULL);
-	MMIO_DFH(_3D_CHICKEN3, D_ALL, F_MODE_MASK, NULL, NULL);
-	MMIO_DFH(0x2088, D_ALL, F_MODE_MASK, NULL, NULL);
-	MMIO_DFH(0x20e4, D_ALL, F_MODE_MASK, NULL, NULL);
-	MMIO_DFH(0x2470, D_ALL, F_MODE_MASK, NULL, NULL);
-	MMIO_D(GAM_ECOCHK, D_ALL);
-	MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK, NULL, NULL);
+	MMIO_DFH(CACHE_MODE_0, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x2124, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+
+	MMIO_DFH(0x20dc, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_3D_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x2088, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x20e4, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x2470, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(GAM_ECOCHK, D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS,
+		NULL, NULL);
 	MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
-	MMIO_D(0x9030, D_ALL);
-	MMIO_D(0x20a0, D_ALL);
-	MMIO_D(0x2420, D_ALL);
-	MMIO_D(0x2430, D_ALL);
-	MMIO_D(0x2434, D_ALL);
-	MMIO_D(0x2438, D_ALL);
-	MMIO_D(0x243c, D_ALL);
-	MMIO_DFH(0x7018, D_ALL, F_MODE_MASK, NULL, NULL);
+	MMIO_DFH(0x9030, D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x20a0, D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x2420, D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x2430, D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x2434, D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x2438, D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x243c, D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x7018, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(GEN7_HALF_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
 
@@ -2148,8 +2159,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(FORCEWAKE_ACK, D_ALL);
 	MMIO_D(GEN6_GT_CORE_STATUS, D_ALL);
 	MMIO_D(GEN6_GT_THREAD_STATUS_REG, D_ALL);
-	MMIO_D(GTFIFODBG, D_ALL);
-	MMIO_D(GTFIFOCTL, D_ALL);
+	MMIO_DFH(GTFIFODBG, D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(GTFIFOCTL, D_ALL, F_CMD_ACCESS, NULL, NULL);
 	MMIO_DH(FORCEWAKE_MT, D_PRE_SKL, NULL, mul_force_wake_write);
 	MMIO_DH(FORCEWAKE_ACK_HSW, D_HSW | D_BDW, NULL, NULL);
 	MMIO_D(ECOBUS, D_ALL);
@@ -2291,29 +2302,29 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 	MMIO_DFH(0x12178, D_ALL, F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(0x1217c, D_ALL, F_CMD_ACCESS, NULL, NULL);
 
-	MMIO_F(0x2290, 8, 0, 0, 0, D_HSW_PLUS, NULL, NULL);
-	MMIO_D(GEN7_OACONTROL, D_HSW);
+	MMIO_F(0x2290, 8, F_CMD_ACCESS, 0, 0, D_HSW_PLUS, NULL, NULL);
+	MMIO_DFH(GEN7_OACONTROL, D_HSW, F_CMD_ACCESS, NULL, NULL);
 	MMIO_D(0x2b00, D_BDW_PLUS);
 	MMIO_D(0x2360, D_BDW_PLUS);
-	MMIO_F(0x5200, 32, 0, 0, 0, D_ALL, NULL, NULL);
-	MMIO_F(0x5240, 32, 0, 0, 0, D_ALL, NULL, NULL);
-	MMIO_F(0x5280, 16, 0, 0, 0, D_ALL, NULL, NULL);
+	MMIO_F(0x5200, 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+	MMIO_F(0x5240, 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+	MMIO_F(0x5280, 16, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
 
 	MMIO_DFH(0x1c17c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(0x1c178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
-	MMIO_D(BCS_SWCTRL, D_ALL);
-
-	MMIO_F(HS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL);
-	MMIO_F(DS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL);
-	MMIO_F(IA_VERTICES_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL);
-	MMIO_F(IA_PRIMITIVES_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL);
-	MMIO_F(VS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL);
-	MMIO_F(GS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL);
-	MMIO_F(GS_PRIMITIVES_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL);
-	MMIO_F(CL_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL);
-	MMIO_F(CL_PRIMITIVES_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL);
-	MMIO_F(PS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL);
-	MMIO_F(PS_DEPTH_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL);
+	MMIO_DFH(BCS_SWCTRL, D_ALL, F_CMD_ACCESS, NULL, NULL);
+
+	MMIO_F(HS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+	MMIO_F(DS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+	MMIO_F(IA_VERTICES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+	MMIO_F(IA_PRIMITIVES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+	MMIO_F(VS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+	MMIO_F(GS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+	MMIO_F(GS_PRIMITIVES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+	MMIO_F(CL_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+	MMIO_F(CL_PRIMITIVES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+	MMIO_F(PS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+	MMIO_F(PS_DEPTH_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
 	MMIO_DH(0x4260, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler);
 	MMIO_DH(0x4264, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler);
 	MMIO_DH(0x4268, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler);
@@ -2329,7 +2340,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 	struct drm_i915_private *dev_priv = gvt->dev_priv;
 	int ret;
 
-	MMIO_DH(RING_IMR(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL,
+	MMIO_DFH(RING_IMR(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, NULL,
 			intel_vgpu_reg_imr_handler);
 
 	MMIO_DH(GEN8_GT_IMR(0), D_BDW_PLUS, NULL, intel_vgpu_reg_imr_handler);
@@ -2394,24 +2405,31 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 	MMIO_DH(GEN8_MASTER_IRQ, D_BDW_PLUS, NULL,
 		intel_vgpu_reg_master_irq_handler);
 
-	MMIO_D(RING_HWSTAM(GEN8_BSD2_RING_BASE), D_BDW_PLUS);
-	MMIO_D(0x1c134, D_BDW_PLUS);
-
-	MMIO_D(RING_TAIL(GEN8_BSD2_RING_BASE), D_BDW_PLUS);
-	MMIO_D(RING_HEAD(GEN8_BSD2_RING_BASE),  D_BDW_PLUS);
-	MMIO_GM(RING_START(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL);
-	MMIO_D(RING_CTL(GEN8_BSD2_RING_BASE), D_BDW_PLUS);
-	MMIO_D(RING_ACTHD(GEN8_BSD2_RING_BASE), D_BDW_PLUS);
-	MMIO_D(RING_ACTHD_UDW(GEN8_BSD2_RING_BASE), D_BDW_PLUS);
-	MMIO_DFH(0x1c29c, D_BDW_PLUS, F_MODE_MASK, NULL, ring_mode_mmio_write);
-	MMIO_DFH(RING_MI_MODE(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_MODE_MASK,
-			NULL, NULL);
-	MMIO_DFH(RING_INSTPM(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_MODE_MASK,
-			NULL, NULL);
+	MMIO_DFH(RING_HWSTAM(GEN8_BSD2_RING_BASE), D_BDW_PLUS,
+		F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x1c134, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+
+	MMIO_DFH(RING_TAIL(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS,
+		NULL, NULL);
+	MMIO_DFH(RING_HEAD(GEN8_BSD2_RING_BASE),  D_BDW_PLUS,
+		F_CMD_ACCESS, NULL, NULL);
+	MMIO_GM_RDR(RING_START(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL);
+	MMIO_DFH(RING_CTL(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS,
+		NULL, NULL);
+	MMIO_DFH(RING_ACTHD(GEN8_BSD2_RING_BASE), D_BDW_PLUS,
+		F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(RING_ACTHD_UDW(GEN8_BSD2_RING_BASE), D_BDW_PLUS,
+		F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x1c29c, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL,
+		ring_mode_mmio_write);
+	MMIO_DFH(RING_MI_MODE(GEN8_BSD2_RING_BASE), D_BDW_PLUS,
+		F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(RING_INSTPM(GEN8_BSD2_RING_BASE), D_BDW_PLUS,
+		F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(RING_TIMESTAMP(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS,
 			ring_timestamp_mmio_read, NULL);
 
-	MMIO_RING_D(RING_ACTHD_UDW, D_BDW_PLUS);
+	MMIO_RING_DFH(RING_ACTHD_UDW, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
 
 #define RING_REG(base) (base + 0xd0)
 	MMIO_RING_F(RING_REG, 4, F_RO, 0,
@@ -2428,13 +2446,16 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 #undef RING_REG
 
 #define RING_REG(base) (base + 0x234)
-	MMIO_RING_F(RING_REG, 8, F_RO, 0, ~0, D_BDW_PLUS, NULL, NULL);
-	MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 4, F_RO, 0, ~0LL, D_BDW_PLUS, NULL, NULL);
+	MMIO_RING_F(RING_REG, 8, F_RO | F_CMD_ACCESS, 0, ~0, D_BDW_PLUS,
+		NULL, NULL);
+	MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 4, F_RO | F_CMD_ACCESS, 0,
+		~0LL, D_BDW_PLUS, NULL, NULL);
 #undef RING_REG
 
 #define RING_REG(base) (base + 0x244)
-	MMIO_RING_D(RING_REG, D_BDW_PLUS);
-	MMIO_D(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS);
+	MMIO_RING_DFH(RING_REG, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS,
+		NULL, NULL);
 #undef RING_REG
 
 #define RING_REG(base) (base + 0x370)
@@ -2468,8 +2489,8 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 	MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 32, 0, 0, 0, D_BDW_PLUS, NULL, NULL);
 #undef RING_REG
 
-	MMIO_RING_GM(RING_HWS_PGA, D_BDW_PLUS, NULL, NULL);
-	MMIO_GM(0x1c080, D_BDW_PLUS, NULL, NULL);
+	MMIO_RING_GM_RDR(RING_HWS_PGA, D_BDW_PLUS, NULL, NULL);
+	MMIO_GM_RDR(RING_HWS_PGA(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL);
 
 	MMIO_DFH(HDC_CHICKEN0, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
 
@@ -2490,15 +2511,17 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(GEN8_EU_DISABLE2, D_BDW_PLUS);
 
 	MMIO_D(0xfdc, D_BDW_PLUS);
-	MMIO_DFH(GEN8_ROW_CHICKEN, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
-	MMIO_D(GEN7_ROW_CHICKEN2, D_BDW_PLUS);
-	MMIO_D(GEN8_UCGCTL6, D_BDW_PLUS);
+	MMIO_DFH(GEN8_ROW_CHICKEN, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS,
+		NULL, NULL);
+	MMIO_DFH(GEN7_ROW_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS,
+		NULL, NULL);
+	MMIO_DFH(GEN8_UCGCTL6, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
 
-	MMIO_D(0xb1f0, D_BDW);
-	MMIO_D(0xb1c0, D_BDW);
+	MMIO_DFH(0xb1f0, D_BDW, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0xb1c0, D_BDW, F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(GEN8_L3SQCREG4, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
-	MMIO_D(0xb100, D_BDW);
-	MMIO_D(0xb10c, D_BDW);
+	MMIO_DFH(0xb100, D_BDW, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0xb10c, D_BDW, F_CMD_ACCESS, NULL, NULL);
 	MMIO_D(0xb110, D_BDW);
 
 	MMIO_F(0x24d0, 48, F_CMD_ACCESS, 0, 0, D_BDW_PLUS,
@@ -2508,10 +2531,10 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(0x44484, D_BDW_PLUS);
 	MMIO_D(0x4448c, D_BDW_PLUS);
 
-	MMIO_D(0x83a4, D_BDW);
+	MMIO_DFH(0x83a4, D_BDW, F_CMD_ACCESS, NULL, NULL);
 	MMIO_D(GEN8_L3_LRA_1_GPGPU, D_BDW_PLUS);
 
-	MMIO_D(0x8430, D_BDW);
+	MMIO_DFH(0x8430, D_BDW, F_CMD_ACCESS, NULL, NULL);
 
 	MMIO_D(0x110000, D_BDW_PLUS);
 
@@ -2523,9 +2546,9 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 	MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(0xe188, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(HALF_SLICE_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
-	MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK, NULL, NULL);
+	MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
 
-	MMIO_D(0x2248, D_BDW);
+	MMIO_DFH(0x2248, D_BDW, F_CMD_ACCESS, NULL, NULL);
 
 	return 0;
 }
@@ -2707,15 +2730,15 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 
 	MMIO_D(0xd08, D_SKL);
 	MMIO_D(0x20e0, D_SKL);
-	MMIO_D(0x20ec, D_SKL);
+	MMIO_DFH(0x20ec, D_SKL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
 
 	/* TRTT */
-	MMIO_D(0x4de0, D_SKL);
-	MMIO_D(0x4de4, D_SKL);
-	MMIO_D(0x4de8, D_SKL);
-	MMIO_D(0x4dec, D_SKL);
-	MMIO_D(0x4df0, D_SKL);
-	MMIO_DH(0x4df4, D_SKL, NULL, gen9_trtte_write);
+	MMIO_DFH(0x4de0, D_SKL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x4de4, D_SKL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x4de8, D_SKL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x4dec, D_SKL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x4df0, D_SKL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x4df4, D_SKL, F_CMD_ACCESS, NULL, gen9_trtte_write);
 	MMIO_DH(0x4dfc, D_SKL, NULL, gen9_trtt_chicken_write);
 
 	MMIO_D(0x45008, D_SKL);
@@ -2739,7 +2762,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(0x65f08, D_SKL);
 	MMIO_D(0x320f0, D_SKL);
 
-	MMIO_D(_REG_VCS2_EXCC, D_SKL);
+	MMIO_DFH(_REG_VCS2_EXCC, D_SKL, F_CMD_ACCESS, NULL, NULL);
 	MMIO_D(0x70034, D_SKL);
 	MMIO_D(0x71034, D_SKL);
 	MMIO_D(0x72034, D_SKL);
@@ -2752,7 +2775,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(_PLANE_KEYMSK_1(PIPE_C), D_SKL);
 
 	MMIO_D(0x44500, D_SKL);
-	MMIO_D(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS);
+	MMIO_DFH(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 9112caafdf01439a6e43f4d8c09ceed7dc613d4a Mon Sep 17 00:00:00 2001
From: Zhao Yan <yan.y.zhao@intel.com>
Date: Tue, 28 Feb 2017 15:40:10 +0800
Subject: drm/i915/gvt: add more registers into handlers list

those registers are render registers with F_CMD_ACCESS flag set

Signed-off-by: Zhao Yan <yan.y.zhao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/handlers.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 37956eee342c..ef17c38e00c4 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -2332,6 +2332,17 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 	MMIO_DH(0x4270, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler);
 	MMIO_DFH(0x4094, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
 
+	MMIO_DFH(ARB_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_RING_GM_RDR(RING_BBADDR, D_ALL, NULL, NULL);
+	MMIO_DFH(0x2220, D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x12220, D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x22220, D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_RING_DFH(RING_SYNC_1, D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_RING_DFH(RING_SYNC_0, D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x22178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x1a178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x1a17c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x2217c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
 	return 0;
 }
 
@@ -2550,6 +2561,15 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 
 	MMIO_DFH(0x2248, D_BDW, F_CMD_ACCESS, NULL, NULL);
 
+	MMIO_DFH(0xe220, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0xe230, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0xe240, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0xe260, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0xe270, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0xe280, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0xe2a0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0xe2b0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0xe2c0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
 	return 0;
 }
 
@@ -2776,6 +2796,8 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 
 	MMIO_D(0x44500, D_SKL);
 	MMIO_DFH(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL, F_MODE_MASK | F_CMD_ACCESS,
+		NULL, NULL);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 1f58af304cce9e4a25b62b3619e69c586203c8ca Mon Sep 17 00:00:00 2001
From: Zhao Yan <yan.y.zhao@intel.com>
Date: Tue, 28 Feb 2017 15:41:03 +0800
Subject: drm/i915/gvt: fix an error for one register

register 0x20e0 should be mode register

v2: rebased to latest code base

Signed-off-by: Zhao Yan <yan.y.zhao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/handlers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index ef17c38e00c4..548aedfbd402 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -2749,7 +2749,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 	MMIO_F(0xb020, 0x80, F_CMD_ACCESS, 0, 0, D_SKL, NULL, NULL);
 
 	MMIO_D(0xd08, D_SKL);
-	MMIO_D(0x20e0, D_SKL);
+	MMIO_DFH(0x20e0, D_SKL, F_MODE_MASK, NULL, NULL);
 	MMIO_DFH(0x20ec, D_SKL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
 
 	/* TRTT */
-- 
cgit v1.2.3


From 858b0f571d30916bd69c45922045f24f26d6bfc9 Mon Sep 17 00:00:00 2001
From: Bing Niu <bing.niu@intel.com>
Date: Tue, 28 Feb 2017 11:39:48 -0500
Subject: drm/i915/gvt: set SFUSE_STRAP properly for vitual monitor detection

update the correct virtual montior connection status to vreg

v2: address yulei's comment on commit message

Signed-off-by: Bing Niu <bing.niu@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/display.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c
index 43e02e038375..5419ae6ec633 100644
--- a/drivers/gpu/drm/i915/gvt/display.c
+++ b/drivers/gpu/drm/i915/gvt/display.c
@@ -176,14 +176,20 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
 		vgpu_vreg(vgpu, SDEISR) &= ~(SDE_PORTA_HOTPLUG_SPT |
 				SDE_PORTE_HOTPLUG_SPT);
 
-	if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B))
+	if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) {
 		vgpu_vreg(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT;
+		vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED;
+	}
 
-	if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C))
+	if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) {
 		vgpu_vreg(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT;
+		vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIC_DETECTED;
+	}
 
-	if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D))
+	if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D)) {
 		vgpu_vreg(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT;
+		vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED;
+	}
 
 	if (IS_SKYLAKE(dev_priv) &&
 			intel_vgpu_has_monitor_on_port(vgpu, PORT_E)) {
@@ -196,6 +202,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
 				GEN8_PORT_DP_A_HOTPLUG;
 		else
 			vgpu_vreg(vgpu, SDEISR) |= SDE_PORTA_HOTPLUG_SPT;
+
+		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_A)) |= DDI_INIT_DISPLAY_DETECTED;
 	}
 }
 
-- 
cgit v1.2.3


From d1a9ccc4b1374a5a7762031fd8e4e398c68549e6 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 28 Feb 2017 11:02:48 +0000
Subject: scsi: qedi: fix missing return error code check on call to
 qedi_setup_int

The call to qedi_setup_int is not updating the return code rc yet rc is
being checked for an error. Fix this by assigning rc to the return code
from the call to qedi_setup_int.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Manish Rangankar <Manish.Rangankar@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qedi/qedi_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index 5eda21d903e9..8e3d92807cb8 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -1805,7 +1805,7 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
 	 */
 	qedi_ops->common->update_pf_params(qedi->cdev, &qedi->pf_params);
 
-	qedi_setup_int(qedi);
+	rc = qedi_setup_int(qedi);
 	if (rc)
 		goto stop_iscsi_func;
 
-- 
cgit v1.2.3


From 6f8830f5bbab16e54f261de187f3df4644a5b977 Mon Sep 17 00:00:00 2001
From: Chris Leech <cleech@redhat.com>
Date: Mon, 27 Feb 2017 16:58:36 -0800
Subject: scsi: libiscsi: add lock around task lists to fix list corruption
 regression

There's a rather long standing regression from the commit "libiscsi:
Reduce locking contention in fast path"

Depending on iSCSI target behavior, it's possible to hit the case in
iscsi_complete_task where the task is still on a pending list
(!list_empty(&task->running)).  When that happens the task is removed
from the list while holding the session back_lock, but other task list
modification occur under the frwd_lock.  That leads to linked list
corruption and eventually a panicked system.

Rather than back out the session lock split entirely, in order to try
and keep some of the performance gains this patch adds another lock to
maintain the task lists integrity.

Major enterprise supported kernels have been backing out the lock split
for while now, thanks to the efforts at IBM where a lab setup has the
most reliable reproducer I've seen on this issue.  This patch has been
tested there successfully.

Signed-off-by: Chris Leech <cleech@redhat.com>
Fixes: 659743b02c41 ("[SCSI] libiscsi: Reduce locking contention in fast path")
Reported-by: Prashantha Subbarao <psubbara@us.ibm.com>
Reviewed-by: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
Cc: <stable@vger.kernel.org> # v3.15+
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/libiscsi.c | 26 +++++++++++++++++++++++++-
 include/scsi/libiscsi.h |  1 +
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 834d1212b6d5..3fca34a675af 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -560,8 +560,12 @@ static void iscsi_complete_task(struct iscsi_task *task, int state)
 	WARN_ON_ONCE(task->state == ISCSI_TASK_FREE);
 	task->state = state;
 
-	if (!list_empty(&task->running))
+	spin_lock_bh(&conn->taskqueuelock);
+	if (!list_empty(&task->running)) {
+		pr_debug_once("%s while task on list", __func__);
 		list_del_init(&task->running);
+	}
+	spin_unlock_bh(&conn->taskqueuelock);
 
 	if (conn->task == task)
 		conn->task = NULL;
@@ -783,7 +787,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 		if (session->tt->xmit_task(task))
 			goto free_task;
 	} else {
+		spin_lock_bh(&conn->taskqueuelock);
 		list_add_tail(&task->running, &conn->mgmtqueue);
+		spin_unlock_bh(&conn->taskqueuelock);
 		iscsi_conn_queue_work(conn);
 	}
 
@@ -1474,8 +1480,10 @@ void iscsi_requeue_task(struct iscsi_task *task)
 	 * this may be on the requeue list already if the xmit_task callout
 	 * is handling the r2ts while we are adding new ones
 	 */
+	spin_lock_bh(&conn->taskqueuelock);
 	if (list_empty(&task->running))
 		list_add_tail(&task->running, &conn->requeue);
+	spin_unlock_bh(&conn->taskqueuelock);
 	iscsi_conn_queue_work(conn);
 }
 EXPORT_SYMBOL_GPL(iscsi_requeue_task);
@@ -1512,22 +1520,26 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
 	 * only have one nop-out as a ping from us and targets should not
 	 * overflow us with nop-ins
 	 */
+	spin_lock_bh(&conn->taskqueuelock);
 check_mgmt:
 	while (!list_empty(&conn->mgmtqueue)) {
 		conn->task = list_entry(conn->mgmtqueue.next,
 					 struct iscsi_task, running);
 		list_del_init(&conn->task->running);
+		spin_unlock_bh(&conn->taskqueuelock);
 		if (iscsi_prep_mgmt_task(conn, conn->task)) {
 			/* regular RX path uses back_lock */
 			spin_lock_bh(&conn->session->back_lock);
 			__iscsi_put_task(conn->task);
 			spin_unlock_bh(&conn->session->back_lock);
 			conn->task = NULL;
+			spin_lock_bh(&conn->taskqueuelock);
 			continue;
 		}
 		rc = iscsi_xmit_task(conn);
 		if (rc)
 			goto done;
+		spin_lock_bh(&conn->taskqueuelock);
 	}
 
 	/* process pending command queue */
@@ -1535,19 +1547,24 @@ check_mgmt:
 		conn->task = list_entry(conn->cmdqueue.next, struct iscsi_task,
 					running);
 		list_del_init(&conn->task->running);
+		spin_unlock_bh(&conn->taskqueuelock);
 		if (conn->session->state == ISCSI_STATE_LOGGING_OUT) {
 			fail_scsi_task(conn->task, DID_IMM_RETRY);
+			spin_lock_bh(&conn->taskqueuelock);
 			continue;
 		}
 		rc = iscsi_prep_scsi_cmd_pdu(conn->task);
 		if (rc) {
 			if (rc == -ENOMEM || rc == -EACCES) {
+				spin_lock_bh(&conn->taskqueuelock);
 				list_add_tail(&conn->task->running,
 					      &conn->cmdqueue);
 				conn->task = NULL;
+				spin_unlock_bh(&conn->taskqueuelock);
 				goto done;
 			} else
 				fail_scsi_task(conn->task, DID_ABORT);
+			spin_lock_bh(&conn->taskqueuelock);
 			continue;
 		}
 		rc = iscsi_xmit_task(conn);
@@ -1558,6 +1575,7 @@ check_mgmt:
 		 * we need to check the mgmt queue for nops that need to
 		 * be sent to aviod starvation
 		 */
+		spin_lock_bh(&conn->taskqueuelock);
 		if (!list_empty(&conn->mgmtqueue))
 			goto check_mgmt;
 	}
@@ -1577,12 +1595,15 @@ check_mgmt:
 		conn->task = task;
 		list_del_init(&conn->task->running);
 		conn->task->state = ISCSI_TASK_RUNNING;
+		spin_unlock_bh(&conn->taskqueuelock);
 		rc = iscsi_xmit_task(conn);
 		if (rc)
 			goto done;
+		spin_lock_bh(&conn->taskqueuelock);
 		if (!list_empty(&conn->mgmtqueue))
 			goto check_mgmt;
 	}
+	spin_unlock_bh(&conn->taskqueuelock);
 	spin_unlock_bh(&conn->session->frwd_lock);
 	return -ENODATA;
 
@@ -1738,7 +1759,9 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc)
 			goto prepd_reject;
 		}
 	} else {
+		spin_lock_bh(&conn->taskqueuelock);
 		list_add_tail(&task->running, &conn->cmdqueue);
+		spin_unlock_bh(&conn->taskqueuelock);
 		iscsi_conn_queue_work(conn);
 	}
 
@@ -2896,6 +2919,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
 	INIT_LIST_HEAD(&conn->mgmtqueue);
 	INIT_LIST_HEAD(&conn->cmdqueue);
 	INIT_LIST_HEAD(&conn->requeue);
+	spin_lock_init(&conn->taskqueuelock);
 	INIT_WORK(&conn->xmitwork, iscsi_xmitworker);
 
 	/* allocate login_task used for the login/text sequences */
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index b0e275de6dec..583875ea136a 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -196,6 +196,7 @@ struct iscsi_conn {
 	struct iscsi_task	*task;		/* xmit task in progress */
 
 	/* xmit */
+	spinlock_t		taskqueuelock;  /* protects the next three lists */
 	struct list_head	mgmtqueue;	/* mgmt (control) xmit queue */
 	struct list_head	cmdqueue;	/* data-path cmd queue */
 	struct list_head	requeue;	/* tasks needing another run */
-- 
cgit v1.2.3


From b6b6fbc8310e1b12b05717da9df6953b138f812b Mon Sep 17 00:00:00 2001
From: Chuanxiao Dong <chuanxiao.dong@intel.com>
Date: Wed, 1 Mar 2017 14:34:52 +0800
Subject: drm/i915/gvt: use pfn_valid for better checking

Before get the page from pfn, use pfn_valid to check if pfn
is able to translate to page structure.

Signed-off-by: Chuanxiao Dong <chuanxiao.dong@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/kvmgt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 182914c22ac5..241354890603 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -96,10 +96,10 @@ static int gvt_dma_map_iova(struct intel_vgpu *vgpu, kvm_pfn_t pfn,
 	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
 	dma_addr_t daddr;
 
-	page = pfn_to_page(pfn);
-	if (is_error_page(page))
+	if (unlikely(!pfn_valid(pfn)))
 		return -EFAULT;
 
+	page = pfn_to_page(pfn);
 	daddr = dma_map_page(dev, page, 0, PAGE_SIZE,
 			PCI_DMA_BIDIRECTIONAL);
 	if (dma_mapping_error(dev, daddr))
-- 
cgit v1.2.3


From 9dcfe2c75b51f454f39c2de4756e841228865b47 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 1 Mar 2017 09:25:55 +0100
Subject: locking/refcounts: Change WARN() to WARN_ONCE()

Linus noticed that the new refcount.h APIs used WARN(), which would turn
into a dmesg DoS if it triggers frequently on some buggy driver.

So make sure we only warn once. These warnings are never supposed to happen,
so it's typically not a problem to lose subsequent warnings.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Elena Reshetova <elena.reshetova@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/CA+55aFzbYUTZ=oqZ2YgDjY0C2_n6ODhTfqj6V+m5xWmDxsuB0w@mail.gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 lib/refcount.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/lib/refcount.c b/lib/refcount.c
index 1d33366189d1..aa09ad3c30b0 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -58,7 +58,7 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r)
 		val = old;
 	}
 
-	WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
+	WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
 
 	return true;
 }
@@ -66,7 +66,7 @@ EXPORT_SYMBOL_GPL(refcount_add_not_zero);
 
 void refcount_add(unsigned int i, refcount_t *r)
 {
-	WARN(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n");
+	WARN_ONCE(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n");
 }
 EXPORT_SYMBOL_GPL(refcount_add);
 
@@ -97,7 +97,7 @@ bool refcount_inc_not_zero(refcount_t *r)
 		val = old;
 	}
 
-	WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
+	WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
 
 	return true;
 }
@@ -111,7 +111,7 @@ EXPORT_SYMBOL_GPL(refcount_inc_not_zero);
  */
 void refcount_inc(refcount_t *r)
 {
-	WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n");
+	WARN_ONCE(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n");
 }
 EXPORT_SYMBOL_GPL(refcount_inc);
 
@@ -125,7 +125,7 @@ bool refcount_sub_and_test(unsigned int i, refcount_t *r)
 
 		new = val - i;
 		if (new > val) {
-			WARN(new > val, "refcount_t: underflow; use-after-free.\n");
+			WARN_ONCE(new > val, "refcount_t: underflow; use-after-free.\n");
 			return false;
 		}
 
@@ -164,7 +164,7 @@ EXPORT_SYMBOL_GPL(refcount_dec_and_test);
 
 void refcount_dec(refcount_t *r)
 {
-	WARN(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n");
+	WARN_ONCE(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n");
 }
 EXPORT_SYMBOL_GPL(refcount_dec);
 
@@ -204,7 +204,7 @@ bool refcount_dec_not_one(refcount_t *r)
 
 		new = val - 1;
 		if (new > val) {
-			WARN(new > val, "refcount_t: underflow; use-after-free.\n");
+			WARN_ONCE(new > val, "refcount_t: underflow; use-after-free.\n");
 			return true;
 		}
 
-- 
cgit v1.2.3


From 75013fb16f8484898eaa8d0b08fed942d790f029 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Wed, 1 Mar 2017 01:23:24 +0900
Subject: kprobes/x86: Fix kernel panic when certain exception-handling
 addresses are probed

Fix to the exception table entry check by using probed address
instead of the address of copied instruction.

This bug may cause unexpected kernel panic if user probe an address
where an exception can happen which should be fixup by __ex_table
(e.g. copy_from_user.)

Unless user puts a kprobe on such address, this doesn't
cause any problem.

This bug has been introduced years ago, by commit:

  464846888d9a ("x86/kprobes: Fix a bug which can modify kernel code permanently").

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 464846888d9a ("x86/kprobes: Fix a bug which can modify kernel code permanently")
Link: http://lkml.kernel.org/r/148829899399.28855.12581062400757221722.stgit@devbox
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/kprobes/common.h | 2 +-
 arch/x86/kernel/kprobes/core.c   | 6 +++---
 arch/x86/kernel/kprobes/opt.c    | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
index c6ee63f927ab..d688826e5736 100644
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -67,7 +67,7 @@
 #endif
 
 /* Ensure if the instruction can be boostable */
-extern int can_boost(kprobe_opcode_t *instruction);
+extern int can_boost(kprobe_opcode_t *instruction, void *addr);
 /* Recover instruction if given address is probed */
 extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
 					 unsigned long addr);
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 520b8dfe1640..88b3c942473d 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -166,12 +166,12 @@ NOKPROBE_SYMBOL(skip_prefixes);
  * Returns non-zero if opcode is boostable.
  * RIP relative instructions are adjusted at copying time in 64 bits mode
  */
-int can_boost(kprobe_opcode_t *opcodes)
+int can_boost(kprobe_opcode_t *opcodes, void *addr)
 {
 	kprobe_opcode_t opcode;
 	kprobe_opcode_t *orig_opcodes = opcodes;
 
-	if (search_exception_tables((unsigned long)opcodes))
+	if (search_exception_tables((unsigned long)addr))
 		return 0;	/* Page fault may occur on this address. */
 
 retry:
@@ -416,7 +416,7 @@ static int arch_copy_kprobe(struct kprobe *p)
 	 * __copy_instruction can modify the displacement of the instruction,
 	 * but it doesn't affect boostable check.
 	 */
-	if (can_boost(p->ainsn.insn))
+	if (can_boost(p->ainsn.insn, p->addr))
 		p->ainsn.boostable = 0;
 	else
 		p->ainsn.boostable = -1;
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 3d1bee9d6a72..3e7c6e5a08ff 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -178,7 +178,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src)
 
 	while (len < RELATIVEJUMP_SIZE) {
 		ret = __copy_instruction(dest + len, src + len);
-		if (!ret || !can_boost(dest + len))
+		if (!ret || !can_boost(dest + len, src + len))
 			return -EINVAL;
 		len += ret;
 	}
-- 
cgit v1.2.3


From 10bce8410607a18eb3adf5d2739db8c8593e110d Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Mon, 27 Feb 2017 23:50:58 +0100
Subject: x86/kdebugfs: Move boot params hierarchy under (debugfs)/x86/

... since this is all x86-specific data and it makes sense to have it
under x86/ logically instead in the toplevel debugfs dir.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170227225058.27289-1-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/kdebugfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index bdb83e431d89..38b64587b31b 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -167,7 +167,7 @@ static int __init boot_params_kdebugfs_init(void)
 	struct dentry *dbp, *version, *data;
 	int error = -ENOMEM;
 
-	dbp = debugfs_create_dir("boot_params", NULL);
+	dbp = debugfs_create_dir("boot_params", arch_debugfs_dir);
 	if (!dbp)
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From e7c95effcd3a7a0c9535c809141ca499fede2c31 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 28 Feb 2017 07:05:59 +0100
Subject: s390/crypt: fix missing unlock in ctr_paes_crypt on error path

The ctr mode of protected key aes uses the ctrblk page if the
ctrblk_lock could be acquired. If the protected key has to be
reestablished and this operation fails the unlock for the
ctrblk_lock is missing. Add it.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/crypto/paes_s390.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index d69ea495c4d7..716b17238599 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -474,8 +474,11 @@ static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
 			ret = blkcipher_walk_done(desc, walk, nbytes - n);
 		}
 		if (k < n) {
-			if (__ctr_paes_set_key(ctx) != 0)
+			if (__ctr_paes_set_key(ctx) != 0) {
+				if (locked)
+					spin_unlock(&ctrblk_lock);
 				return blkcipher_walk_done(desc, walk, -EIO);
+			}
 		}
 	}
 	if (locked)
-- 
cgit v1.2.3


From d9fcf2a1cbd1ff65d0109b1b400938808007fcd5 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 28 Feb 2017 07:42:01 +0100
Subject: s390: fix in-kernel program checks

A program check inside the kernel takes a slightly different path in
entry.S compare to a normal user fault. A recent change moved the store
of the breaking event address into the path taken for in-kernel program
checks as well, but %r14 has not been setup to point to the correct
location. A wild store is the consequence.

Move the store of the breaking event address to the code path for
user space faults.

Fixes: 34525e1f7e8d ("s390: store breaking event address only for program checks")
Reported-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/entry.S | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index dff2152350a7..6a7d737d514c 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -490,7 +490,7 @@ ENTRY(pgm_check_handler)
 	jnz	.Lpgm_svcper		# -> single stepped svc
 1:	CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
 	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	j	3f
+	j	4f
 2:	UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
 	lg	%r15,__LC_KERNEL_STACK
 	lgr	%r14,%r12
@@ -499,8 +499,8 @@ ENTRY(pgm_check_handler)
 	tm	__LC_PGM_ILC+2,0x02	# check for transaction abort
 	jz	3f
 	mvc	__THREAD_trap_tdb(256,%r14),0(%r13)
-3:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
-	stg	%r10,__THREAD_last_break(%r14)
+3:	stg	%r10,__THREAD_last_break(%r14)
+4:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
 	stmg	%r8,%r9,__PT_PSW(%r11)
@@ -509,14 +509,14 @@ ENTRY(pgm_check_handler)
 	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
 	stg	%r10,__PT_ARGS(%r11)
 	tm	__LC_PGM_ILC+3,0x80	# check for per exception
-	jz	4f
+	jz	5f
 	tmhh	%r8,0x0001		# kernel per event ?
 	jz	.Lpgm_kprobe
 	oi	__PT_FLAGS+7(%r11),_PIF_PER_TRAP
 	mvc	__THREAD_per_address(8,%r14),__LC_PER_ADDRESS
 	mvc	__THREAD_per_cause(2,%r14),__LC_PER_CODE
 	mvc	__THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
-4:	REENABLE_IRQS
+5:	REENABLE_IRQS
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	larl	%r1,pgm_check_table
 	llgh	%r10,__PT_INT_CODE+2(%r11)
-- 
cgit v1.2.3


From e69ca822ce0ed3ba006ce384d7d205c81d92373f Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 1 Mar 2017 09:16:03 +0100
Subject: s390/cputime: remove last traces of cputime_t

The cputime_t type is a thing of the past, replace the last occurences
of the type in the s390 code with a simple u64.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/cputime.h | 14 ++------------
 arch/s390/kernel/vtime.c        |  2 +-
 2 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index d1c407ddf703..e5672d6276a6 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -8,32 +8,22 @@
 #define _S390_CPUTIME_H
 
 #include <linux/types.h>
-#include <asm/div64.h>
 
 #define CPUTIME_PER_USEC 4096ULL
 #define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC)
 
 /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
 
-typedef unsigned long long __nocast cputime_t;
-typedef unsigned long long __nocast cputime64_t;
-
 #define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new)
 
-static inline unsigned long __div(unsigned long long n, unsigned long base)
-{
-	return n / base;
-}
-
 /*
  * Convert cputime to microseconds and back.
  */
-static inline unsigned int cputime_to_usecs(const cputime_t cputime)
+static inline u64 cputime_to_usecs(const u64 cputime)
 {
-	return (__force unsigned long long) cputime >> 12;
+	return cputime >> 12;
 }
 
-
 u64 arch_cpu_idle_time(int cpu);
 
 #define arch_idle_time(cpu) arch_cpu_idle_time(cpu)
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 31bd96e81167..8f5f59a151b4 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -111,7 +111,7 @@ static inline u64 scale_vtime(u64 vtime)
 }
 
 static void account_system_index_scaled(struct task_struct *p,
-					cputime_t cputime, cputime_t scaled,
+					u64 cputime, u64 scaled,
 					enum cpu_usage_stat index)
 {
 	p->stimescaled += cputime_to_nsecs(scaled);
-- 
cgit v1.2.3


From 3c915bdc1775acfa214195da1ffb39dabdd1a389 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 1 Mar 2017 09:18:34 +0100
Subject: s390/cputime: reset all accounting fields on fork

copy_thread has to reset all cputime related field in the task struct,
not only user_timer and system_timer.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/process.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 54281660582c..249deafaa6ee 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -121,7 +121,10 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp,
 	clear_tsk_thread_flag(p, TIF_SINGLE_STEP);
 	/* Initialize per thread user and system timer values */
 	p->thread.user_timer = 0;
+	p->thread.guest_timer = 0;
 	p->thread.system_timer = 0;
+	p->thread.hardirq_timer = 0;
+	p->thread.softirq_timer = 0;
 
 	frame->sf.back_chain = 0;
 	/* new return point is ret_from_fork */
-- 
cgit v1.2.3


From e53051e757d6cd66741955b93581e54415e48a70 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 1 Mar 2017 09:21:10 +0100
Subject: s390/cputime: provide archicture specific cputime_to_nsecs

The generic cputime_to_nsecs function first converts the cputime
to micro-seconds and then multiplies the result with 1000. This
looses some bits of accuracy, provide our own version of
cputime_to_nsecs that does not loose precision.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/cputime.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index e5672d6276a6..9072bf63a846 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -8,6 +8,7 @@
 #define _S390_CPUTIME_H
 
 #include <linux/types.h>
+#include <asm/timex.h>
 
 #define CPUTIME_PER_USEC 4096ULL
 #define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC)
@@ -17,13 +18,18 @@
 #define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new)
 
 /*
- * Convert cputime to microseconds and back.
+ * Convert cputime to microseconds.
  */
 static inline u64 cputime_to_usecs(const u64 cputime)
 {
 	return cputime >> 12;
 }
 
+/*
+ * Convert cputime to nanoseconds.
+ */
+#define cputime_to_nsecs(cputime) tod_to_ns(cputime)
+
 u64 arch_cpu_idle_time(int cpu);
 
 #define arch_idle_time(cpu) arch_cpu_idle_time(cpu)
-- 
cgit v1.2.3


From d03bd0454b101adb94d0b5a9cc11396182943cb4 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 1 Mar 2017 09:47:57 +0100
Subject: s390/timex: micro optimization for tod_to_ns

The conversion of a TOD value to nano-seconds currently uses a 32/32 bit
split with the calculation for "nsecs = (TOD * 125) >> 9". Using a
55/9 bit split saves an instruction.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/timex.h | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 354344dcc198..118535123f34 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -206,20 +206,16 @@ static inline unsigned long long get_tod_clock_monotonic(void)
  *    ns = (todval * 125) >> 9;
  *
  * In order to avoid an overflow with the multiplication we can rewrite this.
- * With a split todval == 2^32 * th + tl (th upper 32 bits, tl lower 32 bits)
+ * With a split todval == 2^9 * th + tl (th upper 55 bits, tl lower 9 bits)
  * we end up with
  *
- *    ns = ((2^32 * th + tl) * 125 ) >> 9;
- * -> ns = (2^23 * th * 125) + ((tl * 125) >> 9);
+ *    ns = ((2^9 * th + tl) * 125 ) >> 9;
+ * -> ns = (th * 125) + ((tl * 125) >> 9);
  *
  */
 static inline unsigned long long tod_to_ns(unsigned long long todval)
 {
-	unsigned long long ns;
-
-	ns = ((todval >> 32) << 23) * 125;
-	ns += ((todval & 0xffffffff) * 125) >> 9;
-	return ns;
+	return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9);
 }
 
 #endif
-- 
cgit v1.2.3


From b28ace12661fbcfd90959c1e84ff5a85113a82a1 Mon Sep 17 00:00:00 2001
From: Franck Demathieu <fdemathieu@gmail.com>
Date: Thu, 23 Feb 2017 10:48:55 +0100
Subject: irqchip/crossbar: Fix incorrect type of local variables

The max and entry variables are unsigned according to the dt-bindings.
Fix following 3 sparse issues (-Wtypesign):

  drivers/irqchip/irq-crossbar.c:222:52: warning: incorrect type in argument 3 (different signedness)
  drivers/irqchip/irq-crossbar.c:222:52:    expected unsigned int [usertype] *out_value
  drivers/irqchip/irq-crossbar.c:222:52:    got int *<noident>

  drivers/irqchip/irq-crossbar.c:245:56: warning: incorrect type in argument 4 (different signedness)
  drivers/irqchip/irq-crossbar.c:245:56:    expected unsigned int [usertype] *out_value
  drivers/irqchip/irq-crossbar.c:245:56:    got int *<noident>

  drivers/irqchip/irq-crossbar.c:263:56: warning: incorrect type in argument 4 (different signedness)
  drivers/irqchip/irq-crossbar.c:263:56:    expected unsigned int [usertype] *out_value
  drivers/irqchip/irq-crossbar.c:263:56:    got int *<noident>

Signed-off-by: Franck Demathieu <fdemathieu@gmail.com>
Cc: marc.zyngier@arm.com
Cc: jason@lakedaemon.net
Link: http://lkml.kernel.org/r/20170223094855.6546-1-fdemathieu@gmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/irqchip/irq-crossbar.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c
index 1eef56a89b1f..05bbf171df37 100644
--- a/drivers/irqchip/irq-crossbar.c
+++ b/drivers/irqchip/irq-crossbar.c
@@ -198,7 +198,8 @@ static const struct irq_domain_ops crossbar_domain_ops = {
 
 static int __init crossbar_of_init(struct device_node *node)
 {
-	int i, size, max = 0, reserved = 0, entry;
+	int i, size, reserved = 0;
+	u32 max = 0, entry;
 	const __be32 *irqsr;
 	int ret = -ENOMEM;
 
-- 
cgit v1.2.3


From bb3f0a52630c84807fca9bdd76ac2f5dcec82689 Mon Sep 17 00:00:00 2001
From: Dou Liyang <douly.fnst@cn.fujitsu.com>
Date: Tue, 28 Feb 2017 13:50:52 +0800
Subject: x86/apic: Fix a warning message in logical CPU IDs allocation

The current warning message in allocate_logical_cpuid() is somewhat confusing:

  Only 1 processors supported.Processor 2/0x2 and the rest are ignored.

As it might imply that there's only one CPU in the system - while what we ran
into here is a kernel limitation.

Fix the warning message to clarify all that:

  APIC: NR_CPUS/possible_cpus limit of 2 reached. Processor 2/0x2 and the rest are ignored.

( Also update the error return from -1 to -EINVAL, which is the more
  canonical return value. )

Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bp@alien8.de
Cc: nicstange@gmail.com
Cc: wanpeng.li@hotmail.com
Link: http://lkml.kernel.org/r/1488261052-25753-1-git-send-email-douly.fnst@cn.fujitsu.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/apic/apic.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 4261b3282ad9..11088b86e5c7 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2062,10 +2062,10 @@ static int allocate_logical_cpuid(int apicid)
 
 	/* Allocate a new cpuid. */
 	if (nr_logical_cpuids >= nr_cpu_ids) {
-		WARN_ONCE(1, "Only %d processors supported."
+		WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %i reached. "
 			     "Processor %d/0x%x and the rest are ignored.\n",
-			     nr_cpu_ids - 1, nr_logical_cpuids, apicid);
-		return -1;
+			     nr_cpu_ids, nr_logical_cpuids, apicid);
+		return -EINVAL;
 	}
 
 	cpuid_to_apicid[nr_logical_cpuids] = apicid;
-- 
cgit v1.2.3


From 11277aabcbbe13916151af897d29a5e9f71ca73f Mon Sep 17 00:00:00 2001
From: Dou Liyang <douly.fnst@cn.fujitsu.com>
Date: Thu, 23 Feb 2017 17:16:41 +0800
Subject: x86/apic: Simplify enable_IR_x2apic(), remove try_to_enable_IR()

The following commit:

  2e63ad4bd5dd ("x86/apic: Do not init irq remapping if ioapic is disabled")

... added a check for skipped IO-APIC setup to enable_IR_x2apic(), but this
check is also duplicated in try_to_enable_IR() - and it will never succeed in
calling irq_remapping_enable().

Remove the whole irq_remapping_enable() complication: if the IO-APIC is
disabled we cannot enable IRQ remapping.

Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bp@alien8.de
Cc: nicstange@gmail.com
Cc: wanpeng.li@hotmail.com
Link: http://lkml.kernel.org/r/1487841401-1543-1-git-send-email-douly.fnst@cn.fujitsu.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/apic/apic.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 11088b86e5c7..aee7deddabd0 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1610,24 +1610,15 @@ static inline void try_to_enable_x2apic(int remap_mode) { }
 static inline void __x2apic_enable(void) { }
 #endif /* !CONFIG_X86_X2APIC */
 
-static int __init try_to_enable_IR(void)
-{
-#ifdef CONFIG_X86_IO_APIC
-	if (!x2apic_enabled() && skip_ioapic_setup) {
-		pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");
-		return -1;
-	}
-#endif
-	return irq_remapping_enable();
-}
-
 void __init enable_IR_x2apic(void)
 {
 	unsigned long flags;
 	int ret, ir_stat;
 
-	if (skip_ioapic_setup)
+	if (skip_ioapic_setup) {
+		pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");
 		return;
+	}
 
 	ir_stat = irq_remapping_prepare();
 	if (ir_stat < 0 && !x2apic_supported())
@@ -1645,7 +1636,7 @@ void __init enable_IR_x2apic(void)
 
 	/* If irq_remapping_prepare() succeeded, try to enable it */
 	if (ir_stat >= 0)
-		ir_stat = try_to_enable_IR();
+		ir_stat = irq_remapping_enable();
 	/* ir_stat contains the remap mode or an error code */
 	try_to_enable_x2apic(ir_stat);
 
-- 
cgit v1.2.3


From 2a4d0c627f5374f365a873dea4e10ae0bb437680 Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <dsafonov@virtuozzo.com>
Date: Mon, 13 Feb 2017 13:13:36 +0300
Subject: x86/selftests: Add clobbers for int80 on x86_64

Kernel erases R8..R11 registers prior returning to userspace
from int80:

  https://lkml.org/lkml/2009/10/1/164

GCC can reuse these registers and doesn't expect them to change
during syscall invocation. I met this kind of bug in CRIU once
GCC 6.1 and CLANG stored local variables in those registers
and the kernel zerofied them during syscall:

  https://github.com/xemul/criu/commit/990d33f1a1cdd17bca6c2eb059ab3be2564f7fa2

By that reason I suggest to add those registers to clobbers
in selftests.  Also, as noted by Andy - removed unneeded clobber
for flags in INT $0x80 inline asm.

Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
Acked-by: Andy Lutomirski <luto@kernel.org>
Cc: 0x7f454c46@gmail.com
Cc: Borislav Petkov <bp@alien8.de>
Cc: Borislav Petkov <bp@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kselftest@vger.kernel.org
Link: http://lkml.kernel.org/r/20170213101336.20486-1-dsafonov@virtuozzo.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/testing/selftests/x86/fsgsbase.c            |  2 +-
 tools/testing/selftests/x86/ldt_gdt.c             | 16 +++++++++++-----
 tools/testing/selftests/x86/ptrace_syscall.c      |  3 ++-
 tools/testing/selftests/x86/single_step_syscall.c |  5 ++++-
 4 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 5b2b4b3c634c..b4967d875236 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -245,7 +245,7 @@ void do_unexpected_base(void)
 		long ret;
 		asm volatile ("int $0x80"
 			      : "=a" (ret) : "a" (243), "b" (low_desc)
-			      : "flags");
+			      : "r8", "r9", "r10", "r11");
 		memcpy(&desc, low_desc, sizeof(desc));
 		munmap(low_desc, sizeof(desc));
 
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
index 4af47079cf04..f6121612e769 100644
--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -45,6 +45,12 @@
 #define AR_DB			(1 << 22)
 #define AR_G			(1 << 23)
 
+#ifdef __x86_64__
+# define INT80_CLOBBERS "r8", "r9", "r10", "r11"
+#else
+# define INT80_CLOBBERS
+#endif
+
 static int nerrs;
 
 /* Points to an array of 1024 ints, each holding its own index. */
@@ -588,7 +594,7 @@ static int invoke_set_thread_area(void)
 	asm volatile ("int $0x80"
 		      : "=a" (ret), "+m" (low_user_desc) :
 			"a" (243), "b" (low_user_desc)
-		      : "flags");
+		      : INT80_CLOBBERS);
 	return ret;
 }
 
@@ -657,7 +663,7 @@ static void test_gdt_invalidation(void)
 			"+a" (eax)
 		      : "m" (low_user_desc_clear),
 			[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
-		      : "flags");
+		      : INT80_CLOBBERS);
 
 	if (sel != 0) {
 		result = "FAIL";
@@ -688,7 +694,7 @@ static void test_gdt_invalidation(void)
 			"+a" (eax)
 		      : "m" (low_user_desc_clear),
 			[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
-		      : "flags");
+		      : INT80_CLOBBERS);
 
 	if (sel != 0) {
 		result = "FAIL";
@@ -721,7 +727,7 @@ static void test_gdt_invalidation(void)
 			"+a" (eax)
 		      : "m" (low_user_desc_clear),
 			[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
-		      : "flags");
+		      : INT80_CLOBBERS);
 
 #ifdef __x86_64__
 	syscall(SYS_arch_prctl, ARCH_GET_FS, &new_base);
@@ -774,7 +780,7 @@ static void test_gdt_invalidation(void)
 			"+a" (eax)
 		      : "m" (low_user_desc_clear),
 			[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
-		      : "flags");
+		      : INT80_CLOBBERS);
 
 #ifdef __x86_64__
 	syscall(SYS_arch_prctl, ARCH_GET_GS, &new_base);
diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c
index b037ce9cf116..eaea92439708 100644
--- a/tools/testing/selftests/x86/ptrace_syscall.c
+++ b/tools/testing/selftests/x86/ptrace_syscall.c
@@ -58,7 +58,8 @@ static void do_full_int80(struct syscall_args32 *args)
 	asm volatile ("int $0x80"
 		      : "+a" (args->nr),
 			"+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2),
-			"+S" (args->arg3), "+D" (args->arg4), "+r" (bp));
+			"+S" (args->arg3), "+D" (args->arg4), "+r" (bp)
+			: : "r8", "r9", "r10", "r11");
 	args->arg5 = bp;
 #else
 	sys32_helper(args, int80_and_ret);
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
index 50c26358e8b7..a48da95c18fd 100644
--- a/tools/testing/selftests/x86/single_step_syscall.c
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -56,9 +56,11 @@ static volatile sig_atomic_t sig_traps;
 #ifdef __x86_64__
 # define REG_IP REG_RIP
 # define WIDTH "q"
+# define INT80_CLOBBERS "r8", "r9", "r10", "r11"
 #else
 # define REG_IP REG_EIP
 # define WIDTH "l"
+# define INT80_CLOBBERS
 #endif
 
 static unsigned long get_eflags(void)
@@ -140,7 +142,8 @@ int main()
 
 	printf("[RUN]\tSet TF and check int80\n");
 	set_eflags(get_eflags() | X86_EFLAGS_TF);
-	asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid));
+	asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)
+			: INT80_CLOBBERS);
 	check_result();
 
 	/*
-- 
cgit v1.2.3


From 6b0b7551428e4caae1e2c023a529465a9a9ae2d4 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 16 Feb 2017 17:00:50 +1100
Subject: perf/core: Rename CONFIG_[UK]PROBE_EVENT to CONFIG_[UK]PROBE_EVENTS

We have uses of CONFIG_UPROBE_EVENT and CONFIG_KPROBE_EVENT as
well as CONFIG_UPROBE_EVENTS and CONFIG_KPROBE_EVENTS.

Consistently use the plurals.

Signed-off-by: Anton Blanchard <anton@samba.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@kernel.org
Cc: alexander.shishkin@linux.intel.com
Cc: davem@davemloft.net
Cc: sparclinux@vger.kernel.org
Link: http://lkml.kernel.org/r/20170216060050.20866-1-anton@ozlabs.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/trace/kprobetrace.txt         |  2 +-
 Documentation/trace/uprobetracer.txt        |  2 +-
 arch/powerpc/configs/85xx/kmp204x_defconfig |  2 +-
 arch/s390/configs/default_defconfig         |  2 +-
 arch/s390/configs/gcov_defconfig            |  2 +-
 arch/s390/configs/performance_defconfig     |  2 +-
 arch/s390/defconfig                         |  2 +-
 kernel/trace/Kconfig                        |  6 +++---
 kernel/trace/Makefile                       |  4 ++--
 kernel/trace/trace.c                        | 10 +++++-----
 kernel/trace/trace_probe.h                  |  4 ++--
 11 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index e4991fb1eedc..41ef9d8efe95 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -12,7 +12,7 @@ kprobes can probe (this means, all functions body except for __kprobes
 functions). Unlike the Tracepoint based event, this can be added and removed
 dynamically, on the fly.
 
-To enable this feature, build your kernel with CONFIG_KPROBE_EVENT=y.
+To enable this feature, build your kernel with CONFIG_KPROBE_EVENTS=y.
 
 Similar to the events tracer, this doesn't need to be activated via
 current_tracer. Instead of that, add probe points via
diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt
index fa7b680ee8a0..bf526a7c5559 100644
--- a/Documentation/trace/uprobetracer.txt
+++ b/Documentation/trace/uprobetracer.txt
@@ -7,7 +7,7 @@
 Overview
 --------
 Uprobe based trace events are similar to kprobe based trace events.
-To enable this feature, build your kernel with CONFIG_UPROBE_EVENT=y.
+To enable this feature, build your kernel with CONFIG_UPROBE_EVENTS=y.
 
 Similar to the kprobe-event tracer, this doesn't need to be activated via
 current_tracer. Instead of that, add probe points via
diff --git a/arch/powerpc/configs/85xx/kmp204x_defconfig b/arch/powerpc/configs/85xx/kmp204x_defconfig
index aaaaa609cd24..34a4da23f000 100644
--- a/arch/powerpc/configs/85xx/kmp204x_defconfig
+++ b/arch/powerpc/configs/85xx/kmp204x_defconfig
@@ -210,7 +210,7 @@ CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_SCHEDSTATS=y
 CONFIG_RCU_TRACE=y
-CONFIG_UPROBE_EVENT=y
+CONFIG_UPROBE_EVENTS=y
 CONFIG_CRYPTO_NULL=y
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_MD4=y
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
index 143b1e00b818..4b176fe83da4 100644
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -609,7 +609,7 @@ CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_STACK_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
+CONFIG_UPROBE_EVENTS=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_HIST_TRIGGERS=y
 CONFIG_TRACE_ENUM_MAP_FILE=y
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
index f05d2d6e1087..0de46cc397f6 100644
--- a/arch/s390/configs/gcov_defconfig
+++ b/arch/s390/configs/gcov_defconfig
@@ -560,7 +560,7 @@ CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_STACK_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
+CONFIG_UPROBE_EVENTS=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_HIST_TRIGGERS=y
 CONFIG_TRACE_ENUM_MAP_FILE=y
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index 2358bf33c5ef..e167557b434c 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -558,7 +558,7 @@ CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_STACK_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
+CONFIG_UPROBE_EVENTS=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_HIST_TRIGGERS=y
 CONFIG_TRACE_ENUM_MAP_FILE=y
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index 68bfd09f1b02..97189dbaf34b 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -179,7 +179,7 @@ CONFIG_FTRACE_SYSCALLS=y
 CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
 CONFIG_STACK_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
+CONFIG_UPROBE_EVENTS=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_TRACE_ENUM_MAP_FILE=y
 CONFIG_KPROBES_SANITY_TEST=y
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d5038005eb5d..d4a06e714645 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -429,7 +429,7 @@ config BLK_DEV_IO_TRACE
 
 	  If unsure, say N.
 
-config KPROBE_EVENT
+config KPROBE_EVENTS
 	depends on KPROBES
 	depends on HAVE_REGS_AND_STACK_ACCESS_API
 	bool "Enable kprobes-based dynamic events"
@@ -447,7 +447,7 @@ config KPROBE_EVENT
 	  This option is also required by perf-probe subcommand of perf tools.
 	  If you want to use perf tools, this option is strongly recommended.
 
-config UPROBE_EVENT
+config UPROBE_EVENTS
 	bool "Enable uprobes-based dynamic events"
 	depends on ARCH_SUPPORTS_UPROBES
 	depends on MMU
@@ -466,7 +466,7 @@ config UPROBE_EVENT
 
 config BPF_EVENTS
 	depends on BPF_SYSCALL
-	depends on (KPROBE_EVENT || UPROBE_EVENT) && PERF_EVENTS
+	depends on (KPROBE_EVENTS || UPROBE_EVENTS) && PERF_EVENTS
 	bool
 	default y
 	help
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index e57980845549..90f2701d92a7 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -57,7 +57,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
 obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o
 obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o
-obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
+obj-$(CONFIG_KPROBE_EVENTS) += trace_kprobe.o
 obj-$(CONFIG_TRACEPOINTS) += power-traces.o
 ifeq ($(CONFIG_PM),y)
 obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o
@@ -66,7 +66,7 @@ ifeq ($(CONFIG_TRACING),y)
 obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
 endif
 obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
-obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o
+obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o
 
 obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 707445ceb7ef..f35109514a01 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4341,22 +4341,22 @@ static const char readme_msg[] =
 	"\t\t\t  traces\n"
 #endif
 #endif /* CONFIG_STACK_TRACER */
-#ifdef CONFIG_KPROBE_EVENT
+#ifdef CONFIG_KPROBE_EVENTS
 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
 #endif
-#ifdef CONFIG_UPROBE_EVENT
+#ifdef CONFIG_UPROBE_EVENTS
 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
 #endif
-#if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
+#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
 	"\t  accepts: event-definitions (one definition per line)\n"
 	"\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
 	"\t           -:[<group>/]<event>\n"
-#ifdef CONFIG_KPROBE_EVENT
+#ifdef CONFIG_KPROBE_EVENTS
 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
 #endif
-#ifdef CONFIG_UPROBE_EVENT
+#ifdef CONFIG_UPROBE_EVENTS
 	"\t    place: <path>:<offset>\n"
 #endif
 	"\t     args: <name>=fetcharg[:type]\n"
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 0c0ae54d44c6..903273c93e61 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -248,7 +248,7 @@ ASSIGN_FETCH_FUNC(file_offset, ftype),			\
 #define FETCH_TYPE_STRING	0
 #define FETCH_TYPE_STRSIZE	1
 
-#ifdef CONFIG_KPROBE_EVENT
+#ifdef CONFIG_KPROBE_EVENTS
 struct symbol_cache;
 unsigned long update_symbol_cache(struct symbol_cache *sc);
 void free_symbol_cache(struct symbol_cache *sc);
@@ -278,7 +278,7 @@ alloc_symbol_cache(const char *sym, long offset)
 {
 	return NULL;
 }
-#endif /* CONFIG_KPROBE_EVENT */
+#endif /* CONFIG_KPROBE_EVENTS */
 
 struct probe_arg {
 	struct fetch_param	fetch;
-- 
cgit v1.2.3


From 1b17c6df852851b40c3c27c66b8fa2fd99cf25d8 Mon Sep 17 00:00:00 2001
From: Andrew Banman <abanman@hpe.com>
Date: Fri, 17 Feb 2017 11:07:49 -0600
Subject: x86/platform/uv/BAU: Fix HUB errors by remove initial write to sw-ack
 register

Writing to the software acknowledge clear register when there are no
pending messages causes a HUB error to assert. The original intent of this
write was to clear the pending bits before start of operation, but this is
an incorrect method and has been determined to be unnecessary.

Signed-off-by: Andrew Banman <abanman@hpe.com>
Acked-by: Mike Travis <mike.travis@hpe.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: akpm@linux-foundation.org
Cc: rja@hpe.com
Cc: sivanich@hpe.com
Link: http://lkml.kernel.org/r/1487351269-181133-1-git-send-email-abanman@hpe.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/platform/uv/tlb_uv.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 766d4d3529a1..f25982cdff90 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1847,7 +1847,6 @@ static void pq_init(int node, int pnode)
 
 	ops.write_payload_first(pnode, first);
 	ops.write_payload_last(pnode, last);
-	ops.write_g_sw_ack(pnode, 0xffffUL);
 
 	/* in effect, all msg_type's are set to MSG_NOOP */
 	memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE);
-- 
cgit v1.2.3


From 90b28ded88dda8bea82b4a86923e73ba0746d884 Mon Sep 17 00:00:00 2001
From: Matjaz Hegedic <matjaz.hegedic@gmail.com>
Date: Sun, 19 Feb 2017 19:32:48 +0100
Subject: x86/reboot/quirks: Add ASUS EeeBook X205TA reboot quirk

Without the parameter reboot=a, ASUS EeeBook X205TA will hang when it should reboot.

This adds the appropriate quirk, thus fixing the problem.

Signed-off-by: Matjaz Hegedic <matjaz.hegedic@gmail.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/reboot.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index e244c19a2451..01c9cda3e1b7 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -223,6 +223,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
 		},
 	},
+	{	/* Handle problems with rebooting on ASUS EeeBook X205TA */
+		.callback = set_acpi_reboot,
+		.ident = "ASUS EeeBook X205TA",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"),
+		},
+	},
 
 	/* Certec */
 	{       /* Handle problems with rebooting on Certec BPC600 */
-- 
cgit v1.2.3


From 73667e31a153a66da97feb1584726222504924f8 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 14 Feb 2017 22:17:17 +0100
Subject: x86/hyperv: Hide unused label

This new 32-bit warning just showed up:

arch/x86/hyperv/hv_init.c: In function 'hyperv_init':
arch/x86/hyperv/hv_init.c:167:1: error: label 'register_msr_cs' defined but not used [-Werror=unused-label]

The easiest solution is to move the label up into the existing #ifdef that
has the goto.

Fixes: dee863b571b0 ("hv: export current Hyper-V clocksource")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: devel@linuxdriverproject.org
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Link: http://lkml.kernel.org/r/20170214211736.2641241-1-arnd@arndb.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/hyperv/hv_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index db64baf0e500..8bef70e7f3cc 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -158,13 +158,13 @@ void hyperv_init(void)
 		clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
 		return;
 	}
+register_msr_cs:
 #endif
 	/*
 	 * For 32 bit guests just use the MSR based mechanism for reading
 	 * the partition counter.
 	 */
 
-register_msr_cs:
 	hyperv_cs = &hyperv_cs_msr;
 	if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
 		clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
-- 
cgit v1.2.3


From aa5ec3f715d576353c7d8f4f8085634bd845b73f Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Mon, 27 Feb 2017 21:29:22 +0900
Subject: x86/vmware: Remove duplicate inclusion of asm/timer.h

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Cc: akataria@vmware.com
Cc: virtualization@lists.linux-foundation.org
Link: http://lkml.kernel.org/r/20170227122922.26230-1-standby24x7@gmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/vmware.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 891f4dad7b2c..22403a28caf5 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -30,7 +30,6 @@
 #include <asm/hypervisor.h>
 #include <asm/timer.h>
 #include <asm/apic.h>
-#include <asm/timer.h>
 
 #undef pr_fmt
 #define pr_fmt(fmt)	"vmware: " fmt
-- 
cgit v1.2.3


From e86a2d2d34e20289ae7d46692e16d43c3a785db9 Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Mon, 27 Feb 2017 22:07:03 +0900
Subject: x86/intel_rdt: Remove duplicate inclusion of linux/cpu.h

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Cc: fenghua.yu@intel.com
Link: http://lkml.kernel.org/r/20170227130703.26968-1-standby24x7@gmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 8af04afdfcb9..759577d9d166 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -27,7 +27,6 @@
 #include <linux/seq_file.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/cpu.h>
 #include <linux/task_work.h>
 
 #include <uapi/linux/magic.h>
-- 
cgit v1.2.3


From 153654dbe595a68845ba14d5b0bfe299fa6a7e99 Mon Sep 17 00:00:00 2001
From: Rui Wang <rui.y.wang@intel.com>
Date: Tue, 28 Feb 2017 21:34:28 +0800
Subject: x86/PCI: Implement pcibios_release_device to release IRQ from IOAPIC

The revert of 991de2e59090 ("PCI, x86: Implement pcibios_alloc_irq()
and pcibios_free_irq()") causes a problem for IOAPIC hotplug. The
problem is that IRQs are allocated and freed in pci_enable_device()
and pci_disable_device(). But there are some drivers which don't call
pci_disable_device(), and they have good reasons not calling it, so
if they're using IOAPIC their IRQs won't have a chance to be released
from the IOAPIC. When this happens IOAPIC hot-removal fails with a
kernel stack dump and an error message like this:

  [149335.697989] pin16 on IOAPIC2 is still in use.

It turns out that we can fix it in a different way without moving IRQ
allocation into pcibios_alloc_irq(), thus avoiding the regression of
991de2e59090. We can keep the allocation and freeing of IRQs as is
within pci_enable_device()/pci_disable_device(), without breaking any
previous assumption of the rest of the system, keeping compatibility
with both the legacy and the modern drivers. We can accomplish this by
implementing the existing __weak hook of pcibios_release_device() thus
when a pci device is about to be deleted we get notified in the hook
and take the chance to release its IRQ, if any, from the IOAPIC.

Implement pcibios_release_device() for x86 to release any IRQ not released
by the driver.

Signed-off-by: Rui Wang <rui.y.wang@intel.com>
Cc: tony.luck@intel.com
Cc: linux-pci@vger.kernel.org
Cc: rjw@rjwysocki.net
Cc: linux-acpi@vger.kernel.org
Cc: fengguang.wu@intel.com
Cc: helgaas@kernel.org
Cc: kbuild-all@01.org
Cc: bhelgaas@google.com
Link: http://lkml.kernel.org/r/1488288869-31290-2-git-send-email-rui.y.wang@intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/pci/common.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 0cb52ae0a8f0..190e718694b1 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -735,6 +735,15 @@ void pcibios_disable_device (struct pci_dev *dev)
 		pcibios_disable_irq(dev);
 }
 
+#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
+void pcibios_release_device(struct pci_dev *dev)
+{
+	if (atomic_dec_return(&dev->enable_cnt) >= 0)
+		pcibios_disable_device(dev);
+
+}
+#endif
+
 int pci_ext_cfg_avail(void)
 {
 	if (raw_pci_ext_ops)
-- 
cgit v1.2.3


From f2ae5da726172fcf82f7be801489dd585f6a38eb Mon Sep 17 00:00:00 2001
From: Rui Wang <rui.y.wang@intel.com>
Date: Tue, 28 Feb 2017 21:34:29 +0800
Subject: x86/ioapic: Split IOAPIC hot-removal into two steps

The hot removal of IOAPIC is handling PCI and ACPI removal in one go. That
only works when the PCI drivers released the interrupt resources, but
breaks when a IOAPIC interrupt is still associated to a PCI device.

The new pcibios_release_device() callback allows to solve that problem by
splitting the removal into two steps:

1) PCI removal:

   Release all PCI resources including eventually not yet released IOAPIC
   interrupts via the new pcibios_release_device() callback.

2) ACPI removal:

   After release of all PCI resources the ACPI resources can be released
   without issue.

[ tglx: Rewrote changelog ]

Signed-off-by: Rui Wang <rui.y.wang@intel.com>
Cc: tony.luck@intel.com
Cc: linux-pci@vger.kernel.org
Cc: rjw@rjwysocki.net
Cc: linux-acpi@vger.kernel.org
Cc: fengguang.wu@intel.com
Cc: helgaas@kernel.org
Cc: kbuild-all@01.org
Cc: bhelgaas@google.com
Link: http://lkml.kernel.org/r/1488288869-31290-3-git-send-email-rui.y.wang@intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/acpi/internal.h |  2 ++
 drivers/acpi/ioapic.c   | 22 ++++++++++++++++------
 drivers/acpi/pci_root.c |  4 ++--
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 219b90bc0922..f15900132912 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -41,8 +41,10 @@ void acpi_gpe_apply_masked_gpes(void);
 void acpi_container_init(void);
 void acpi_memory_hotplug_init(void);
 #ifdef	CONFIG_ACPI_HOTPLUG_IOAPIC
+void pci_ioapic_remove(struct acpi_pci_root *root);
 int acpi_ioapic_remove(struct acpi_pci_root *root);
 #else
+static inline void pci_ioapic_remove(struct acpi_pci_root *root) { return; }
 static inline int acpi_ioapic_remove(struct acpi_pci_root *root) { return 0; }
 #endif
 #ifdef CONFIG_ACPI_DOCK
diff --git a/drivers/acpi/ioapic.c b/drivers/acpi/ioapic.c
index 6d7ce6e12aaa..1120dfd625b8 100644
--- a/drivers/acpi/ioapic.c
+++ b/drivers/acpi/ioapic.c
@@ -206,24 +206,34 @@ int acpi_ioapic_add(acpi_handle root_handle)
 	return ACPI_SUCCESS(status) && ACPI_SUCCESS(retval) ? 0 : -ENODEV;
 }
 
-int acpi_ioapic_remove(struct acpi_pci_root *root)
+void pci_ioapic_remove(struct acpi_pci_root *root)
 {
-	int retval = 0;
 	struct acpi_pci_ioapic *ioapic, *tmp;
 
 	mutex_lock(&ioapic_list_lock);
 	list_for_each_entry_safe(ioapic, tmp, &ioapic_list, list) {
 		if (root->device->handle != ioapic->root_handle)
 			continue;
-
-		if (acpi_unregister_ioapic(ioapic->handle, ioapic->gsi_base))
-			retval = -EBUSY;
-
 		if (ioapic->pdev) {
 			pci_release_region(ioapic->pdev, 0);
 			pci_disable_device(ioapic->pdev);
 			pci_dev_put(ioapic->pdev);
 		}
+	}
+	mutex_unlock(&ioapic_list_lock);
+}
+
+int acpi_ioapic_remove(struct acpi_pci_root *root)
+{
+	int retval = 0;
+	struct acpi_pci_ioapic *ioapic, *tmp;
+
+	mutex_lock(&ioapic_list_lock);
+	list_for_each_entry_safe(ioapic, tmp, &ioapic_list, list) {
+		if (root->device->handle != ioapic->root_handle)
+			continue;
+		if (acpi_unregister_ioapic(ioapic->handle, ioapic->gsi_base))
+			retval = -EBUSY;
 		if (ioapic->res.flags && ioapic->res.parent)
 			release_resource(&ioapic->res);
 		list_del(&ioapic->list);
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index bf601d4df8cf..919be0aa2578 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -648,12 +648,12 @@ static void acpi_pci_root_remove(struct acpi_device *device)
 
 	pci_stop_root_bus(root->bus);
 
-	WARN_ON(acpi_ioapic_remove(root));
-
+	pci_ioapic_remove(root);
 	device_set_run_wake(root->bus->bridge, false);
 	pci_acpi_remove_bus_pm_notifier(device);
 
 	pci_remove_root_bus(root->bus);
+	WARN_ON(acpi_ioapic_remove(root));
 
 	dmar_device_remove(device->handle);
 
-- 
cgit v1.2.3


From 58ab9a088ddac4efe823471275859d64f735577e Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Thu, 23 Feb 2017 14:26:03 -0800
Subject: x86/pkeys: Check against max pkey to avoid overflows

Kirill reported a warning from UBSAN about undefined behavior when using
protection keys.  He is running on hardware that actually has support for
it, which is not widely available.

The warning triggers because of very large shifts of integers when doing a
pkey_free() of a large, invalid value. This happens because we never check
that the pkey "fits" into the mm_pkey_allocation_map().

I do not believe there is any danger here of anything bad happening
other than some aliasing issues where somebody could do:

	pkey_free(35);

and the kernel would effectively execute:

	pkey_free(8);

While this might be confusing to an app that was doing something stupid, it
has to do something stupid and the effects are limited to the app shooting
itself in the foot.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: stable@vger.kernel.org
Cc: linux-kselftest@vger.kernel.org
Cc: shuah@kernel.org
Cc: kirill.shutemov@linux.intel.com
Link: http://lkml.kernel.org/r/20170223222603.A022ED65@viggo.jf.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/pkeys.h | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h
index 34684adb6899..b3b09b98896d 100644
--- a/arch/x86/include/asm/pkeys.h
+++ b/arch/x86/include/asm/pkeys.h
@@ -46,6 +46,15 @@ extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 static inline
 bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
 {
+	/*
+	 * "Allocated" pkeys are those that have been returned
+	 * from pkey_alloc().  pkey 0 is special, and never
+	 * returned from pkey_alloc().
+	 */
+	if (pkey <= 0)
+		return false;
+	if (pkey >= arch_max_pkey())
+		return false;
 	return mm_pkey_allocation_map(mm) & (1U << pkey);
 }
 
@@ -82,12 +91,6 @@ int mm_pkey_alloc(struct mm_struct *mm)
 static inline
 int mm_pkey_free(struct mm_struct *mm, int pkey)
 {
-	/*
-	 * pkey 0 is special, always allocated and can never
-	 * be freed.
-	 */
-	if (!pkey)
-		return -EINVAL;
 	if (!mm_pkey_is_allocated(mm, pkey))
 		return -EINVAL;
 
-- 
cgit v1.2.3


From 940b2f2fd963c043418ce8af64605783b2b19140 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Sat, 18 Feb 2017 12:31:40 +0100
Subject: x86/events: Remove last remnants of old filenames

Update to the new file paths, remove them from introductory comments.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20170218113140.8051-1-bp@alien8.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/events/amd/core.c                               | 2 +-
 arch/x86/events/intel/cstate.c                           | 2 +-
 arch/x86/events/intel/rapl.c                             | 2 +-
 arch/x86/events/intel/uncore.h                           | 6 +++---
 tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index afb222b63cae..c84584bb9402 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -604,7 +604,7 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
 			return &amd_f15_PMC20;
 		}
 	case AMD_EVENT_NB:
-		/* moved to perf_event_amd_uncore.c */
+		/* moved to uncore.c */
 		return &emptyconstraint;
 	default:
 		return &emptyconstraint;
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index aff4b5b69d40..238ae3248ba5 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -1,5 +1,5 @@
 /*
- * perf_event_intel_cstate.c: support cstate residency counters
+ * Support cstate residency counters
  *
  * Copyright (C) 2015, Intel Corp.
  * Author: Kan Liang (kan.liang@intel.com)
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 22054ca49026..9d05c7e67f60 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -1,5 +1,5 @@
 /*
- * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters
+ * Support Intel RAPL energy consumption counters
  * Copyright (C) 2013 Google, Inc., Stephane Eranian
  *
  * Intel RAPL interface is specified in the IA-32 Manual Vol3b
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index ad986c1e29bc..df5989f27b1b 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -360,7 +360,7 @@ extern struct list_head pci2phy_map_head;
 extern struct pci_extra_dev *uncore_extra_pci_dev;
 extern struct event_constraint uncore_constraint_empty;
 
-/* perf_event_intel_uncore_snb.c */
+/* uncore_snb.c */
 int snb_uncore_pci_init(void);
 int ivb_uncore_pci_init(void);
 int hsw_uncore_pci_init(void);
@@ -371,7 +371,7 @@ void nhm_uncore_cpu_init(void);
 void skl_uncore_cpu_init(void);
 int snb_pci2phy_map_init(int devid);
 
-/* perf_event_intel_uncore_snbep.c */
+/* uncore_snbep.c */
 int snbep_uncore_pci_init(void);
 void snbep_uncore_cpu_init(void);
 int ivbep_uncore_pci_init(void);
@@ -385,5 +385,5 @@ void knl_uncore_cpu_init(void);
 int skx_uncore_pci_init(void);
 void skx_uncore_cpu_init(void);
 
-/* perf_event_intel_uncore_nhmex.c */
+/* uncore_nhmex.c */
 void nhmex_uncore_cpu_init(void);
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 7913363bde5c..4f3c758d875d 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -31,7 +31,7 @@
 #error Instruction buffer size too small
 #endif
 
-/* Based on branch_type() from perf_event_intel_lbr.c */
+/* Based on branch_type() from arch/x86/events/intel/lbr.c */
 static void intel_pt_insn_decoder(struct insn *insn,
 				  struct intel_pt_insn *intel_pt_insn)
 {
-- 
cgit v1.2.3


From 72042a8c7b01048a36ece216aaf206b7d60ca661 Mon Sep 17 00:00:00 2001
From: "Tobin C. Harding" <me@tobin.cc>
Date: Mon, 20 Feb 2017 10:12:35 +1100
Subject: x86/purgatory: Make functions and variables static

Sparse emits several 'symbol not declared' warnings for various
functions and variables.

Add static keyword to functions and variables which have file scope
only.

Signed-off-by: Tobin C. Harding <me@tobin.cc>
Link: http://lkml.kernel.org/r/1487545956-2547-2-git-send-email-me@tobin.cc
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/purgatory/purgatory.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
index 25e068ba3382..2a5f4373c797 100644
--- a/arch/x86/purgatory/purgatory.c
+++ b/arch/x86/purgatory/purgatory.c
@@ -18,11 +18,11 @@ struct sha_region {
 	unsigned long len;
 };
 
-unsigned long backup_dest = 0;
-unsigned long backup_src = 0;
-unsigned long backup_sz = 0;
+static unsigned long backup_dest;
+static unsigned long backup_src;
+static unsigned long backup_sz;
 
-u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 };
+static u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 };
 
 struct sha_region sha_regions[16] = {};
 
@@ -39,7 +39,7 @@ static int copy_backup_region(void)
 	return 0;
 }
 
-int verify_sha256_digest(void)
+static int verify_sha256_digest(void)
 {
 	struct sha_region *ptr, *end;
 	u8 digest[SHA256_DIGEST_SIZE];
-- 
cgit v1.2.3


From e98fe5127b9cc8ab33d1094b81c19deb1f9082bf Mon Sep 17 00:00:00 2001
From: "Tobin C. Harding" <me@tobin.cc>
Date: Mon, 20 Feb 2017 10:12:36 +1100
Subject: x86/purgatory: Fix sparse warning, symbol not declared

Sparse emits warning, 'symbol not declared' for a function that has
neither file scope nor a forward declaration. The functions only call
site is an ASM file.

Add a header file with the function declaration. Include the header file in
the C source file defining the function in order to fix the sparse
warning. Include the header file in ASM file containing the call site to
document the usage.

Signed-off-by: Tobin C. Harding <me@tobin.cc>
Link: http://lkml.kernel.org/r/1487545956-2547-3-git-send-email-me@tobin.cc
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/purgatory/purgatory.c    | 1 +
 arch/x86/purgatory/purgatory.h    | 8 ++++++++
 arch/x86/purgatory/setup-x86_64.S | 1 +
 3 files changed, 10 insertions(+)
 create mode 100644 arch/x86/purgatory/purgatory.h

diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
index 2a5f4373c797..b6d5c8946e66 100644
--- a/arch/x86/purgatory/purgatory.c
+++ b/arch/x86/purgatory/purgatory.c
@@ -11,6 +11,7 @@
  */
 
 #include "sha256.h"
+#include "purgatory.h"
 #include "../boot/string.h"
 
 struct sha_region {
diff --git a/arch/x86/purgatory/purgatory.h b/arch/x86/purgatory/purgatory.h
new file mode 100644
index 000000000000..e2e365a6c192
--- /dev/null
+++ b/arch/x86/purgatory/purgatory.h
@@ -0,0 +1,8 @@
+#ifndef PURGATORY_H
+#define PURGATORY_H
+
+#ifndef __ASSEMBLY__
+extern void purgatory(void);
+#endif	/* __ASSEMBLY__ */
+
+#endif /* PURGATORY_H */
diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S
index fe3c91ba1bd0..f90e9dfa90bb 100644
--- a/arch/x86/purgatory/setup-x86_64.S
+++ b/arch/x86/purgatory/setup-x86_64.S
@@ -9,6 +9,7 @@
  * This source code is licensed under the GNU General Public License,
  * Version 2.  See the file COPYING for more details.
  */
+#include "purgatory.h"
 
 	.text
 	.globl purgatory_start
-- 
cgit v1.2.3


From 8392f16d38bb5222c03073a3906b7fd272386faf Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Tue, 21 Feb 2017 19:36:39 +0100
Subject: x86/boot: Correct setup_header.start_sys name

It is called start_sys_seg elsewhere so rename it to that. It is an
obsolete field so we could just as well directly call it __u16 __pad...

No functional change.

Signed-off-by: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/20170221183639.16554-1-bp@alien8.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/uapi/asm/bootparam.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 5138dacf8bb8..07244ea16765 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -58,7 +58,7 @@ struct setup_header {
 	__u32	header;
 	__u16	version;
 	__u32	realmode_swtch;
-	__u16	start_sys;
+	__u16	start_sys_seg;
 	__u16	kernel_version;
 	__u8	type_of_loader;
 	__u8	loadflags;
-- 
cgit v1.2.3


From 25b68a8f0ab13a98de02650208ec927796659898 Mon Sep 17 00:00:00 2001
From: Stephen Smalley <sds@tycho.nsa.gov>
Date: Fri, 17 Feb 2017 10:13:59 -0500
Subject: timerfd: Only check CAP_WAKE_ALARM when it is needed

timerfd_create() and do_timerfd_settime() evaluate capable(CAP_WAKE_ALARM)
unconditionally although CAP_WAKE_ALARM is only required for
CLOCK_REALTIME_ALARM and CLOCK_BOOTTIME_ALARM.

This can cause extraneous audit messages when using a LSM such as SELinux,
incorrectly causes PF_SUPERPRIV to be set even when no privilege was
exercised, and is inefficient.

Flip the order of the tests in both functions so that we only call
capable() if the capability is truly required for the operation.

Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Cc: linux-security-module@vger.kernel.org
Cc: selinux@tycho.nsa.gov
Link: http://lkml.kernel.org/r/1487344439-22293-1-git-send-email-sds@tycho.nsa.gov
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/timerfd.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/timerfd.c b/fs/timerfd.c
index 384fa759a563..c543cdb5f8ed 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -400,9 +400,9 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
 	     clockid != CLOCK_BOOTTIME_ALARM))
 		return -EINVAL;
 
-	if (!capable(CAP_WAKE_ALARM) &&
-	    (clockid == CLOCK_REALTIME_ALARM ||
-	     clockid == CLOCK_BOOTTIME_ALARM))
+	if ((clockid == CLOCK_REALTIME_ALARM ||
+	     clockid == CLOCK_BOOTTIME_ALARM) &&
+	    !capable(CAP_WAKE_ALARM))
 		return -EPERM;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -449,7 +449,7 @@ static int do_timerfd_settime(int ufd, int flags,
 		return ret;
 	ctx = f.file->private_data;
 
-	if (!capable(CAP_WAKE_ALARM) && isalarm(ctx)) {
+	if (isalarm(ctx) && !capable(CAP_WAKE_ALARM)) {
 		fdput(f);
 		return -EPERM;
 	}
-- 
cgit v1.2.3


From a4b0e8a4e92b1baa860e744847fbdb84a50a5071 Mon Sep 17 00:00:00 2001
From: "Potomski, MichalX" <michalx.potomski@intel.com>
Date: Thu, 23 Feb 2017 09:05:30 +0000
Subject: scsi: ufs: Factor out ufshcd_read_desc_param

Since in UFS 2.1 specification some of the descriptor lengths differs
from 2.0 specification and some devices, which are reporting spec
version 2.0 have different descriptor lengths we can not rely on
hardcoded values taken from 2.0 specification. This patch introduces
reading these lengths per each device from descriptor headers at probe
time to ensure their correctness.

Signed-off-by: Michal' Potomski <michalx.potomski@intel.com>
Reviewed-by: Subhash Jadavani <subhashj@codeaurora.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ufs/ufs.h    |  22 ++---
 drivers/scsi/ufs/ufshcd.c | 231 ++++++++++++++++++++++++++++++++++------------
 drivers/scsi/ufs/ufshcd.h |  15 +++
 3 files changed, 196 insertions(+), 72 deletions(-)

diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h
index 318e4a1f76c9..54deeb754db5 100644
--- a/drivers/scsi/ufs/ufs.h
+++ b/drivers/scsi/ufs/ufs.h
@@ -146,7 +146,7 @@ enum attr_idn {
 /* Descriptor idn for Query requests */
 enum desc_idn {
 	QUERY_DESC_IDN_DEVICE		= 0x0,
-	QUERY_DESC_IDN_CONFIGURAION	= 0x1,
+	QUERY_DESC_IDN_CONFIGURATION	= 0x1,
 	QUERY_DESC_IDN_UNIT		= 0x2,
 	QUERY_DESC_IDN_RFU_0		= 0x3,
 	QUERY_DESC_IDN_INTERCONNECT	= 0x4,
@@ -162,19 +162,13 @@ enum desc_header_offset {
 	QUERY_DESC_DESC_TYPE_OFFSET	= 0x01,
 };
 
-enum ufs_desc_max_size {
-	QUERY_DESC_DEVICE_MAX_SIZE		= 0x40,
-	QUERY_DESC_CONFIGURAION_MAX_SIZE	= 0x90,
-	QUERY_DESC_UNIT_MAX_SIZE		= 0x23,
-	QUERY_DESC_INTERCONNECT_MAX_SIZE	= 0x06,
-	/*
-	 * Max. 126 UNICODE characters (2 bytes per character) plus 2 bytes
-	 * of descriptor header.
-	 */
-	QUERY_DESC_STRING_MAX_SIZE		= 0xFE,
-	QUERY_DESC_GEOMETRY_MAX_SIZE		= 0x44,
-	QUERY_DESC_POWER_MAX_SIZE		= 0x62,
-	QUERY_DESC_RFU_MAX_SIZE			= 0x00,
+enum ufs_desc_def_size {
+	QUERY_DESC_DEVICE_DEF_SIZE		= 0x40,
+	QUERY_DESC_CONFIGURATION_DEF_SIZE	= 0x90,
+	QUERY_DESC_UNIT_DEF_SIZE		= 0x23,
+	QUERY_DESC_INTERCONNECT_DEF_SIZE	= 0x06,
+	QUERY_DESC_GEOMETRY_DEF_SIZE		= 0x44,
+	QUERY_DESC_POWER_DEF_SIZE		= 0x62,
 };
 
 /* Unit descriptor parameters offsets in bytes*/
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index dc6efbd1be8e..1359913bf840 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -100,19 +100,6 @@
 #define ufshcd_hex_dump(prefix_str, buf, len) \
 print_hex_dump(KERN_ERR, prefix_str, DUMP_PREFIX_OFFSET, 16, 4, buf, len, false)
 
-static u32 ufs_query_desc_max_size[] = {
-	QUERY_DESC_DEVICE_MAX_SIZE,
-	QUERY_DESC_CONFIGURAION_MAX_SIZE,
-	QUERY_DESC_UNIT_MAX_SIZE,
-	QUERY_DESC_RFU_MAX_SIZE,
-	QUERY_DESC_INTERCONNECT_MAX_SIZE,
-	QUERY_DESC_STRING_MAX_SIZE,
-	QUERY_DESC_RFU_MAX_SIZE,
-	QUERY_DESC_GEOMETRY_MAX_SIZE,
-	QUERY_DESC_POWER_MAX_SIZE,
-	QUERY_DESC_RFU_MAX_SIZE,
-};
-
 enum {
 	UFSHCD_MAX_CHANNEL	= 0,
 	UFSHCD_MAX_ID		= 1,
@@ -2857,7 +2844,7 @@ static int __ufshcd_query_descriptor(struct ufs_hba *hba,
 		goto out;
 	}
 
-	if (*buf_len <= QUERY_DESC_MIN_SIZE || *buf_len > QUERY_DESC_MAX_SIZE) {
+	if (*buf_len < QUERY_DESC_MIN_SIZE || *buf_len > QUERY_DESC_MAX_SIZE) {
 		dev_err(hba->dev, "%s: descriptor buffer size (%d) is out of range\n",
 				__func__, *buf_len);
 		err = -EINVAL;
@@ -2937,6 +2924,92 @@ static int ufshcd_query_descriptor_retry(struct ufs_hba *hba,
 	return err;
 }
 
+/**
+ * ufshcd_read_desc_length - read the specified descriptor length from header
+ * @hba: Pointer to adapter instance
+ * @desc_id: descriptor idn value
+ * @desc_index: descriptor index
+ * @desc_length: pointer to variable to read the length of descriptor
+ *
+ * Return 0 in case of success, non-zero otherwise
+ */
+static int ufshcd_read_desc_length(struct ufs_hba *hba,
+	enum desc_idn desc_id,
+	int desc_index,
+	int *desc_length)
+{
+	int ret;
+	u8 header[QUERY_DESC_HDR_SIZE];
+	int header_len = QUERY_DESC_HDR_SIZE;
+
+	if (desc_id >= QUERY_DESC_IDN_MAX)
+		return -EINVAL;
+
+	ret = ufshcd_query_descriptor_retry(hba, UPIU_QUERY_OPCODE_READ_DESC,
+					desc_id, desc_index, 0, header,
+					&header_len);
+
+	if (ret) {
+		dev_err(hba->dev, "%s: Failed to get descriptor header id %d",
+			__func__, desc_id);
+		return ret;
+	} else if (desc_id != header[QUERY_DESC_DESC_TYPE_OFFSET]) {
+		dev_warn(hba->dev, "%s: descriptor header id %d and desc_id %d mismatch",
+			__func__, header[QUERY_DESC_DESC_TYPE_OFFSET],
+			desc_id);
+		ret = -EINVAL;
+	}
+
+	*desc_length = header[QUERY_DESC_LENGTH_OFFSET];
+	return ret;
+
+}
+
+/**
+ * ufshcd_map_desc_id_to_length - map descriptor IDN to its length
+ * @hba: Pointer to adapter instance
+ * @desc_id: descriptor idn value
+ * @desc_len: mapped desc length (out)
+ *
+ * Return 0 in case of success, non-zero otherwise
+ */
+int ufshcd_map_desc_id_to_length(struct ufs_hba *hba,
+	enum desc_idn desc_id, int *desc_len)
+{
+	switch (desc_id) {
+	case QUERY_DESC_IDN_DEVICE:
+		*desc_len = hba->desc_size.dev_desc;
+		break;
+	case QUERY_DESC_IDN_POWER:
+		*desc_len = hba->desc_size.pwr_desc;
+		break;
+	case QUERY_DESC_IDN_GEOMETRY:
+		*desc_len = hba->desc_size.geom_desc;
+		break;
+	case QUERY_DESC_IDN_CONFIGURATION:
+		*desc_len = hba->desc_size.conf_desc;
+		break;
+	case QUERY_DESC_IDN_UNIT:
+		*desc_len = hba->desc_size.unit_desc;
+		break;
+	case QUERY_DESC_IDN_INTERCONNECT:
+		*desc_len = hba->desc_size.interc_desc;
+		break;
+	case QUERY_DESC_IDN_STRING:
+		*desc_len = QUERY_DESC_MAX_SIZE;
+		break;
+	case QUERY_DESC_IDN_RFU_0:
+	case QUERY_DESC_IDN_RFU_1:
+		*desc_len = 0;
+		break;
+	default:
+		*desc_len = 0;
+		return -EINVAL;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(ufshcd_map_desc_id_to_length);
+
 /**
  * ufshcd_read_desc_param - read the specified descriptor parameter
  * @hba: Pointer to adapter instance
@@ -2951,42 +3024,49 @@ static int ufshcd_query_descriptor_retry(struct ufs_hba *hba,
 static int ufshcd_read_desc_param(struct ufs_hba *hba,
 				  enum desc_idn desc_id,
 				  int desc_index,
-				  u32 param_offset,
+				  u8 param_offset,
 				  u8 *param_read_buf,
-				  u32 param_size)
+				  u8 param_size)
 {
 	int ret;
 	u8 *desc_buf;
-	u32 buff_len;
+	int buff_len;
 	bool is_kmalloc = true;
 
-	/* safety checks */
-	if (desc_id >= QUERY_DESC_IDN_MAX)
+	/* Safety check */
+	if (desc_id >= QUERY_DESC_IDN_MAX || !param_size)
 		return -EINVAL;
 
-	buff_len = ufs_query_desc_max_size[desc_id];
-	if ((param_offset + param_size) > buff_len)
-		return -EINVAL;
+	/* Get the max length of descriptor from structure filled up at probe
+	 * time.
+	 */
+	ret = ufshcd_map_desc_id_to_length(hba, desc_id, &buff_len);
 
-	if (!param_offset && (param_size == buff_len)) {
-		/* memory space already available to hold full descriptor */
-		desc_buf = param_read_buf;
-		is_kmalloc = false;
-	} else {
-		/* allocate memory to hold full descriptor */
+	/* Sanity checks */
+	if (ret || !buff_len) {
+		dev_err(hba->dev, "%s: Failed to get full descriptor length",
+			__func__);
+		return ret;
+	}
+
+	/* Check whether we need temp memory */
+	if (param_offset != 0 || param_size < buff_len) {
 		desc_buf = kmalloc(buff_len, GFP_KERNEL);
 		if (!desc_buf)
 			return -ENOMEM;
+	} else {
+		desc_buf = param_read_buf;
+		is_kmalloc = false;
 	}
 
+	/* Request for full descriptor */
 	ret = ufshcd_query_descriptor_retry(hba, UPIU_QUERY_OPCODE_READ_DESC,
-					desc_id, desc_index, 0, desc_buf,
-					&buff_len);
+					desc_id, desc_index, 0,
+					desc_buf, &buff_len);
 
 	if (ret) {
 		dev_err(hba->dev, "%s: Failed reading descriptor. desc_id %d, desc_index %d, param_offset %d, ret %d",
 			__func__, desc_id, desc_index, param_offset, ret);
-
 		goto out;
 	}
 
@@ -2998,25 +3078,9 @@ static int ufshcd_read_desc_param(struct ufs_hba *hba,
 		goto out;
 	}
 
-	/*
-	 * While reading variable size descriptors (like string descriptor),
-	 * some UFS devices may report the "LENGTH" (field in "Transaction
-	 * Specific fields" of Query Response UPIU) same as what was requested
-	 * in Query Request UPIU instead of reporting the actual size of the
-	 * variable size descriptor.
-	 * Although it's safe to ignore the "LENGTH" field for variable size
-	 * descriptors as we can always derive the length of the descriptor from
-	 * the descriptor header fields. Hence this change impose the length
-	 * match check only for fixed size descriptors (for which we always
-	 * request the correct size as part of Query Request UPIU).
-	 */
-	if ((desc_id != QUERY_DESC_IDN_STRING) &&
-	    (buff_len != desc_buf[QUERY_DESC_LENGTH_OFFSET])) {
-		dev_err(hba->dev, "%s: desc_buf length mismatch: buff_len %d, buff_len(desc_header) %d",
-			__func__, buff_len, desc_buf[QUERY_DESC_LENGTH_OFFSET]);
-		ret = -EINVAL;
-		goto out;
-	}
+	/* Check wherher we will not copy more data, than available */
+	if (is_kmalloc && param_size > buff_len)
+		param_size = buff_len;
 
 	if (is_kmalloc)
 		memcpy(param_read_buf, &desc_buf[param_offset], param_size);
@@ -5919,8 +5983,8 @@ static int ufshcd_set_icc_levels_attr(struct ufs_hba *hba, u32 icc_level)
 static void ufshcd_init_icc_levels(struct ufs_hba *hba)
 {
 	int ret;
-	int buff_len = QUERY_DESC_POWER_MAX_SIZE;
-	u8 desc_buf[QUERY_DESC_POWER_MAX_SIZE];
+	int buff_len = hba->desc_size.pwr_desc;
+	u8 desc_buf[hba->desc_size.pwr_desc];
 
 	ret = ufshcd_read_power_desc(hba, desc_buf, buff_len);
 	if (ret) {
@@ -6017,11 +6081,10 @@ static int ufs_get_device_desc(struct ufs_hba *hba,
 {
 	int err;
 	u8 model_index;
-	u8 str_desc_buf[QUERY_DESC_STRING_MAX_SIZE + 1] = {0};
-	u8 desc_buf[QUERY_DESC_DEVICE_MAX_SIZE];
+	u8 str_desc_buf[QUERY_DESC_MAX_SIZE + 1] = {0};
+	u8 desc_buf[hba->desc_size.dev_desc];
 
-	err = ufshcd_read_device_desc(hba, desc_buf,
-					QUERY_DESC_DEVICE_MAX_SIZE);
+	err = ufshcd_read_device_desc(hba, desc_buf, hba->desc_size.dev_desc);
 	if (err) {
 		dev_err(hba->dev, "%s: Failed reading Device Desc. err = %d\n",
 			__func__, err);
@@ -6038,14 +6101,14 @@ static int ufs_get_device_desc(struct ufs_hba *hba,
 	model_index = desc_buf[DEVICE_DESC_PARAM_PRDCT_NAME];
 
 	err = ufshcd_read_string_desc(hba, model_index, str_desc_buf,
-					QUERY_DESC_STRING_MAX_SIZE, ASCII_STD);
+				QUERY_DESC_MAX_SIZE, ASCII_STD);
 	if (err) {
 		dev_err(hba->dev, "%s: Failed reading Product Name. err = %d\n",
 			__func__, err);
 		goto out;
 	}
 
-	str_desc_buf[QUERY_DESC_STRING_MAX_SIZE] = '\0';
+	str_desc_buf[QUERY_DESC_MAX_SIZE] = '\0';
 	strlcpy(dev_desc->model, (str_desc_buf + QUERY_DESC_HDR_SIZE),
 		min_t(u8, str_desc_buf[QUERY_DESC_LENGTH_OFFSET],
 		      MAX_MODEL_LEN));
@@ -6251,6 +6314,51 @@ static void ufshcd_clear_dbg_ufs_stats(struct ufs_hba *hba)
 	hba->req_abort_count = 0;
 }
 
+static void ufshcd_init_desc_sizes(struct ufs_hba *hba)
+{
+	int err;
+
+	err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_DEVICE, 0,
+		&hba->desc_size.dev_desc);
+	if (err)
+		hba->desc_size.dev_desc = QUERY_DESC_DEVICE_DEF_SIZE;
+
+	err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_POWER, 0,
+		&hba->desc_size.pwr_desc);
+	if (err)
+		hba->desc_size.pwr_desc = QUERY_DESC_POWER_DEF_SIZE;
+
+	err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_INTERCONNECT, 0,
+		&hba->desc_size.interc_desc);
+	if (err)
+		hba->desc_size.interc_desc = QUERY_DESC_INTERCONNECT_DEF_SIZE;
+
+	err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_CONFIGURATION, 0,
+		&hba->desc_size.conf_desc);
+	if (err)
+		hba->desc_size.conf_desc = QUERY_DESC_CONFIGURATION_DEF_SIZE;
+
+	err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_UNIT, 0,
+		&hba->desc_size.unit_desc);
+	if (err)
+		hba->desc_size.unit_desc = QUERY_DESC_UNIT_DEF_SIZE;
+
+	err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_GEOMETRY, 0,
+		&hba->desc_size.geom_desc);
+	if (err)
+		hba->desc_size.geom_desc = QUERY_DESC_GEOMETRY_DEF_SIZE;
+}
+
+static void ufshcd_def_desc_sizes(struct ufs_hba *hba)
+{
+	hba->desc_size.dev_desc = QUERY_DESC_DEVICE_DEF_SIZE;
+	hba->desc_size.pwr_desc = QUERY_DESC_POWER_DEF_SIZE;
+	hba->desc_size.interc_desc = QUERY_DESC_INTERCONNECT_DEF_SIZE;
+	hba->desc_size.conf_desc = QUERY_DESC_CONFIGURATION_DEF_SIZE;
+	hba->desc_size.unit_desc = QUERY_DESC_UNIT_DEF_SIZE;
+	hba->desc_size.geom_desc = QUERY_DESC_GEOMETRY_DEF_SIZE;
+}
+
 /**
  * ufshcd_probe_hba - probe hba to detect device and initialize
  * @hba: per-adapter instance
@@ -6285,6 +6393,9 @@ static int ufshcd_probe_hba(struct ufs_hba *hba)
 	if (ret)
 		goto out;
 
+	/* Init check for device descriptor sizes */
+	ufshcd_init_desc_sizes(hba);
+
 	ret = ufs_get_device_desc(hba, &card);
 	if (ret) {
 		dev_err(hba->dev, "%s: Failed getting device info. err = %d\n",
@@ -6320,6 +6431,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba)
 
 	/* set the state as operational after switching to desired gear */
 	hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
+
 	/*
 	 * If we are in error handling context or in power management callbacks
 	 * context, no need to scan the host
@@ -7774,6 +7886,9 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 	hba->mmio_base = mmio_base;
 	hba->irq = irq;
 
+	/* Set descriptor lengths to specification defaults */
+	ufshcd_def_desc_sizes(hba);
+
 	err = ufshcd_hba_init(hba);
 	if (err)
 		goto out_error;
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 7630600217a2..cdc8bd05f7df 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -220,6 +220,15 @@ struct ufs_dev_cmd {
 	struct ufs_query query;
 };
 
+struct ufs_desc_size {
+	int dev_desc;
+	int pwr_desc;
+	int geom_desc;
+	int interc_desc;
+	int unit_desc;
+	int conf_desc;
+};
+
 /**
  * struct ufs_clk_info - UFS clock related info
  * @list: list headed by hba->clk_list_head
@@ -483,6 +492,7 @@ struct ufs_stats {
  * @clk_list_head: UFS host controller clocks list node head
  * @pwr_info: holds current power mode
  * @max_pwr_info: keeps the device max valid pwm
+ * @desc_size: descriptor sizes reported by device
  * @urgent_bkops_lvl: keeps track of urgent bkops level for device
  * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for
  *  device is known or not.
@@ -666,6 +676,7 @@ struct ufs_hba {
 	bool is_urgent_bkops_lvl_checked;
 
 	struct rw_semaphore clk_scaling_lock;
+	struct ufs_desc_size desc_size;
 };
 
 /* Returns true if clocks can be gated. Otherwise false */
@@ -832,6 +843,10 @@ int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode,
 	enum flag_idn idn, bool *flag_res);
 int ufshcd_hold(struct ufs_hba *hba, bool async);
 void ufshcd_release(struct ufs_hba *hba);
+
+int ufshcd_map_desc_id_to_length(struct ufs_hba *hba, enum desc_idn desc_id,
+	int *desc_length);
+
 u32 ufshcd_get_local_unipro_ver(struct ufs_hba *hba);
 
 /* Wrapper functions for safely calling variant operations */
-- 
cgit v1.2.3


From c46f09175dabd5dd6a1507f36250bfa734a0156e Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Wed, 1 Mar 2017 17:27:00 +0900
Subject: scsi: sd: Check for unaligned partial completion

Commit <f2e767bb5d6e> ("mpt3sas: Force request partial completion
alignment") was not considering the case of commands not operating on
logical block size units (e.g. REQ_OP_ZONE_REPORT and its 64B aligned
partial replies). In this case, forcing alignment of resid to the device
logical block size can break the command result, e.g. in the case of
REQ_OP_ZONE_REPORT, the exact number of zone reported by the device.

Move the partial completion alignement check of mpt3sas to a generic
implementation in sd_done(). The check is added within the default
section of the initial req_op() switch case so that the report and reset
zone commands are ignored. In addition, as sd_done() is not called for
passthrough requests, resid corrections are not done as intended by the
initial mpt3sas patch.

Fixes: f2e767bb5d6e ("mpt3sas: Force request partial completion alignment")
Cc: <stable@vger.kernel.org> # v4.10
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/mpt3sas/mpt3sas_scsih.c | 15 ---------------
 drivers/scsi/sd.c                    | 17 +++++++++++++++++
 2 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 46e866c36c8a..69c29c560575 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -4677,7 +4677,6 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply)
 	struct MPT3SAS_DEVICE *sas_device_priv_data;
 	u32 response_code = 0;
 	unsigned long flags;
-	unsigned int sector_sz;
 
 	mpi_reply = mpt3sas_base_get_reply_virt_addr(ioc, reply);
 
@@ -4742,20 +4741,6 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply)
 	}
 
 	xfer_cnt = le32_to_cpu(mpi_reply->TransferCount);
-
-	/* In case of bogus fw or device, we could end up having
-	 * unaligned partial completion. We can force alignment here,
-	 * then scsi-ml does not need to handle this misbehavior.
-	 */
-	sector_sz = scmd->device->sector_size;
-	if (unlikely(!blk_rq_is_passthrough(scmd->request) && sector_sz &&
-		     xfer_cnt % sector_sz)) {
-		sdev_printk(KERN_INFO, scmd->device,
-		    "unaligned partial completion avoided (xfer_cnt=%u, sector_sz=%u)\n",
-			    xfer_cnt, sector_sz);
-		xfer_cnt = round_down(xfer_cnt, sector_sz);
-	}
-
 	scsi_set_resid(scmd, scsi_bufflen(scmd) - xfer_cnt);
 	if (ioc_status & MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE)
 		log_info =  le32_to_cpu(mpi_reply->IOCLogInfo);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index c7839f6c35cc..fb9b4d29af0b 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1783,6 +1783,8 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 {
 	int result = SCpnt->result;
 	unsigned int good_bytes = result ? 0 : scsi_bufflen(SCpnt);
+	unsigned int sector_size = SCpnt->device->sector_size;
+	unsigned int resid;
 	struct scsi_sense_hdr sshdr;
 	struct scsi_disk *sdkp = scsi_disk(SCpnt->request->rq_disk);
 	struct request *req = SCpnt->request;
@@ -1813,6 +1815,21 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 			scsi_set_resid(SCpnt, blk_rq_bytes(req));
 		}
 		break;
+	default:
+		/*
+		 * In case of bogus fw or device, we could end up having
+		 * an unaligned partial completion. Check this here and force
+		 * alignment.
+		 */
+		resid = scsi_get_resid(SCpnt);
+		if (resid & (sector_size - 1)) {
+			sd_printk(KERN_INFO, sdkp,
+				"Unaligned partial completion (resid=%u, sector_sz=%u)\n",
+				resid, sector_size);
+			resid = min(scsi_bufflen(SCpnt),
+				    round_up(resid, sector_size));
+			scsi_set_resid(SCpnt, resid);
+		}
 	}
 
 	if (result) {
-- 
cgit v1.2.3


From 8893cf6cb1cf56334c05120e23092dbfc9423ebb Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Wed, 1 Mar 2017 09:00:36 -0800
Subject: scsi: mpt3sas: Avoid sleeping in interrupt context

Commit 669f044170d8 ("scsi: srp_transport: Move queuecommand() wait code
to SCSI core") can make scsi_internal_device_block() sleep.  However,
the mpt3sas driver can call this function from an interrupt
handler. Hence add a second argument to scsi_internal_device_block()
that restores the old behavior of this function for the mpt3sas handler.

The call chain that triggered an "IRQ handler enabled interrupts"
complaint is as follows:

_base_interrupt()
-> _base_async_event()
   -> mpt3sas_scsih_event_callback()
      -> _scsih_check_topo_delete_events()
         -> _scsih_block_io_to_children_attached_directly()
            -> _scsih_block_io_device()
               -> _scsih_internal_device_block()
                  -> scsi_internal_device_block()

Reported-by: Omar Sandoval <osandov@osandov.com>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Omar Sandoval <osandov@osandov.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Sathya Prakash <sathya.prakash@broadcom.com>
Cc: Chaitra P B <chaitra.basappa@broadcom.com>
Cc: Suganath Prabu Subramani <suganath-prabu.subramani@broadcom.com>
Cc: Sreekanth Reddy <Sreekanth.Reddy@broadcom.com>
Cc: <stable@vger.kernel.org> # v4.10+
Tested-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/mpt3sas/mpt3sas_base.h  |  3 ---
 drivers/scsi/mpt3sas/mpt3sas_scsih.c |  4 ++--
 drivers/scsi/scsi_lib.c              | 14 ++++++++++----
 drivers/scsi/scsi_priv.h             |  3 ---
 include/scsi/scsi_device.h           |  4 ++++
 5 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h
index 7fe7e6ed595b..8981806fb13f 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.h
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.h
@@ -1442,9 +1442,6 @@ void mpt3sas_transport_update_links(struct MPT3SAS_ADAPTER *ioc,
 	u64 sas_address, u16 handle, u8 phy_number, u8 link_rate);
 extern struct sas_function_template mpt3sas_transport_functions;
 extern struct scsi_transport_template *mpt3sas_transport_template;
-extern int scsi_internal_device_block(struct scsi_device *sdev);
-extern int scsi_internal_device_unblock(struct scsi_device *sdev,
-				enum scsi_device_state new_state);
 /* trigger data externs */
 void mpt3sas_send_trigger_data_event(struct MPT3SAS_ADAPTER *ioc,
 	struct SL_WH_TRIGGERS_EVENT_DATA_T *event_data);
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 69c29c560575..919ba2bb15f1 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -2859,7 +2859,7 @@ _scsih_internal_device_block(struct scsi_device *sdev,
 	    sas_device_priv_data->sas_target->handle);
 	sas_device_priv_data->block = 1;
 
-	r = scsi_internal_device_block(sdev);
+	r = scsi_internal_device_block(sdev, false);
 	if (r == -EINVAL)
 		sdev_printk(KERN_WARNING, sdev,
 		    "device_block failed with return(%d) for handle(0x%04x)\n",
@@ -2895,7 +2895,7 @@ _scsih_internal_device_unblock(struct scsi_device *sdev,
 		    "performing a block followed by an unblock\n",
 		    r, sas_device_priv_data->sas_target->handle);
 		sas_device_priv_data->block = 1;
-		r = scsi_internal_device_block(sdev);
+		r = scsi_internal_device_block(sdev, false);
 		if (r)
 			sdev_printk(KERN_WARNING, sdev, "retried device_block "
 			    "failed with return(%d) for handle(0x%04x)\n",
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index f5e45a252485..f41e6b84a1bd 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2932,6 +2932,8 @@ EXPORT_SYMBOL(scsi_target_resume);
 /**
  * scsi_internal_device_block - internal function to put a device temporarily into the SDEV_BLOCK state
  * @sdev:	device to block
+ * @wait:	Whether or not to wait until ongoing .queuecommand() /
+ *		.queue_rq() calls have finished.
  *
  * Block request made by scsi lld's to temporarily stop all
  * scsi commands on the specified device. May sleep.
@@ -2949,7 +2951,7 @@ EXPORT_SYMBOL(scsi_target_resume);
  * remove the rport mutex lock and unlock calls from srp_queuecommand().
  */
 int
-scsi_internal_device_block(struct scsi_device *sdev)
+scsi_internal_device_block(struct scsi_device *sdev, bool wait)
 {
 	struct request_queue *q = sdev->request_queue;
 	unsigned long flags;
@@ -2969,12 +2971,16 @@ scsi_internal_device_block(struct scsi_device *sdev)
 	 * request queue. 
 	 */
 	if (q->mq_ops) {
-		blk_mq_quiesce_queue(q);
+		if (wait)
+			blk_mq_quiesce_queue(q);
+		else
+			blk_mq_stop_hw_queues(q);
 	} else {
 		spin_lock_irqsave(q->queue_lock, flags);
 		blk_stop_queue(q);
 		spin_unlock_irqrestore(q->queue_lock, flags);
-		scsi_wait_for_queuecommand(sdev);
+		if (wait)
+			scsi_wait_for_queuecommand(sdev);
 	}
 
 	return 0;
@@ -3036,7 +3042,7 @@ EXPORT_SYMBOL_GPL(scsi_internal_device_unblock);
 static void
 device_block(struct scsi_device *sdev, void *data)
 {
-	scsi_internal_device_block(sdev);
+	scsi_internal_device_block(sdev, true);
 }
 
 static int
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index 99bfc985e190..f11bd102d6d5 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -188,8 +188,5 @@ static inline void scsi_dh_remove_device(struct scsi_device *sdev) { }
  */
 
 #define SCSI_DEVICE_BLOCK_MAX_TIMEOUT	600	/* units in seconds */
-extern int scsi_internal_device_block(struct scsi_device *sdev);
-extern int scsi_internal_device_unblock(struct scsi_device *sdev,
-					enum scsi_device_state new_state);
 
 #endif /* _SCSI_PRIV_H */
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 6f22b39f1b0c..080c7ce9bae8 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -472,6 +472,10 @@ static inline int scsi_device_created(struct scsi_device *sdev)
 		sdev->sdev_state == SDEV_CREATED_BLOCK;
 }
 
+int scsi_internal_device_block(struct scsi_device *sdev, bool wait);
+int scsi_internal_device_unblock(struct scsi_device *sdev,
+				 enum scsi_device_state new_state);
+
 /* accessor functions for the SCSI parameters */
 static inline int scsi_device_sync(struct scsi_device *sdev)
 {
-- 
cgit v1.2.3


From 52e51f16407b7b34e26affb500a21e250d9fce0b Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 1 Mar 2017 19:04:35 +0000
Subject: efi/libstub: Treat missing SecureBoot variable as Secure Boot
 disabled

The newly refactored code that infers the firmware's Secure Boot state
prints the following error when the EFI variable 'SecureBoot' does not
exist:

  EFI stub: ERROR: Could not determine UEFI Secure Boot status.

However, this variable is only guaranteed to be defined on a system that
is Secure Boot capable to begin with, and so it is not an error if it is
missing. So report Secure Boot as being disabled in this case, without
printing any error messages.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/1488395076-29712-2-git-send-email-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/firmware/efi/libstub/secureboot.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c
index 6def402bf569..5da36e56b36a 100644
--- a/drivers/firmware/efi/libstub/secureboot.c
+++ b/drivers/firmware/efi/libstub/secureboot.c
@@ -45,6 +45,8 @@ enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table_arg)
 	size = sizeof(secboot);
 	status = get_efi_var(efi_SecureBoot_name, &efi_variable_guid,
 			     NULL, &size, &secboot);
+	if (status == EFI_NOT_FOUND)
+		return efi_secureboot_mode_disabled;
 	if (status != EFI_SUCCESS)
 		goto out_efi_err;
 
@@ -78,7 +80,5 @@ secure_boot_enabled:
 
 out_efi_err:
 	pr_efi_err(sys_table_arg, "Could not determine UEFI Secure Boot status.\n");
-	if (status == EFI_NOT_FOUND)
-		return efi_secureboot_mode_disabled;
 	return efi_secureboot_mode_unknown;
 }
-- 
cgit v1.2.3


From d1eb98143c56f24fef125f5bbed49ae0b52fb7d6 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 1 Mar 2017 19:05:54 +0000
Subject: efi/arm: Fix boot crash with CONFIG_CPUMASK_OFFSTACK=y

On ARM and arm64, we use a dedicated mm_struct to map the UEFI
Runtime Services regions, which allows us to map those regions
on demand, and in a way that is guaranteed to be compatible
with incoming kernels across kexec.

As it turns out, we don't fully initialize the mm_struct in the
same way as process mm_structs are initialized on fork(), which
results in the following crash on ARM if CONFIG_CPUMASK_OFFSTACK=y
is enabled:

  ...
  EFI Variables Facility v0.08 2004-May-17
  Unable to handle kernel NULL pointer dereference at virtual address 00000000
  [...]
  Process swapper/0 (pid: 1)
  ...
  __memzero()
  check_and_switch_context()
  virt_efi_get_next_variable()
  efivar_init()
  efivars_sysfs_init()
  do_one_initcall()
  ...

This is due to a missing call to mm_init_cpumask(), so add it.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: <stable@vger.kernel.org> # v4.5+
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/1488395154-29786-1-git-send-email-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/firmware/efi/arm-runtime.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c
index 349dc3e1e52e..974c5a31a005 100644
--- a/drivers/firmware/efi/arm-runtime.c
+++ b/drivers/firmware/efi/arm-runtime.c
@@ -65,6 +65,7 @@ static bool __init efi_virtmap_init(void)
 	bool systab_found;
 
 	efi_mm.pgd = pgd_alloc(&efi_mm);
+	mm_init_cpumask(&efi_mm);
 	init_new_context(NULL, &efi_mm);
 
 	systab_found = false;
-- 
cgit v1.2.3


From 4ec3dd89052a437304e1451733c989b8cec681af Mon Sep 17 00:00:00 2001
From: Zhao Yan <yan.y.zhao@intel.com>
Date: Thu, 2 Mar 2017 15:12:47 +0800
Subject: drm/i915/gvt: fix an error for F_RO flag

the ro_mask is not stored into each mmio entry

Fixes: 12d14cc43b34 ("drm/i915/gvt: Introduce a framework for tracking HW registers.")
Signed-off-by: Zhao Yan <yan.y.zhao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/handlers.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 548aedfbd402..8e43395c748a 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -121,6 +121,7 @@ static int new_mmio_info(struct intel_gvt *gvt,
 		info->size = size;
 		info->length = (i + 4) < end ? 4 : (end - i);
 		info->addr_mask = addr_mask;
+		info->ro_mask = ro_mask;
 		info->device = device;
 		info->read = read ? read : intel_vgpu_default_mmio_read;
 		info->write = write ? write : intel_vgpu_default_mmio_write;
-- 
cgit v1.2.3


From 4c77b18cf8b7ab37c7d5737b4609010d2ceec5f0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 1 Mar 2017 11:24:35 +0100
Subject: sched/fair: Make select_idle_cpu() more aggressive

Kitsunyan reported desktop latency issues on his Celeron 887 because
of commit:

  1b568f0aabf2 ("sched/core: Optimize SCHED_SMT")

... even though his CPU doesn't do SMT.

The effect of running the SMT code on a !SMT part is basically a more
aggressive select_idle_cpu(). Removing the avg condition fixed things
for him.

I also know FB likes this test gone, even though other workloads like
having it.

For now, take it out by default, until we get a better idea.

Reported-by: kitsunyan <kitsunyan@inbox.ru>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Chris Mason <clm@fb.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/fair.c     | 2 +-
 kernel/sched/features.h | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 274c747a01ce..b3ee10dd3e85 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5797,7 +5797,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 	 * Due to large variance we need a large fuzz factor; hackbench in
 	 * particularly is sensitive here.
 	 */
-	if ((avg_idle / 512) < avg_cost)
+	if (sched_feat(SIS_AVG_CPU) && (avg_idle / 512) < avg_cost)
 		return -1;
 
 	time = local_clock();
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 69631fa46c2f..1b3c8189b286 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -51,6 +51,11 @@ SCHED_FEAT(NONTASK_CAPACITY, true)
  */
 SCHED_FEAT(TTWU_QUEUE, true)
 
+/*
+ * When doing wakeups, attempt to limit superfluous scans of the LLC domain.
+ */
+SCHED_FEAT(SIS_AVG_CPU, false)
+
 #ifdef HAVE_RT_PUSH_IPI
 /*
  * In order to avoid a thundering herd attack of CPUs that are
-- 
cgit v1.2.3


From 0ba87bb27d66b78e278167ac7e20c66520b8a612 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 1 Mar 2017 10:51:47 +0100
Subject: sched/core: Fix pick_next_task() for RT,DL

Pavan noticed that the following commit:

  49ee576809d8 ("sched/core: Optimize pick_next_task() for idle_sched_class")

... broke RT,DL balancing by robbing them of the opportinty to do new-'idle'
balancing when their last runnable task (on that runqueue) goes away.

Reported-by: Pavan Kondeti <pkondeti@codeaurora.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Fixes: 49ee576809d8 ("sched/core: Optimize pick_next_task() for idle_sched_class")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/core.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index bbfb917a9b49..6699d43a8843 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3273,10 +3273,15 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	struct task_struct *p;
 
 	/*
-	 * Optimization: we know that if all tasks are in
-	 * the fair class we can call that function directly:
+	 * Optimization: we know that if all tasks are in the fair class we can
+	 * call that function directly, but only if the @prev task wasn't of a
+	 * higher scheduling class, because otherwise those loose the
+	 * opportunity to pull in more work from other CPUs.
 	 */
-	if (likely(rq->nr_running == rq->cfs.h_nr_running)) {
+	if (likely((prev->sched_class == &idle_sched_class ||
+		    prev->sched_class == &fair_sched_class) &&
+		   rq->nr_running == rq->cfs.h_nr_running)) {
+
 		p = fair_sched_class.pick_next_task(rq, prev, rf);
 		if (unlikely(p == RETRY_TASK))
 			goto again;
-- 
cgit v1.2.3


From f94c8d116997597fc00f0812b0ab9256e7b0c58f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 1 Mar 2017 15:53:38 +0100
Subject: sched/clock, x86/tsc: Rework the x86 'unstable' sched_clock()
 interface

Wanpeng Li reported that since the following commit:

  acb04058de49 ("sched/clock: Fix hotplug crash")

... KVM always runs with unstable sched-clock even though KVM's
kvm_clock _is_ stable.

The problem is that we've tied clear_sched_clock_stable() to the TSC
state, and overlooked that sched_clock() is a paravirt function.

Solve this by doing two things:

 - tie the sched_clock() stable state more clearly to the TSC stable
   state for the normal (!paravirt) case.

 - only call clear_sched_clock_stable() when we mark TSC unstable
   when we use native_sched_clock().

The first means we can actually run with stable sched_clock in more
situations then before, which is good. And since commit:

  12907fbb1a69 ("sched/clock, clocksource: Add optional cs::mark_unstable() method")

... this should be reliable. Since any detection of TSC fail now results
in marking the TSC unstable.

Reported-by: Wanpeng Li <kernellwp@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Fixes: acb04058de49 ("sched/clock: Fix hotplug crash")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/amd.c       |  4 ----
 arch/x86/kernel/cpu/centaur.c   |  2 --
 arch/x86/kernel/cpu/common.c    |  3 ---
 arch/x86/kernel/cpu/cyrix.c     |  1 -
 arch/x86/kernel/cpu/intel.c     |  4 ----
 arch/x86/kernel/cpu/transmeta.c |  2 --
 arch/x86/kernel/tsc.c           | 35 +++++++++++++++++++++++------------
 7 files changed, 23 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 4e95b2e0d95f..30d924ae5c34 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -555,10 +555,6 @@ static void early_init_amd(struct cpuinfo_x86 *c)
 	if (c->x86_power & (1 << 8)) {
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
-		if (check_tsc_unstable())
-			clear_sched_clock_stable();
-	} else {
-		clear_sched_clock_stable();
 	}
 
 	/* Bit 12 of 8000_0007 edx is accumulated power mechanism. */
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index 2c234a6d94c4..bad8ff078a21 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -104,8 +104,6 @@ static void early_init_centaur(struct cpuinfo_x86 *c)
 #ifdef CONFIG_X86_64
 	set_cpu_cap(c, X86_FEATURE_SYSENTER32);
 #endif
-
-	clear_sched_clock_stable();
 }
 
 static void init_centaur(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c64ca5929cb5..9d98e2e15d54 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -86,7 +86,6 @@ static void default_init(struct cpuinfo_x86 *c)
 			strcpy(c->x86_model_id, "386");
 	}
 #endif
-	clear_sched_clock_stable();
 }
 
 static const struct cpu_dev default_cpu = {
@@ -1075,8 +1074,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 	 */
 	if (this_cpu->c_init)
 		this_cpu->c_init(c);
-	else
-		clear_sched_clock_stable();
 
 	/* Disable the PN if appropriate */
 	squash_the_stupid_serial_number(c);
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 47416f959a48..31e679238e8d 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -184,7 +184,6 @@ static void early_init_cyrix(struct cpuinfo_x86 *c)
 		set_cpu_cap(c, X86_FEATURE_CYRIX_ARR);
 		break;
 	}
-	clear_sched_clock_stable();
 }
 
 static void init_cyrix(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 017ecd3bb553..2388bafe5c37 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -161,10 +161,6 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 	if (c->x86_power & (1 << 8)) {
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
-		if (check_tsc_unstable())
-			clear_sched_clock_stable();
-	} else {
-		clear_sched_clock_stable();
 	}
 
 	/* Penwell and Cloverview have the TSC which doesn't sleep on S3 */
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index c1ea5b999839..6a9ad73a2c54 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -15,8 +15,6 @@ static void early_init_transmeta(struct cpuinfo_x86 *c)
 		if (xlvl >= 0x80860001)
 			c->x86_capability[CPUID_8086_0001_EDX] = cpuid_edx(0x80860001);
 	}
-
-	clear_sched_clock_stable();
 }
 
 static void init_transmeta(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 2724dc82f992..911129fda2f9 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -326,9 +326,16 @@ unsigned long long sched_clock(void)
 {
 	return paravirt_sched_clock();
 }
+
+static inline bool using_native_sched_clock(void)
+{
+	return pv_time_ops.sched_clock == native_sched_clock;
+}
 #else
 unsigned long long
 sched_clock(void) __attribute__((alias("native_sched_clock")));
+
+static inline bool using_native_sched_clock(void) { return true; }
 #endif
 
 int check_tsc_unstable(void)
@@ -1111,8 +1118,10 @@ static void tsc_cs_mark_unstable(struct clocksource *cs)
 {
 	if (tsc_unstable)
 		return;
+
 	tsc_unstable = 1;
-	clear_sched_clock_stable();
+	if (using_native_sched_clock())
+		clear_sched_clock_stable();
 	disable_sched_clock_irqtime();
 	pr_info("Marking TSC unstable due to clocksource watchdog\n");
 }
@@ -1134,18 +1143,20 @@ static struct clocksource clocksource_tsc = {
 
 void mark_tsc_unstable(char *reason)
 {
-	if (!tsc_unstable) {
-		tsc_unstable = 1;
+	if (tsc_unstable)
+		return;
+
+	tsc_unstable = 1;
+	if (using_native_sched_clock())
 		clear_sched_clock_stable();
-		disable_sched_clock_irqtime();
-		pr_info("Marking TSC unstable due to %s\n", reason);
-		/* Change only the rating, when not registered */
-		if (clocksource_tsc.mult)
-			clocksource_mark_unstable(&clocksource_tsc);
-		else {
-			clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE;
-			clocksource_tsc.rating = 0;
-		}
+	disable_sched_clock_irqtime();
+	pr_info("Marking TSC unstable due to %s\n", reason);
+	/* Change only the rating, when not registered */
+	if (clocksource_tsc.mult) {
+		clocksource_mark_unstable(&clocksource_tsc);
+	} else {
+		clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE;
+		clocksource_tsc.rating = 0;
 	}
 }
 
-- 
cgit v1.2.3


From 2b232e0c3b3a09f3e33750aa20e314f1b80e5361 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 28 Feb 2017 09:40:11 +0000
Subject: locking/ww_mutex: Replace cpu_relax() with cond_resched() for tests

When busy-spinning on a ww_mutex_trylock(), we depend upon the other
thread advancing and releasing the lock. This can not happen on a single
CPU unless we relinquish it:

  [ ] NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [kworker/0:1:18]
  ...
  [ ] Call Trace:
  [ ]  mutex_trylock()
  [ ]  test_mutex_work+0x31/0x56
  [ ]  process_one_work+0x1b4/0x2f9
  [ ]  worker_thread+0x1b0/0x27c
  [ ]  kthread+0xd1/0xd3
  [ ]  ret_from_fork+0x19/0x30

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: f2a5fec17395 ("locking/ww_mutex: Begin kselftests for ww_mutex")
Link: http://lkml.kernel.org/r/20170228094011.2595-1-chris@chris-wilson.co.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/locking/test-ww_mutex.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c
index da6c9a34f62f..3eb39c588397 100644
--- a/kernel/locking/test-ww_mutex.c
+++ b/kernel/locking/test-ww_mutex.c
@@ -50,7 +50,7 @@ static void test_mutex_work(struct work_struct *work)
 
 	if (mtx->flags & TEST_MTX_TRY) {
 		while (!ww_mutex_trylock(&mtx->mutex))
-			cpu_relax();
+			cond_resched();
 	} else {
 		ww_mutex_lock(&mtx->mutex, NULL);
 	}
@@ -88,7 +88,7 @@ static int __test_mutex(unsigned int flags)
 				ret = -EINVAL;
 				break;
 			}
-			cpu_relax();
+			cond_resched();
 		} while (time_before(jiffies, timeout));
 	} else {
 		ret = wait_for_completion_timeout(&mtx.done, TIMEOUT);
-- 
cgit v1.2.3


From 7fb4a2cea6b18dab56d609530d077f168169ed6b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 1 Mar 2017 16:23:30 +0100
Subject: locking/lockdep: Add nest_lock integrity test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Boqun reported that hlock->references can overflow. Add a debug test
for that to generate a clear error when this happens.

Without this, lockdep is likely to report a mysterious failure on
unlock.

Reported-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nicolai Hähnle <Nicolai.Haehnle@amd.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/locking/lockdep.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 9812e5dd409e..c0ee8607c11e 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3260,10 +3260,17 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	if (depth) {
 		hlock = curr->held_locks + depth - 1;
 		if (hlock->class_idx == class_idx && nest_lock) {
-			if (hlock->references)
+			if (hlock->references) {
+				/*
+				 * Check: unsigned int references:12, overflow.
+				 */
+				if (DEBUG_LOCKS_WARN_ON(hlock->references == (1 << 12)-1))
+					return 0;
+
 				hlock->references++;
-			else
+			} else {
 				hlock->references = 2;
+			}
 
 			return 1;
 		}
-- 
cgit v1.2.3


From 857811a37129f5d2ba162d7be3986eff44724014 Mon Sep 17 00:00:00 2001
From: Boqun Feng <boqun.feng@gmail.com>
Date: Wed, 1 Mar 2017 23:01:38 +0800
Subject: locking/ww_mutex: Adjust the lock number for stress test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Because there are only 12 bits in held_lock::references, so we only
support 4095 nested lock held in the same time, adjust the lock number
for ww_mutex stress test to kill one lockdep splat:

  [ ] [ BUG: bad unlock balance detected! ]
  [ ] kworker/u2:0/5 is trying to release lock (ww_class_mutex) at:
  [ ] ww_mutex_unlock()
  [ ] but there are no more locks to release!
  ...

Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nicolai Hähnle <Nicolai.Haehnle@amd.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170301150138.hdixnmafzfsox7nn@tardis.cn.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/locking/test-ww_mutex.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c
index 3eb39c588397..6b7abb334ca6 100644
--- a/kernel/locking/test-ww_mutex.c
+++ b/kernel/locking/test-ww_mutex.c
@@ -627,7 +627,7 @@ static int __init test_ww_mutex_init(void)
 	if (ret)
 		return ret;
 
-	ret = stress(4096, hweight32(STRESS_ALL)*ncpus, 1<<12, STRESS_ALL);
+	ret = stress(4095, hweight32(STRESS_ALL)*ncpus, 1<<12, STRESS_ALL);
 	if (ret)
 		return ret;
 
-- 
cgit v1.2.3


From bb1a2c26165640ba2cbcfe06c81e9f9d6db4e643 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 1 Mar 2017 21:10:17 +0100
Subject: x86/hpet: Prevent might sleep splat on resume

Sergey reported a might sleep warning triggered from the hpet resume
path. It's caused by the call to disable_irq() from interrupt disabled
context.

The problem with the low level resume code is that it is not accounted as a
special system_state like we do during the boot process. Calling the same
code during system boot would not trigger the warning. That's inconsistent
at best.

In this particular case it's trivial to replace the disable_irq() with
disable_hardirq() because this particular code path is solely used from
system resume and the involved hpet interrupts can never be force threaded.


Reported-and-tested-by: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Borislav Petkov <bp@alien8.de>
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1703012108460.3684@nanos
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/hpet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index dc6ba5bda9fc..89ff7af2de50 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -354,7 +354,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer)
 
 		irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq));
 		irq_domain_activate_irq(irq_get_irq_data(hdev->irq));
-		disable_irq(hdev->irq);
+		disable_hardirq(hdev->irq);
 		irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
 		enable_irq(hdev->irq);
 	}
-- 
cgit v1.2.3


From 7afbeb6df2aa5f9e3a0fc228817a85c16dea0faa Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 24 Feb 2017 12:56:12 +0100
Subject: s390/ipl: always use load normal for CCW-type re-IPL

commit 14890678687c ("s390/ipl: use load normal for LPAR re-ipl")
missed to convert one code path to use load normal semantics for
re-IPL. Convert the missing code path as well.

Fixes: 14890678687c ("s390/ipl: use load normal for LPAR re-ipl")
Reported-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Acked-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/ipl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index b67dafb7b7cf..e545ffe5155a 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -564,6 +564,8 @@ static struct kset *ipl_kset;
 
 static void __ipl_run(void *unused)
 {
+	if (MACHINE_IS_LPAR && ipl_info.type == IPL_TYPE_CCW)
+		diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
 	diag308(DIAG308_LOAD_CLEAR, NULL);
 	if (MACHINE_IS_VM)
 		__cpcmd("IPL", NULL, 0, NULL);
-- 
cgit v1.2.3


From 2e4d88009f57057df7672fa69a32b5224af54d37 Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.vnet.ibm.com>
Date: Thu, 2 Mar 2017 15:23:42 +0100
Subject: KVM: s390: Fix guest migration for huge guests resulting in panic

While we can technically not run huge page guests right now, we can
setup a guest with huge pages. Trying to migrate it will trigger a
VM_BUG_ON and, if the kernel is not configured to panic on a BUG, it
will happily try to work on non-existing page table entries.

With this patch, we always return "dirty" if we encounter a large page
when migrating. This at least fixes the immediate problem until we
have proper handling for both kind of pages.

Fixes: 15f36eb ("KVM: s390: Add proper dirty bitmap support to S390 kvm.")
Cc: <stable@vger.kernel.org> # 3.16+

Signed-off-by: Janosch Frank <frankja@linux.vnet.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/mm/pgtable.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index b48dc5f1900b..463e5ef02304 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -608,12 +608,29 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
 {
 	spinlock_t *ptl;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
 	pgste_t pgste;
 	pte_t *ptep;
 	pte_t pte;
 	bool dirty;
 
-	ptep = get_locked_pte(mm, addr, &ptl);
+	pgd = pgd_offset(mm, addr);
+	pud = pud_alloc(mm, pgd, addr);
+	if (!pud)
+		return false;
+	pmd = pmd_alloc(mm, pud, addr);
+	if (!pmd)
+		return false;
+	/* We can't run guests backed by huge pages, but userspace can
+	 * still set them up and then try to migrate them without any
+	 * migration support.
+	 */
+	if (pmd_large(*pmd))
+		return true;
+
+	ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
 	if (unlikely(!ptep))
 		return false;
 
-- 
cgit v1.2.3


From e148bd17f48bd17fca2f4f089ec879fa6e47e34c Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Date: Tue, 14 Feb 2017 14:46:42 +0530
Subject: powerpc: Emulation support for load/store instructions on LE

emulate_step() uses a number of underlying kernel functions that were
initially not enabled for LE. This has been rectified since. So, fix
emulate_step() for LE for the corresponding instructions.

Cc: stable@vger.kernel.org # v3.18+
Reported-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/lib/sstep.c | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 846dba2c6360..9c542ec70c5b 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1799,8 +1799,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		goto instr_done;
 
 	case LARX:
-		if (regs->msr & MSR_LE)
-			return 0;
 		if (op.ea & (size - 1))
 			break;		/* can't handle misaligned */
 		if (!address_ok(regs, op.ea, size))
@@ -1823,8 +1821,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		goto ldst_done;
 
 	case STCX:
-		if (regs->msr & MSR_LE)
-			return 0;
 		if (op.ea & (size - 1))
 			break;		/* can't handle misaligned */
 		if (!address_ok(regs, op.ea, size))
@@ -1849,8 +1845,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		goto ldst_done;
 
 	case LOAD:
-		if (regs->msr & MSR_LE)
-			return 0;
 		err = read_mem(&regs->gpr[op.reg], op.ea, size, regs);
 		if (!err) {
 			if (op.type & SIGNEXT)
@@ -1862,8 +1856,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 
 #ifdef CONFIG_PPC_FPU
 	case LOAD_FP:
-		if (regs->msr & MSR_LE)
-			return 0;
 		if (size == 4)
 			err = do_fp_load(op.reg, do_lfs, op.ea, size, regs);
 		else
@@ -1872,15 +1864,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 #endif
 #ifdef CONFIG_ALTIVEC
 	case LOAD_VMX:
-		if (regs->msr & MSR_LE)
-			return 0;
 		err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs);
 		goto ldst_done;
 #endif
 #ifdef CONFIG_VSX
 	case LOAD_VSX:
-		if (regs->msr & MSR_LE)
-			return 0;
 		err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs);
 		goto ldst_done;
 #endif
@@ -1903,8 +1891,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		goto instr_done;
 
 	case STORE:
-		if (regs->msr & MSR_LE)
-			return 0;
 		if ((op.type & UPDATE) && size == sizeof(long) &&
 		    op.reg == 1 && op.update_reg == 1 &&
 		    !(regs->msr & MSR_PR) &&
@@ -1917,8 +1903,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 
 #ifdef CONFIG_PPC_FPU
 	case STORE_FP:
-		if (regs->msr & MSR_LE)
-			return 0;
 		if (size == 4)
 			err = do_fp_store(op.reg, do_stfs, op.ea, size, regs);
 		else
@@ -1927,15 +1911,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 #endif
 #ifdef CONFIG_ALTIVEC
 	case STORE_VMX:
-		if (regs->msr & MSR_LE)
-			return 0;
 		err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs);
 		goto ldst_done;
 #endif
 #ifdef CONFIG_VSX
 	case STORE_VSX:
-		if (regs->msr & MSR_LE)
-			return 0;
 		err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs);
 		goto ldst_done;
 #endif
-- 
cgit v1.2.3


From 4ceae137bdab2232e57b2e6fd5631a22a0160938 Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Date: Tue, 14 Feb 2017 14:46:43 +0530
Subject: powerpc: emulate_step() tests for load/store instructions

Add new selftest that test emulate_step for Normal, Floating Point,
Vector and Vector Scalar - load/store instructions. Test should run
at boot time if CONFIG_KPROBES_SANITY_TEST and CONFIG_PPC64 is set.

Sample log:

  emulate_step_test: ld             : PASS
  emulate_step_test: lwz            : PASS
  emulate_step_test: lwzx           : PASS
  emulate_step_test: std            : PASS
  emulate_step_test: ldarx / stdcx. : PASS
  emulate_step_test: lfsx           : PASS
  emulate_step_test: stfsx          : PASS
  emulate_step_test: lfdx           : PASS
  emulate_step_test: stfdx          : PASS
  emulate_step_test: lvx            : PASS
  emulate_step_test: stvx           : PASS
  emulate_step_test: lxvd2x         : PASS
  emulate_step_test: stxvd2x        : PASS

Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
[mpe: Drop start/complete lines, make it all __init]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/ppc-opcode.h |   7 +
 arch/powerpc/lib/Makefile             |   1 +
 arch/powerpc/lib/test_emulate_step.c  | 434 ++++++++++++++++++++++++++++++++++
 3 files changed, 442 insertions(+)
 create mode 100644 arch/powerpc/lib/test_emulate_step.c

diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index d99bd442aacb..e7d6d86563ee 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -284,6 +284,13 @@
 #define PPC_INST_BRANCH_COND		0x40800000
 #define PPC_INST_LBZCIX			0x7c0006aa
 #define PPC_INST_STBCIX			0x7c0007aa
+#define PPC_INST_LWZX			0x7c00002e
+#define PPC_INST_LFSX			0x7c00042e
+#define PPC_INST_STFSX			0x7c00052e
+#define PPC_INST_LFDX			0x7c0004ae
+#define PPC_INST_STFDX			0x7c0005ae
+#define PPC_INST_LVX			0x7c0000ce
+#define PPC_INST_STVX			0x7c0001ce
 
 /* macros to insert fields into opcodes */
 #define ___PPC_RA(a)	(((a) & 0x1f) << 16)
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 0e649d72fe8d..2b5e09020cfe 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -20,6 +20,7 @@ obj64-y	+= copypage_64.o copyuser_64.o usercopy_64.o mem_64.o hweight_64.o \
 
 obj64-$(CONFIG_SMP)	+= locks.o
 obj64-$(CONFIG_ALTIVEC)	+= vmx-helper.o
+obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o
 
 obj-y			+= checksum_$(BITS).o checksum_wrappers.o
 
diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c
new file mode 100644
index 000000000000..2534c1447554
--- /dev/null
+++ b/arch/powerpc/lib/test_emulate_step.c
@@ -0,0 +1,434 @@
+/*
+ * Simple sanity test for emulate_step load/store instructions.
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "emulate_step_test: " fmt
+
+#include <linux/ptrace.h>
+#include <asm/sstep.h>
+#include <asm/ppc-opcode.h>
+
+#define IMM_L(i)		((uintptr_t)(i) & 0xffff)
+
+/*
+ * Defined with TEST_ prefix so it does not conflict with other
+ * definitions.
+ */
+#define TEST_LD(r, base, i)	(PPC_INST_LD | ___PPC_RT(r) |		\
+					___PPC_RA(base) | IMM_L(i))
+#define TEST_LWZ(r, base, i)	(PPC_INST_LWZ | ___PPC_RT(r) |		\
+					___PPC_RA(base) | IMM_L(i))
+#define TEST_LWZX(t, a, b)	(PPC_INST_LWZX | ___PPC_RT(t) |		\
+					___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_STD(r, base, i)	(PPC_INST_STD | ___PPC_RS(r) |		\
+					___PPC_RA(base) | ((i) & 0xfffc))
+#define TEST_LDARX(t, a, b, eh)	(PPC_INST_LDARX | ___PPC_RT(t) |	\
+					___PPC_RA(a) | ___PPC_RB(b) |	\
+					__PPC_EH(eh))
+#define TEST_STDCX(s, a, b)	(PPC_INST_STDCX | ___PPC_RS(s) |	\
+					___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_LFSX(t, a, b)	(PPC_INST_LFSX | ___PPC_RT(t) |		\
+					___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_STFSX(s, a, b)	(PPC_INST_STFSX | ___PPC_RS(s) |	\
+					___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_LFDX(t, a, b)	(PPC_INST_LFDX | ___PPC_RT(t) |		\
+					___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_STFDX(s, a, b)	(PPC_INST_STFDX | ___PPC_RS(s) |	\
+					___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_LVX(t, a, b)	(PPC_INST_LVX | ___PPC_RT(t) |		\
+					___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_STVX(s, a, b)	(PPC_INST_STVX | ___PPC_RS(s) |		\
+					___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_LXVD2X(s, a, b)	(PPC_INST_LXVD2X | VSX_XX1((s), R##a, R##b))
+#define TEST_STXVD2X(s, a, b)	(PPC_INST_STXVD2X | VSX_XX1((s), R##a, R##b))
+
+
+static void __init init_pt_regs(struct pt_regs *regs)
+{
+	static unsigned long msr;
+	static bool msr_cached;
+
+	memset(regs, 0, sizeof(struct pt_regs));
+
+	if (likely(msr_cached)) {
+		regs->msr = msr;
+		return;
+	}
+
+	asm volatile("mfmsr %0" : "=r"(regs->msr));
+
+	regs->msr |= MSR_FP;
+	regs->msr |= MSR_VEC;
+	regs->msr |= MSR_VSX;
+
+	msr = regs->msr;
+	msr_cached = true;
+}
+
+static void __init show_result(char *ins, char *result)
+{
+	pr_info("%-14s : %s\n", ins, result);
+}
+
+static void __init test_ld(void)
+{
+	struct pt_regs regs;
+	unsigned long a = 0x23;
+	int stepped = -1;
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long) &a;
+
+	/* ld r5, 0(r3) */
+	stepped = emulate_step(&regs, TEST_LD(5, 3, 0));
+
+	if (stepped == 1 && regs.gpr[5] == a)
+		show_result("ld", "PASS");
+	else
+		show_result("ld", "FAIL");
+}
+
+static void __init test_lwz(void)
+{
+	struct pt_regs regs;
+	unsigned int a = 0x4545;
+	int stepped = -1;
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long) &a;
+
+	/* lwz r5, 0(r3) */
+	stepped = emulate_step(&regs, TEST_LWZ(5, 3, 0));
+
+	if (stepped == 1 && regs.gpr[5] == a)
+		show_result("lwz", "PASS");
+	else
+		show_result("lwz", "FAIL");
+}
+
+static void __init test_lwzx(void)
+{
+	struct pt_regs regs;
+	unsigned int a[3] = {0x0, 0x0, 0x1234};
+	int stepped = -1;
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long) a;
+	regs.gpr[4] = 8;
+	regs.gpr[5] = 0x8765;
+
+	/* lwzx r5, r3, r4 */
+	stepped = emulate_step(&regs, TEST_LWZX(5, 3, 4));
+	if (stepped == 1 && regs.gpr[5] == a[2])
+		show_result("lwzx", "PASS");
+	else
+		show_result("lwzx", "FAIL");
+}
+
+static void __init test_std(void)
+{
+	struct pt_regs regs;
+	unsigned long a = 0x1234;
+	int stepped = -1;
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long) &a;
+	regs.gpr[5] = 0x5678;
+
+	/* std r5, 0(r3) */
+	stepped = emulate_step(&regs, TEST_STD(5, 3, 0));
+	if (stepped == 1 || regs.gpr[5] == a)
+		show_result("std", "PASS");
+	else
+		show_result("std", "FAIL");
+}
+
+static void __init test_ldarx_stdcx(void)
+{
+	struct pt_regs regs;
+	unsigned long a = 0x1234;
+	int stepped = -1;
+	unsigned long cr0_eq = 0x1 << 29; /* eq bit of CR0 */
+
+	init_pt_regs(&regs);
+	asm volatile("mfcr %0" : "=r"(regs.ccr));
+
+
+	/*** ldarx ***/
+
+	regs.gpr[3] = (unsigned long) &a;
+	regs.gpr[4] = 0;
+	regs.gpr[5] = 0x5678;
+
+	/* ldarx r5, r3, r4, 0 */
+	stepped = emulate_step(&regs, TEST_LDARX(5, 3, 4, 0));
+
+	/*
+	 * Don't touch 'a' here. Touching 'a' can do Load/store
+	 * of 'a' which result in failure of subsequent stdcx.
+	 * Instead, use hardcoded value for comparison.
+	 */
+	if (stepped <= 0 || regs.gpr[5] != 0x1234) {
+		show_result("ldarx / stdcx.", "FAIL (ldarx)");
+		return;
+	}
+
+
+	/*** stdcx. ***/
+
+	regs.gpr[5] = 0x9ABC;
+
+	/* stdcx. r5, r3, r4 */
+	stepped = emulate_step(&regs, TEST_STDCX(5, 3, 4));
+
+	/*
+	 * Two possible scenarios that indicates successful emulation
+	 * of stdcx. :
+	 *  1. Reservation is active and store is performed. In this
+	 *     case cr0.eq bit will be set to 1.
+	 *  2. Reservation is not active and store is not performed.
+	 *     In this case cr0.eq bit will be set to 0.
+	 */
+	if (stepped == 1 && ((regs.gpr[5] == a && (regs.ccr & cr0_eq))
+			|| (regs.gpr[5] != a && !(regs.ccr & cr0_eq))))
+		show_result("ldarx / stdcx.", "PASS");
+	else
+		show_result("ldarx / stdcx.", "FAIL (stdcx.)");
+}
+
+#ifdef CONFIG_PPC_FPU
+static void __init test_lfsx_stfsx(void)
+{
+	struct pt_regs regs;
+	union {
+		float a;
+		int b;
+	} c;
+	int cached_b;
+	int stepped = -1;
+
+	init_pt_regs(&regs);
+
+
+	/*** lfsx ***/
+
+	c.a = 123.45;
+	cached_b = c.b;
+
+	regs.gpr[3] = (unsigned long) &c.a;
+	regs.gpr[4] = 0;
+
+	/* lfsx frt10, r3, r4 */
+	stepped = emulate_step(&regs, TEST_LFSX(10, 3, 4));
+
+	if (stepped == 1)
+		show_result("lfsx", "PASS");
+	else
+		show_result("lfsx", "FAIL");
+
+
+	/*** stfsx ***/
+
+	c.a = 678.91;
+
+	/* stfsx frs10, r3, r4 */
+	stepped = emulate_step(&regs, TEST_STFSX(10, 3, 4));
+
+	if (stepped == 1 && c.b == cached_b)
+		show_result("stfsx", "PASS");
+	else
+		show_result("stfsx", "FAIL");
+}
+
+static void __init test_lfdx_stfdx(void)
+{
+	struct pt_regs regs;
+	union {
+		double a;
+		long b;
+	} c;
+	long cached_b;
+	int stepped = -1;
+
+	init_pt_regs(&regs);
+
+
+	/*** lfdx ***/
+
+	c.a = 123456.78;
+	cached_b = c.b;
+
+	regs.gpr[3] = (unsigned long) &c.a;
+	regs.gpr[4] = 0;
+
+	/* lfdx frt10, r3, r4 */
+	stepped = emulate_step(&regs, TEST_LFDX(10, 3, 4));
+
+	if (stepped == 1)
+		show_result("lfdx", "PASS");
+	else
+		show_result("lfdx", "FAIL");
+
+
+	/*** stfdx ***/
+
+	c.a = 987654.32;
+
+	/* stfdx frs10, r3, r4 */
+	stepped = emulate_step(&regs, TEST_STFDX(10, 3, 4));
+
+	if (stepped == 1 && c.b == cached_b)
+		show_result("stfdx", "PASS");
+	else
+		show_result("stfdx", "FAIL");
+}
+#else
+static void __init test_lfsx_stfsx(void)
+{
+	show_result("lfsx", "SKIP (CONFIG_PPC_FPU is not set)");
+	show_result("stfsx", "SKIP (CONFIG_PPC_FPU is not set)");
+}
+
+static void __init test_lfdx_stfdx(void)
+{
+	show_result("lfdx", "SKIP (CONFIG_PPC_FPU is not set)");
+	show_result("stfdx", "SKIP (CONFIG_PPC_FPU is not set)");
+}
+#endif /* CONFIG_PPC_FPU */
+
+#ifdef CONFIG_ALTIVEC
+static void __init test_lvx_stvx(void)
+{
+	struct pt_regs regs;
+	union {
+		vector128 a;
+		u32 b[4];
+	} c;
+	u32 cached_b[4];
+	int stepped = -1;
+
+	init_pt_regs(&regs);
+
+
+	/*** lvx ***/
+
+	cached_b[0] = c.b[0] = 923745;
+	cached_b[1] = c.b[1] = 2139478;
+	cached_b[2] = c.b[2] = 9012;
+	cached_b[3] = c.b[3] = 982134;
+
+	regs.gpr[3] = (unsigned long) &c.a;
+	regs.gpr[4] = 0;
+
+	/* lvx vrt10, r3, r4 */
+	stepped = emulate_step(&regs, TEST_LVX(10, 3, 4));
+
+	if (stepped == 1)
+		show_result("lvx", "PASS");
+	else
+		show_result("lvx", "FAIL");
+
+
+	/*** stvx ***/
+
+	c.b[0] = 4987513;
+	c.b[1] = 84313948;
+	c.b[2] = 71;
+	c.b[3] = 498532;
+
+	/* stvx vrs10, r3, r4 */
+	stepped = emulate_step(&regs, TEST_STVX(10, 3, 4));
+
+	if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] &&
+	    cached_b[2] == c.b[2] && cached_b[3] == c.b[3])
+		show_result("stvx", "PASS");
+	else
+		show_result("stvx", "FAIL");
+}
+#else
+static void __init test_lvx_stvx(void)
+{
+	show_result("lvx", "SKIP (CONFIG_ALTIVEC is not set)");
+	show_result("stvx", "SKIP (CONFIG_ALTIVEC is not set)");
+}
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_VSX
+static void __init test_lxvd2x_stxvd2x(void)
+{
+	struct pt_regs regs;
+	union {
+		vector128 a;
+		u32 b[4];
+	} c;
+	u32 cached_b[4];
+	int stepped = -1;
+
+	init_pt_regs(&regs);
+
+
+	/*** lxvd2x ***/
+
+	cached_b[0] = c.b[0] = 18233;
+	cached_b[1] = c.b[1] = 34863571;
+	cached_b[2] = c.b[2] = 834;
+	cached_b[3] = c.b[3] = 6138911;
+
+	regs.gpr[3] = (unsigned long) &c.a;
+	regs.gpr[4] = 0;
+
+	/* lxvd2x vsr39, r3, r4 */
+	stepped = emulate_step(&regs, TEST_LXVD2X(39, 3, 4));
+
+	if (stepped == 1)
+		show_result("lxvd2x", "PASS");
+	else
+		show_result("lxvd2x", "FAIL");
+
+
+	/*** stxvd2x ***/
+
+	c.b[0] = 21379463;
+	c.b[1] = 87;
+	c.b[2] = 374234;
+	c.b[3] = 4;
+
+	/* stxvd2x vsr39, r3, r4 */
+	stepped = emulate_step(&regs, TEST_STXVD2X(39, 3, 4));
+
+	if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] &&
+	    cached_b[2] == c.b[2] && cached_b[3] == c.b[3])
+		show_result("stxvd2x", "PASS");
+	else
+		show_result("stxvd2x", "FAIL");
+}
+#else
+static void __init test_lxvd2x_stxvd2x(void)
+{
+	show_result("lxvd2x", "SKIP (CONFIG_VSX is not set)");
+	show_result("stxvd2x", "SKIP (CONFIG_VSX is not set)");
+}
+#endif /* CONFIG_VSX */
+
+static int __init test_emulate_step(void)
+{
+	test_ld();
+	test_lwz();
+	test_lwzx();
+	test_std();
+	test_ldarx_stdcx();
+	test_lfsx_stfsx();
+	test_lfdx_stfdx();
+	test_lvx_stvx();
+	test_lxvd2x_stxvd2x();
+
+	return 0;
+}
+late_initcall(test_emulate_step);
-- 
cgit v1.2.3


From 7a70d7288c926ae88e0c773fbb506aa374e99c2d Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Mon, 27 Feb 2017 14:32:41 +1100
Subject: powerpc/64: Invalidate process table caching after setting process
 table

The POWER9 MMU reads and caches entries from the process table.
When we kexec from one kernel to another, the second kernel sets
its process table pointer but doesn't currently do anything to
make the CPU invalidate any cached entries from the old process table.
This adds a tlbie (TLB invalidate entry) instruction with parameters
to invalidate caching of the process table after the new process
table is installed.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/pgtable-radix.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index feeda90cd06d..01c94f141164 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -186,6 +186,10 @@ static void __init radix_init_pgtable(void)
 	 */
 	register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12);
 	pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd);
+	asm volatile("ptesync" : : : "memory");
+	asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
+		     "r" (TLBIEL_INVAL_SET_LPID), "r" (0));
+	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
 }
 
 static void __init radix_init_partition_table(void)
-- 
cgit v1.2.3


From 424f8acd328a111319ae30bf384e5dfb9bc8f8cb Mon Sep 17 00:00:00 2001
From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
Date: Mon, 27 Feb 2017 11:10:07 +0530
Subject: powerpc/powernv: Fix bug due to labeling ambiguity in
 power_enter_stop

Commit 09206b600c76 ("powernv: Pass PSSCR value and mask to
power9_idle_stop") added additional code in power_enter_stop() to
distinguish between stop requests whose PSSCR had ESL=EC=1 from those
which did not. When ESL=EC=1, we do a forward-jump to a location
labelled by "1", which had the code to handle the ESL=EC=1 case.

Unfortunately just a couple of instructions before this label, is the
macro IDLE_STATE_ENTER_SEQ() which also has a label "1" in its
expansion.

As a result, the current code can result in directly executing stop
instruction for deep stop requests with PSSCR ESL=EC=1, without saving
the hypervisor state.

Fix this BUG by labeling the location that handles ESL=EC=1 case with
a more descriptive label ".Lhandle_esl_ec_set" (local label suggestion
a la .Lxx from Anton Blanchard).

While at it, rename the label "2" labelling the location of the code
handling entry into deep stop states with ".Lhandle_deep_stop".

For a good measure, change the label in IDLE_STATE_ENTER_SEQ() macro
to an not-so commonly used value in order to avoid similar mishaps in
the future.

Fixes: 09206b600c76 ("powernv: Pass PSSCR value and mask to power9_idle_stop")
Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/cpuidle.h |  4 ++--
 arch/powerpc/kernel/idle_book3s.S  | 10 ++++++----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
index fd321eb423cb..155731557c9b 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -70,8 +70,8 @@ static inline void report_invalid_psscr_val(u64 psscr_val, int err)
 	std	r0,0(r1);					\
 	ptesync;						\
 	ld	r0,0(r1);					\
-1:	cmpd	cr0,r0,r0;					\
-	bne	1b;						\
+236:	cmpd	cr0,r0,r0;					\
+	bne	236b;						\
 	IDLE_INST;						\
 
 #define	IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST)			\
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 5f61cc0349c0..995728736677 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -276,19 +276,21 @@ power_enter_stop:
  */
 	andis.   r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
 	clrldi   r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
-	bne	 1f
+	bne	 .Lhandle_esl_ec_set
 	IDLE_STATE_ENTER_SEQ(PPC_STOP)
 	li	r3,0  /* Since we didn't lose state, return 0 */
 	b 	pnv_wakeup_noloss
+
+.Lhandle_esl_ec_set:
 /*
  * Check if the requested state is a deep idle state.
  */
-1:	LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
+	LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
 	ld	r4,ADDROFF(pnv_first_deep_stop_state)(r5)
 	cmpd	r3,r4
-	bge	2f
+	bge	.Lhandle_deep_stop
 	IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP)
-2:
+.Lhandle_deep_stop:
 /*
  * Entering deep idle state.
  * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to
-- 
cgit v1.2.3


From a6d8a21596df041f36f4c2ccc260c459e3e851f1 Mon Sep 17 00:00:00 2001
From: Sachin Sant <sachinp@linux.vnet.ibm.com>
Date: Sun, 26 Feb 2017 11:38:39 +0530
Subject: selftest/powerpc: Fix false failures for skipped tests

Tests under alignment subdirectory are skipped when executed on previous
generation hardware, but harness still marks them as failed.

  test: test_copy_unaligned
  tags: git_version:unknown
  [SKIP] Test skipped on line 26
  skip: test_copy_unaligned
  selftests: copy_unaligned [FAIL]

The MAGIC_SKIP_RETURN_VALUE value assigned to rc variable is retained till
the program exit which causes the test to be marked as failed.

This patch resets the value before returning to the main() routine.
With this patch the test o/p is as follows:

  test: test_copy_unaligned
  tags: git_version:unknown
  [SKIP] Test skipped on line 26
  skip: test_copy_unaligned
  selftests: copy_unaligned [PASS]

Signed-off-by: Sachin Sant <sachinp@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/harness.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c
index 248a820048df..66d31de60b9a 100644
--- a/tools/testing/selftests/powerpc/harness.c
+++ b/tools/testing/selftests/powerpc/harness.c
@@ -114,9 +114,11 @@ int test_harness(int (test_function)(void), char *name)
 
 	rc = run_test(test_function, name);
 
-	if (rc == MAGIC_SKIP_RETURN_VALUE)
+	if (rc == MAGIC_SKIP_RETURN_VALUE) {
 		test_skip(name);
-	else
+		/* so that skipped test is not marked as failed */
+		rc = 0;
+	} else
 		test_finish(name, rc);
 
 	return rc;
-- 
cgit v1.2.3


From 4dc831aa88132f835cefe876aa0206977c4d7710 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sun, 27 Nov 2016 13:46:20 +1100
Subject: powerpc: Fix compiling a BE kernel with a powerpc64le toolchain

GCC can compile with either endian, but the default ABI version is set
based on the default endianness of the toolchain. Alan Modra says:

  you need both -mbig and -mabi=elfv1 to make a powerpc64le gcc
  generate powerpc64 code

The opposite is true for powerpc64 when generating -mlittle it
requires -mabi=elfv2 to generate v2 ABI, which we were already doing.

This change adds ABI annotations together with endianness for all cases,
LE and BE. This fixes the case of building a BE kernel with a toolchain
that is LE by default.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Tested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/Makefile | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 31286fa7873c..19b0d1a81959 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -72,8 +72,15 @@ GNUTARGET	:= powerpc
 MULTIPLEWORD	:= -mmultiple
 endif
 
-cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mbig-endian)
+ifdef CONFIG_PPC64
+cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mabi=elfv1)
+cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mcall-aixdesc)
+aflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mabi=elfv1)
+aflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mabi=elfv2
+endif
+
 cflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mlittle-endian
+cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mbig-endian)
 ifneq ($(cc-name),clang)
   cflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mno-strict-align
 endif
@@ -113,7 +120,9 @@ ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
 CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc))
 AFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv2)
 else
+CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv1)
 CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mcall-aixdesc)
+AFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv1)
 endif
 CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc))
 CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mno-pointers-to-nested-functions)
-- 
cgit v1.2.3


From 3fb66a70a4ae886445743354e4b60e54058bb3ff Mon Sep 17 00:00:00 2001
From: Laurentiu Tudor <laurentiu.tudor@nxp.com>
Date: Thu, 16 Feb 2017 09:11:29 -0600
Subject: powerpc/booke: Fix boot crash due to null hugepd

On 32-bit book-e machines, hugepd_ok() no longer takes into account null
hugepd values, causing this crash at boot:

  Unable to handle kernel paging request for data at address 0x80000000
  ...
  NIP [c0018378] follow_huge_addr+0x38/0xf0
  LR [c001836c] follow_huge_addr+0x2c/0xf0
  Call Trace:
   follow_huge_addr+0x2c/0xf0 (unreliable)
   follow_page_mask+0x40/0x3e0
   __get_user_pages+0xc8/0x450
   get_user_pages_remote+0x8c/0x250
   copy_strings+0x110/0x390
   copy_strings_kernel+0x2c/0x50
   do_execveat_common+0x478/0x630
   do_execve+0x2c/0x40
   try_to_run_init_process+0x18/0x60
   kernel_init+0xbc/0x110
   ret_from_kernel_thread+0x5c/0x64

This impacts all nxp (ex-freescale) 32-bit booke platforms.

This was caused by the change of hugepd_t.pd from signed to unsigned,
and the update to the nohash version of hugepd_ok(). Previously
hugepd_ok() could exclude all non-huge and NULL pgds using > 0, whereas
now we need to explicitly check that the value is not zero and also that
PD_HUGE is *clear*.

This isn't protected by the pgd_none() check in __find_linux_pte_or_hugepte()
because on 32-bit we use pgtable-nopud.h, which causes the pgd_none()
check to be always false.

Fixes: 20717e1ff526 ("powerpc/mm: Fix little-endian 4K hugetlb")
Cc: stable@vger.kernel.org # v4.7+
Reported-by: Madalin-Cristian Bucur <madalin.bucur@nxp.com>
Signed-off-by: Laurentiu Tudor <laurentiu.tudor@nxp.com>
[mpe: Flesh out change log details.]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/nohash/pgtable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index 0cd8a3852763..e5805ad78e12 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -230,7 +230,7 @@ static inline int hugepd_ok(hugepd_t hpd)
 	return ((hpd_val(hpd) & 0x4) != 0);
 #else
 	/* We clear the top bit to indicate hugepd */
-	return ((hpd_val(hpd) & PD_HUGE) ==  0);
+	return (hpd_val(hpd) && (hpd_val(hpd) & PD_HUGE) == 0);
 #endif
 }
 
-- 
cgit v1.2.3


From 0265634eb9e04a16ae99941c320718c38eb865e0 Mon Sep 17 00:00:00 2001
From: Sean Young <sean@mess.org>
Date: Sat, 25 Feb 2017 08:28:16 -0300
Subject: [media] serial_ir: ensure we're ready to receive interrupts

When the interrupt requested with devm_request_irq(), serial_ir.rcdev
is still null so will cause null deference if the irq handler is called
early on.

Also ensure that timeout_timer is setup.

Link: http://lkml.kernel.org/r/CA+55aFxsh2uF8gi5sN_guY3Z+tiLv7LpJYKBw+y8vqLzp+TsnQ@mail.gmail.com

[mchehab@s-opensource.com: moved serial_ir_probe() back to its original place]

Cc: <stable@vger.kernel.org> # 4.10
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 drivers/media/rc/serial_ir.c | 123 ++++++++++++++++++++++---------------------
 1 file changed, 62 insertions(+), 61 deletions(-)

diff --git a/drivers/media/rc/serial_ir.c b/drivers/media/rc/serial_ir.c
index 923fb2299553..41b54e40176c 100644
--- a/drivers/media/rc/serial_ir.c
+++ b/drivers/media/rc/serial_ir.c
@@ -487,10 +487,69 @@ static void serial_ir_timeout(unsigned long arg)
 	ir_raw_event_handle(serial_ir.rcdev);
 }
 
+/* Needed by serial_ir_probe() */
+static int serial_ir_tx(struct rc_dev *dev, unsigned int *txbuf,
+			unsigned int count);
+static int serial_ir_tx_duty_cycle(struct rc_dev *dev, u32 cycle);
+static int serial_ir_tx_carrier(struct rc_dev *dev, u32 carrier);
+static int serial_ir_open(struct rc_dev *rcdev);
+static void serial_ir_close(struct rc_dev *rcdev);
+
 static int serial_ir_probe(struct platform_device *dev)
 {
+	struct rc_dev *rcdev;
 	int i, nlow, nhigh, result;
 
+	rcdev = devm_rc_allocate_device(&dev->dev, RC_DRIVER_IR_RAW);
+	if (!rcdev)
+		return -ENOMEM;
+
+	if (hardware[type].send_pulse && hardware[type].send_space)
+		rcdev->tx_ir = serial_ir_tx;
+	if (hardware[type].set_send_carrier)
+		rcdev->s_tx_carrier = serial_ir_tx_carrier;
+	if (hardware[type].set_duty_cycle)
+		rcdev->s_tx_duty_cycle = serial_ir_tx_duty_cycle;
+
+	switch (type) {
+	case IR_HOMEBREW:
+		rcdev->input_name = "Serial IR type home-brew";
+		break;
+	case IR_IRDEO:
+		rcdev->input_name = "Serial IR type IRdeo";
+		break;
+	case IR_IRDEO_REMOTE:
+		rcdev->input_name = "Serial IR type IRdeo remote";
+		break;
+	case IR_ANIMAX:
+		rcdev->input_name = "Serial IR type AnimaX";
+		break;
+	case IR_IGOR:
+		rcdev->input_name = "Serial IR type IgorPlug";
+		break;
+	}
+
+	rcdev->input_phys = KBUILD_MODNAME "/input0";
+	rcdev->input_id.bustype = BUS_HOST;
+	rcdev->input_id.vendor = 0x0001;
+	rcdev->input_id.product = 0x0001;
+	rcdev->input_id.version = 0x0100;
+	rcdev->open = serial_ir_open;
+	rcdev->close = serial_ir_close;
+	rcdev->dev.parent = &serial_ir.pdev->dev;
+	rcdev->allowed_protocols = RC_BIT_ALL_IR_DECODER;
+	rcdev->driver_name = KBUILD_MODNAME;
+	rcdev->map_name = RC_MAP_RC6_MCE;
+	rcdev->min_timeout = 1;
+	rcdev->timeout = IR_DEFAULT_TIMEOUT;
+	rcdev->max_timeout = 10 * IR_DEFAULT_TIMEOUT;
+	rcdev->rx_resolution = 250000;
+
+	serial_ir.rcdev = rcdev;
+
+	setup_timer(&serial_ir.timeout_timer, serial_ir_timeout,
+		    (unsigned long)&serial_ir);
+
 	result = devm_request_irq(&dev->dev, irq, serial_ir_irq_handler,
 				  share_irq ? IRQF_SHARED : 0,
 				  KBUILD_MODNAME, &hardware);
@@ -516,9 +575,6 @@ static int serial_ir_probe(struct platform_device *dev)
 		return -EBUSY;
 	}
 
-	setup_timer(&serial_ir.timeout_timer, serial_ir_timeout,
-		    (unsigned long)&serial_ir);
-
 	result = hardware_init_port();
 	if (result < 0)
 		return result;
@@ -552,7 +608,8 @@ static int serial_ir_probe(struct platform_device *dev)
 			 sense ? "low" : "high");
 
 	dev_dbg(&dev->dev, "Interrupt %d, port %04x obtained\n", irq, io);
-	return 0;
+
+	return devm_rc_register_device(&dev->dev, rcdev);
 }
 
 static int serial_ir_open(struct rc_dev *rcdev)
@@ -723,7 +780,6 @@ static void serial_ir_exit(void)
 
 static int __init serial_ir_init_module(void)
 {
-	struct rc_dev *rcdev;
 	int result;
 
 	switch (type) {
@@ -754,63 +810,9 @@ static int __init serial_ir_init_module(void)
 		sense = !!sense;
 
 	result = serial_ir_init();
-	if (result)
-		return result;
-
-	rcdev = devm_rc_allocate_device(&serial_ir.pdev->dev, RC_DRIVER_IR_RAW);
-	if (!rcdev) {
-		result = -ENOMEM;
-		goto serial_cleanup;
-	}
-
-	if (hardware[type].send_pulse && hardware[type].send_space)
-		rcdev->tx_ir = serial_ir_tx;
-	if (hardware[type].set_send_carrier)
-		rcdev->s_tx_carrier = serial_ir_tx_carrier;
-	if (hardware[type].set_duty_cycle)
-		rcdev->s_tx_duty_cycle = serial_ir_tx_duty_cycle;
-
-	switch (type) {
-	case IR_HOMEBREW:
-		rcdev->input_name = "Serial IR type home-brew";
-		break;
-	case IR_IRDEO:
-		rcdev->input_name = "Serial IR type IRdeo";
-		break;
-	case IR_IRDEO_REMOTE:
-		rcdev->input_name = "Serial IR type IRdeo remote";
-		break;
-	case IR_ANIMAX:
-		rcdev->input_name = "Serial IR type AnimaX";
-		break;
-	case IR_IGOR:
-		rcdev->input_name = "Serial IR type IgorPlug";
-		break;
-	}
-
-	rcdev->input_phys = KBUILD_MODNAME "/input0";
-	rcdev->input_id.bustype = BUS_HOST;
-	rcdev->input_id.vendor = 0x0001;
-	rcdev->input_id.product = 0x0001;
-	rcdev->input_id.version = 0x0100;
-	rcdev->open = serial_ir_open;
-	rcdev->close = serial_ir_close;
-	rcdev->dev.parent = &serial_ir.pdev->dev;
-	rcdev->allowed_protocols = RC_BIT_ALL_IR_DECODER;
-	rcdev->driver_name = KBUILD_MODNAME;
-	rcdev->map_name = RC_MAP_RC6_MCE;
-	rcdev->min_timeout = 1;
-	rcdev->timeout = IR_DEFAULT_TIMEOUT;
-	rcdev->max_timeout = 10 * IR_DEFAULT_TIMEOUT;
-	rcdev->rx_resolution = 250000;
-
-	serial_ir.rcdev = rcdev;
-
-	result = rc_register_device(rcdev);
-
 	if (!result)
 		return 0;
-serial_cleanup:
+
 	serial_ir_exit();
 	return result;
 }
@@ -818,7 +820,6 @@ serial_cleanup:
 static void __exit serial_ir_exit_module(void)
 {
 	del_timer_sync(&serial_ir.timeout_timer);
-	rc_unregister_device(serial_ir.rcdev);
 	serial_ir_exit();
 }
 
-- 
cgit v1.2.3


From 2e1e4949f9dfb053122785cd73540bb1e61f768b Mon Sep 17 00:00:00 2001
From: Qi Hou <qi.hou@windriver.com>
Date: Fri, 3 Mar 2017 15:57:11 +0800
Subject: i2c: add missing of_node_put in i2c_mux_del_adapters

Refcount of of_node is increased with of_node_get() in i2c_mux_add_adapter().
It must be decreased with of_node_put() in i2c_mux_del_adapters().

Cc: stable@vger.kernel.org
Signed-off-by: Qi Hou <qi.hou@windriver.com>
Reviewed-by: Zhang Xiao <xiao.zhang@windriver.com>
Signed-off-by: Peter Rosin <peda@axentia.se>
---
 drivers/i2c/i2c-mux.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c
index 83768e85a919..2178266bca79 100644
--- a/drivers/i2c/i2c-mux.c
+++ b/drivers/i2c/i2c-mux.c
@@ -429,6 +429,7 @@ void i2c_mux_del_adapters(struct i2c_mux_core *muxc)
 	while (muxc->num_adapters) {
 		struct i2c_adapter *adap = muxc->adapter[--muxc->num_adapters];
 		struct i2c_mux_priv *priv = adap->algo_data;
+		struct device_node *np = adap->dev.of_node;
 
 		muxc->adapter[muxc->num_adapters] = NULL;
 
@@ -438,6 +439,7 @@ void i2c_mux_del_adapters(struct i2c_mux_core *muxc)
 
 		sysfs_remove_link(&priv->adap.dev.kobj, "mux_device");
 		i2c_del_adapter(adap);
+		of_node_put(np);
 		kfree(priv);
 	}
 }
-- 
cgit v1.2.3


From db5b15b74ed9a5c04bb808d18ffa2c773f5c18c0 Mon Sep 17 00:00:00 2001
From: Sean Young <sean@mess.org>
Date: Mon, 13 Feb 2017 10:35:44 -0200
Subject: [media] lirc: fix dead lock between open and wakeup_filter

The locking in lirc needs improvement, but for now just fix this potential
deadlock.

======================================================
[ INFO: possible circular locking dependency detected ]
4.10.0-rc1+ #1 Not tainted
-------------------------------------------------------
bash/2502 is trying to acquire lock:
 (ir_raw_handler_lock){+.+.+.}, at: [<ffffffffc06f6a5e>] ir_raw_encode_scancode+0x3e/0xb0 [rc_core]

               but task is already holding lock:
 (&dev->lock){+.+.+.}, at: [<ffffffffc06f511f>] store_filter+0x9f/0x240 [rc_core]

               which lock already depends on the new lock.

               the existing dependency chain (in reverse order) is:

               -> #2 (&dev->lock){+.+.+.}:

[<ffffffffa110adad>] lock_acquire+0xfd/0x200
[<ffffffffa1921327>] mutex_lock_nested+0x77/0x6d0
[<ffffffffc06f436a>] rc_open+0x2a/0x80 [rc_core]
[<ffffffffc07114ca>] lirc_dev_fop_open+0xda/0x1e0 [lirc_dev]
[<ffffffffa12975e0>] chrdev_open+0xb0/0x210
[<ffffffffa128eb5a>] do_dentry_open+0x20a/0x2f0
[<ffffffffa128ffcc>] vfs_open+0x4c/0x80
[<ffffffffa12a35ec>] path_openat+0x5bc/0xc00
[<ffffffffa12a5271>] do_filp_open+0x91/0x100
[<ffffffffa12903f0>] do_sys_open+0x130/0x220
[<ffffffffa12904fe>] SyS_open+0x1e/0x20
[<ffffffffa19278c1>] entry_SYSCALL_64_fastpath+0x1f/0xc2
               -> #1 (lirc_dev_lock){+.+.+.}:
[<ffffffffa110adad>] lock_acquire+0xfd/0x200
[<ffffffffa1921327>] mutex_lock_nested+0x77/0x6d0
[<ffffffffc0711f47>] lirc_register_driver+0x67/0x59b [lirc_dev]
[<ffffffffc06db7f4>] ir_lirc_register+0x1f4/0x260 [ir_lirc_codec]
[<ffffffffc06f6cac>] ir_raw_handler_register+0x7c/0xb0 [rc_core]
[<ffffffffc0398010>] 0xffffffffc0398010
[<ffffffffa1002192>] do_one_initcall+0x52/0x1b0
[<ffffffffa11ef5c8>] do_init_module+0x5f/0x1fa
[<ffffffffa11566b5>] load_module+0x2675/0x2b00
[<ffffffffa1156dcf>] SYSC_finit_module+0xdf/0x110
[<ffffffffa1156e1e>] SyS_finit_module+0xe/0x10
[<ffffffffa1003f5c>] do_syscall_64+0x6c/0x1f0
[<ffffffffa1927989>] return_from_SYSCALL_64+0x0/0x7a
               -> #0 (ir_raw_handler_lock){+.+.+.}:
[<ffffffffa110a7b7>] __lock_acquire+0x10f7/0x1290
[<ffffffffa110adad>] lock_acquire+0xfd/0x200
[<ffffffffa1921327>] mutex_lock_nested+0x77/0x6d0
[<ffffffffc06f6a5e>] ir_raw_encode_scancode+0x3e/0xb0 [rc_core]
[<ffffffffc0b0f492>] loop_set_wakeup_filter+0x62/0xbd [rc_loopback]
[<ffffffffc06f522a>] store_filter+0x1aa/0x240 [rc_core]
[<ffffffffa15e46f8>] dev_attr_store+0x18/0x30
[<ffffffffa13318e5>] sysfs_kf_write+0x45/0x60
[<ffffffffa1330b55>] kernfs_fop_write+0x155/0x1e0
[<ffffffffa1290797>] __vfs_write+0x37/0x160
[<ffffffffa12921f8>] vfs_write+0xc8/0x1e0
[<ffffffffa12936e8>] SyS_write+0x58/0xc0
[<ffffffffa19278c1>] entry_SYSCALL_64_fastpath+0x1f/0xc2

               other info that might help us debug this:

Chain exists of:
                 ir_raw_handler_lock --> lirc_dev_lock --> &dev->lock

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&dev->lock);
                               lock(lirc_dev_lock);
                               lock(&dev->lock);
  lock(ir_raw_handler_lock);

                *** DEADLOCK ***

4 locks held by bash/2502:
 #0:  (sb_writers#4){.+.+.+}, at: [<ffffffffa12922c5>] vfs_write+0x195/0x1e0
 #1:  (&of->mutex){+.+.+.}, at: [<ffffffffa1330b1f>] kernfs_fop_write+0x11f/0x1e0
 #2:  (s_active#215){.+.+.+}, at: [<ffffffffa1330b28>] kernfs_fop_write+0x128/0x1e0
 #3:  (&dev->lock){+.+.+.}, at: [<ffffffffc06f511f>] store_filter+0x9f/0x240 [rc_core]

               stack backtrace:
CPU: 3 PID: 2502 Comm: bash Not tainted 4.10.0-rc1+ #1
Hardware name:                  /DG45ID, BIOS IDG4510H.86A.0135.2011.0225.1100 02/25/2011
Call Trace:
 dump_stack+0x86/0xc3
 print_circular_bug+0x1be/0x210
 __lock_acquire+0x10f7/0x1290
 lock_acquire+0xfd/0x200
 ? ir_raw_encode_scancode+0x3e/0xb0 [rc_core]
 ? ir_raw_encode_scancode+0x3e/0xb0 [rc_core]
 mutex_lock_nested+0x77/0x6d0
 ? ir_raw_encode_scancode+0x3e/0xb0 [rc_core]
 ? loop_set_wakeup_filter+0x44/0xbd [rc_loopback]
 ir_raw_encode_scancode+0x3e/0xb0 [rc_core]
 loop_set_wakeup_filter+0x62/0xbd [rc_loopback]
 ? loop_set_tx_duty_cycle+0x70/0x70 [rc_loopback]
 store_filter+0x1aa/0x240 [rc_core]
 dev_attr_store+0x18/0x30
 sysfs_kf_write+0x45/0x60
 kernfs_fop_write+0x155/0x1e0
 __vfs_write+0x37/0x160
 ? rcu_read_lock_sched_held+0x4a/0x80
 ? rcu_sync_lockdep_assert+0x2f/0x60
 ? __sb_start_write+0x10c/0x220
 ? vfs_write+0x195/0x1e0
 ? security_file_permission+0x3b/0xc0
 vfs_write+0xc8/0x1e0
 SyS_write+0x58/0xc0
 entry_SYSCALL_64_fastpath+0x1f/0xc2

Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 drivers/media/rc/lirc_dev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c
index a54ca531d8ef..4630f3e67538 100644
--- a/drivers/media/rc/lirc_dev.c
+++ b/drivers/media/rc/lirc_dev.c
@@ -436,6 +436,8 @@ int lirc_dev_fop_open(struct inode *inode, struct file *file)
 		return -ERESTARTSYS;
 
 	ir = irctls[iminor(inode)];
+	mutex_unlock(&lirc_dev_lock);
+
 	if (!ir) {
 		retval = -ENODEV;
 		goto error;
@@ -476,8 +478,6 @@ int lirc_dev_fop_open(struct inode *inode, struct file *file)
 	}
 
 error:
-	mutex_unlock(&lirc_dev_lock);
-
 	nonseekable_open(inode, file);
 
 	return retval;
-- 
cgit v1.2.3


From c1305a40722f3e02f3d971acc22f5991aff239a6 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sun, 12 Feb 2017 15:01:22 -0200
Subject: [media] rc: nuvoton: fix deadlock in nvt_write_wakeup_codes

nvt_write_wakeup_codes acquires the same lock as the ISR but doesn't
disable interrupts on the local CPU. This caused the following
deadlock. Fix this by using spin_lock_irqsave.

[  432.362008] ================================
[  432.362074] WARNING: inconsistent lock state
[  432.362144] 4.10.0-rc7-next-20170210 #1 Not tainted
[  432.362219] --------------------------------
[  432.362286] inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage.
[  432.362379] swapper/0/0 [HC1[1]:SC0[0]:HE0:SE1] takes:
[  432.362457]  (&(&nvt->lock)->rlock){?.+...}, at: [<ffffffffa016b17d>] nvt_cir_isr+0x2d/0x520 [nuvoton_cir]
[  432.362611] {HARDIRQ-ON-W} state was registered at:
[  432.362686]
[  432.362698] [<ffffffff810adb7c>] __lock_acquire+0x5dc/0x1260
[  432.362812]
[  432.362817] [<ffffffff810aec29>] lock_acquire+0xe9/0x1d0
[  432.362927]
[  432.362934] [<ffffffff81609f63>] _raw_spin_lock+0x33/0x50
[  432.363045]
[  432.363051] [<ffffffffa016b822>] nvt_write_wakeup_codes.isra.12+0x22/0xe0 [nuvoton_cir]
[  432.363193]
[  432.363199] [<ffffffffa016b9bf>] wakeup_data_store+0xdf/0xf0 [nuvoton_cir]
[  432.363327]
[  432.363333] [<ffffffff81484223>] dev_attr_store+0x13/0x20
[  432.363441]
[  432.363449] [<ffffffff81232450>] sysfs_kf_write+0x40/0x50
[  432.363558]
[  432.363564] [<ffffffff81231640>] kernfs_fop_write+0x150/0x1e0
[  432.363676]
[  432.363685] [<ffffffff811b36a3>] __vfs_write+0x23/0x120
[  432.363791]
[  432.363798] [<ffffffff811b4d53>] vfs_write+0xc3/0x1e0
[  432.363902]
[  432.363909] [<ffffffff811b6124>] SyS_write+0x44/0xa0
[  432.364012]
[  432.364021] [<ffffffff81002c47>] do_syscall_64+0x57/0x140
[  432.364129]
[  432.364135] [<ffffffff8160a9e4>] return_from_SYSCALL_64+0x0/0x7a
[  432.364252] irq event stamp: 415118
[  432.364313] hardirqs last  enabled at (415115): [<ffffffff814fd2eb>] cpuidle_enter_state+0x11b/0x370
[  432.364445] hardirqs last disabled at (415116): [<ffffffff8160b2cb>] common_interrupt+0x8b/0x90
[  432.364573] softirqs last  enabled at (415118): [<ffffffff8106157c>] _local_bh_enable+0x1c/0x50
[  432.364699] softirqs last disabled at (415117): [<ffffffff810629a3>] irq_enter+0x43/0x60
[  432.364814]
               other info that might help us debug this:
[  432.364909]  Possible unsafe locking scenario:

[  432.367821]        CPU0
[  432.370645]        ----
[  432.373432]   lock(&(&nvt->lock)->rlock);
[  432.376228]   <Interrupt>
[  432.378982]     lock(&(&nvt->lock)->rlock);
[  432.381757]
                *** DEADLOCK ***

[  432.389888] no locks held by swapper/0/0.
[  432.392574]
               stack backtrace:
[  432.397774] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.10.0-rc7-next-20170210 #1
[  432.400375] Hardware name: ZOTAC ZBOX-CI321NANO/ZBOX-CI321NANO, BIOS B246P105 06/01/2015
[  432.403023] Call Trace:
[  432.405636]  <IRQ>
[  432.408208]  dump_stack+0x68/0x93
[  432.410775]  print_usage_bug+0x1dd/0x1f0
[  432.413334]  mark_lock+0x559/0x5c0
[  432.415871]  ? print_shortest_lock_dependencies+0x1a0/0x1a0
[  432.418431]  __lock_acquire+0x6b1/0x1260
[  432.420941]  lock_acquire+0xe9/0x1d0
[  432.423396]  ? nvt_cir_isr+0x2d/0x520 [nuvoton_cir]
[  432.425844]  _raw_spin_lock+0x33/0x50
[  432.428252]  ? nvt_cir_isr+0x2d/0x520 [nuvoton_cir]
[  432.430670]  nvt_cir_isr+0x2d/0x520 [nuvoton_cir]
[  432.433085]  __handle_irq_event_percpu+0x43/0x330
[  432.435493]  handle_irq_event_percpu+0x1e/0x50
[  432.437884]  handle_irq_event+0x34/0x60
[  432.440236]  handle_edge_irq+0x6a/0x150
[  432.442561]  handle_irq+0x15/0x20
[  432.444854]  do_IRQ+0x57/0x110
[  432.447115]  common_interrupt+0x90/0x90
[  432.449380] RIP: 0010:cpuidle_enter_state+0x120/0x370
[  432.451653] RSP: 0018:ffffffff81c03dd8 EFLAGS: 00000206 ORIG_RAX: ffffffffffffffcc
[  432.453994] RAX: ffffffff81c14500 RBX: 0000000000000008 RCX: 00000064aac6f2d2
[  432.456349] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffffffff81c14500
[  432.458704] RBP: ffffffff81c03e18 R08: cccccccccccccccd R09: 0000000000000018
[  432.461072] R10: 0000000000000000 R11: 0000000000000000 R12: ffff880100a21260
[  432.463450] R13: ffffffff81c7e6f8 R14: 0000000000000008 R15: ffffffff81c7e6e0
[  432.465819]  </IRQ>
[  432.468104]  ? cpuidle_enter_state+0x11b/0x370
[  432.470413]  cpuidle_enter+0x12/0x20
[  432.472698]  call_cpuidle+0x1e/0x40
[  432.474967]  do_idle+0xe3/0x1c0
[  432.477172]  cpu_startup_entry+0x18/0x20
[  432.479376]  rest_init+0x130/0x140
[  432.481565]  start_kernel+0x3cc/0x3d9
[  432.483750]  x86_64_start_reservations+0x2a/0x2c
[  432.485980]  x86_64_start_kernel+0x178/0x18b
[  432.488222]  start_cpu+0x14/0x14
[  432.490453]  ? start_cpu+0x14/0x14

Fixes: 97c129747af5 "[media] rc: nuvoton-cir: Add support wakeup via sysfs filter callback"

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 drivers/media/rc/nuvoton-cir.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/media/rc/nuvoton-cir.c b/drivers/media/rc/nuvoton-cir.c
index b109f8246b96..ec4b25bd2ec2 100644
--- a/drivers/media/rc/nuvoton-cir.c
+++ b/drivers/media/rc/nuvoton-cir.c
@@ -176,12 +176,13 @@ static void nvt_write_wakeup_codes(struct rc_dev *dev,
 {
 	u8 tolerance, config;
 	struct nvt_dev *nvt = dev->priv;
+	unsigned long flags;
 	int i;
 
 	/* hardcode the tolerance to 10% */
 	tolerance = DIV_ROUND_UP(count, 10);
 
-	spin_lock(&nvt->lock);
+	spin_lock_irqsave(&nvt->lock, flags);
 
 	nvt_clear_cir_wake_fifo(nvt);
 	nvt_cir_wake_reg_write(nvt, count, CIR_WAKE_FIFO_CMP_DEEP);
@@ -203,7 +204,7 @@ static void nvt_write_wakeup_codes(struct rc_dev *dev,
 
 	nvt_cir_wake_reg_write(nvt, config, CIR_WAKE_IRCON);
 
-	spin_unlock(&nvt->lock);
+	spin_unlock_irqrestore(&nvt->lock, flags);
 }
 
 static ssize_t wakeup_data_show(struct device *dev,
-- 
cgit v1.2.3


From 413808685dd7c9b54bbc5af79da2eaddd0fc3cb2 Mon Sep 17 00:00:00 2001
From: Sean Young <sean@mess.org>
Date: Wed, 22 Feb 2017 18:48:01 -0300
Subject: [media] rc: raw decoder for keymap protocol is not loaded on register

When the protocol is set via the sysfs protocols attribute, the
decoder is loaded. However, when it is not when a device is first
plugged in or registered.

Fixes: acc1c3c ("[media] media: rc: load decoder modules on-demand")

Signed-off-by: Sean Young <sean@mess.org>
Cc: <stable@vger.kernel.org> # v4.5+
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 drivers/media/rc/rc-main.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c
index 2424946740e6..a158b321b40a 100644
--- a/drivers/media/rc/rc-main.c
+++ b/drivers/media/rc/rc-main.c
@@ -1663,6 +1663,7 @@ static int rc_setup_rx_device(struct rc_dev *dev)
 {
 	int rc;
 	struct rc_map *rc_map;
+	u64 rc_type;
 
 	if (!dev->map_name)
 		return -EINVAL;
@@ -1677,15 +1678,18 @@ static int rc_setup_rx_device(struct rc_dev *dev)
 	if (rc)
 		return rc;
 
-	if (dev->change_protocol) {
-		u64 rc_type = (1ll << rc_map->rc_type);
+	rc_type = BIT_ULL(rc_map->rc_type);
 
+	if (dev->change_protocol) {
 		rc = dev->change_protocol(dev, &rc_type);
 		if (rc < 0)
 			goto out_table;
 		dev->enabled_protocols = rc_type;
 	}
 
+	if (dev->driver_type == RC_DRIVER_IR_RAW)
+		ir_raw_load_modules(&rc_type);
+
 	set_bit(EV_KEY, dev->input_dev->evbit);
 	set_bit(EV_REP, dev->input_dev->evbit);
 	set_bit(EV_MSC, dev->input_dev->evbit);
-- 
cgit v1.2.3


From 5df62771c556e4ec9d7ecea1f070ff6da4bce151 Mon Sep 17 00:00:00 2001
From: Sean Young <sean@mess.org>
Date: Thu, 23 Feb 2017 06:11:21 -0300
Subject: [media] rc: protocol is not set on register for raw IR devices

ir_raw_event_register() sets up change_protocol(), and without that set,
rc_setup_rx_device() does not set the protocol for the device on register.

The standard udev rules run ir-keytable, which writes to the protocols
file again, which hides this problem.

Fixes: 7ff2c2b ("[media] rc-main: split setup and unregister functions")

Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 drivers/media/rc/rc-main.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c
index a158b321b40a..d84533699668 100644
--- a/drivers/media/rc/rc-main.c
+++ b/drivers/media/rc/rc-main.c
@@ -1781,12 +1781,6 @@ int rc_register_device(struct rc_dev *dev)
 		dev->input_name ?: "Unspecified device", path ?: "N/A");
 	kfree(path);
 
-	if (dev->driver_type != RC_DRIVER_IR_RAW_TX) {
-		rc = rc_setup_rx_device(dev);
-		if (rc)
-			goto out_dev;
-	}
-
 	if (dev->driver_type == RC_DRIVER_IR_RAW ||
 	    dev->driver_type == RC_DRIVER_IR_RAW_TX) {
 		if (!raw_init) {
@@ -1795,7 +1789,13 @@ int rc_register_device(struct rc_dev *dev)
 		}
 		rc = ir_raw_event_register(dev);
 		if (rc < 0)
-			goto out_rx;
+			goto out_dev;
+	}
+
+	if (dev->driver_type != RC_DRIVER_IR_RAW_TX) {
+		rc = rc_setup_rx_device(dev);
+		if (rc)
+			goto out_raw;
 	}
 
 	/* Allow the RC sysfs nodes to be accessible */
@@ -1807,8 +1807,8 @@ int rc_register_device(struct rc_dev *dev)
 
 	return 0;
 
-out_rx:
-	rc_free_rx_device(dev);
+out_raw:
+	ir_raw_event_unregister(dev);
 out_dev:
 	device_del(&dev->dev);
 out_unlock:
-- 
cgit v1.2.3


From 92ad18ec26611e8a47639e600bd9dc42fbe2edcf Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 2 Mar 2017 12:53:26 -0500
Subject: ftrace/graph: Do not modify the EMPTY_HASH for the function_graph
 filter

On boot up, if the kernel command line sets a graph funtion with the kernel
command line options "ftrace_graph_filter" or "ftrace_graph_notrace" then it
updates the corresponding function graph hash, ftrace_graph_hash or
ftrace_graph_notrace_hash respectively. Unfortunately, at boot up, these
variables are pointers to the "EMPTY_HASH" which is a constant used as a
placeholder when a hash has no entities. The problem was that the comand
line version to set the hashes updated the actual EMPTY_HASH instead of
creating a new hash for the function graph. This broke the EMPTY_HASH
because not only did it modify a constant (not sure how that was allowed to
happen, except maybe because it was done at early boot, const variables were
still mutable), but it made the filters have functions listed in them when
they were actually empty.

The kernel command line function needs to allocate a new hash for the
function graph filters and assign the necessary variables to that new hash
instead.

Link: http://lkml.kernel.org/r/1488420091.7212.17.camel@linux.intel.com

Cc: Namhyung Kim <namhyung@kernel.org>
Fixes: b9b0c831bed2 ("ftrace: Convert graph filter to use hash tables")
Reported-by: Todd Brandt <todd.e.brandt@linux.intel.com>
Tested-by: Todd Brandt <todd.e.brandt@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fd84f2e30b6d..44122e7a6418 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4421,10 +4421,9 @@ static void __init set_ftrace_early_graph(char *buf, int enable)
 	char *func;
 	struct ftrace_hash *hash;
 
-	if (enable)
-		hash = ftrace_graph_hash;
-	else
-		hash = ftrace_graph_notrace_hash;
+	hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
+	if (WARN_ON(!hash))
+		return;
 
 	while (buf) {
 		func = strsep(&buf, ",");
@@ -4434,6 +4433,11 @@ static void __init set_ftrace_early_graph(char *buf, int enable)
 			printk(KERN_DEBUG "ftrace: function %s not "
 					  "traceable\n", func);
 	}
+
+	if (enable)
+		ftrace_graph_hash = hash;
+	else
+		ftrace_graph_notrace_hash = hash;
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
-- 
cgit v1.2.3


From ab42632156becd35d3884ee5c14da2bedbf3149a Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Wed, 1 Mar 2017 14:04:53 -0800
Subject: module: set __jump_table alignment to 8

For powerpc the __jump_table section in modules is not aligned, this
causes a WARN_ON() splat when loading a module containing a __jump_table.

Strict alignment became necessary with commit 3821fd35b58d
("jump_label: Reduce the size of struct static_key"), currently in
linux-next, which uses the two least significant bits of pointers to
__jump_table elements.

Fix by forcing __jump_table to 8, which is the same alignment used for
this section in the kernel proper.

Link: http://lkml.kernel.org/r/20170301220453.4756-1-david.daney@cavium.com

Reviewed-by: Jason Baron <jbaron@akamai.com>
Acked-by: Jessica Yu <jeyu@redhat.com>
Acked-by: Michael Ellerman <mpe@ellerman.id.au> (powerpc)
Tested-by: Sachin Sant <sachinp@linux.vnet.ibm.com>
Signed-off-by: David Daney <david.daney@cavium.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 scripts/module-common.lds | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/module-common.lds b/scripts/module-common.lds
index 73a2c7da0e55..53234e85192a 100644
--- a/scripts/module-common.lds
+++ b/scripts/module-common.lds
@@ -19,4 +19,6 @@ SECTIONS {
 
 	. = ALIGN(8);
 	.init_array		0 : { *(SORT(.init_array.*)) *(.init_array) }
+
+	__jump_table		0 : ALIGN(8) { KEEP(*(__jump_table)) }
 }
-- 
cgit v1.2.3


From cd8d860dcce906cd477be7d0648ba6f56a52eaa6 Mon Sep 17 00:00:00 2001
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Date: Tue, 28 Feb 2017 11:32:22 -0500
Subject: jump_label: Fix anonymous union initialization

Pre-4.6 gcc do not allow direct static initialization of members of
anonymous structs/unions. After commit 3821fd35b58d ("jump_label:
Reduce the size of struct static_key") STATIC_KEY_INIT_{TRUE|FALSE}
definitions cannot be compiled with those older compilers.

Placing initializers inside curved brackets works around this problem.

Link: http://lkml.kernel.org/r/1488299542-30765-1-git-send-email-boris.ostrovsky@oracle.com

Fixes: 3821fd35b58d ("jump_label: Reduce the size of struct static_key")
Reviewed-by: Jason Baron <jbaron@akamai.com>
Compiled-by: Chris Mason <clm@fb.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/jump_label.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 680c98b2f41c..a7f90117cf7d 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -166,10 +166,10 @@ extern void static_key_disable(struct static_key *key);
  */
 #define STATIC_KEY_INIT_TRUE					\
 	{ .enabled = { 1 },					\
-	  .entries = (void *)JUMP_TYPE_TRUE }
+	  { .entries = (void *)JUMP_TYPE_TRUE } }
 #define STATIC_KEY_INIT_FALSE					\
 	{ .enabled = { 0 },					\
-	  .entries = (void *)JUMP_TYPE_FALSE }
+	  { .entries = (void *)JUMP_TYPE_FALSE } }
 
 #else  /* !HAVE_JUMP_LABEL */
 
-- 
cgit v1.2.3


From b17ef2ed624aa7c1f68ed11acd16ecbf80fe01d7 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 2 Mar 2017 17:28:45 -0500
Subject: jump_label: Add comment about initialization order for anonymous
 unions

Commit 3821fd35b58d ("jump_label: Reduce the size of struct static_key")
broke old compilers that could not handle static initialization of anonymous
unions. Boris fixed it with a patch that added brackets around the static
initializer. But this creates a dependency between those initializers and
the structure's order of its fields. Document this dependency in case new
fields are added to struct static_key in the future.

Noted-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Suggested-by: Chris Mason <clm@fb.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/jump_label.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index a7f90117cf7d..28e04a33535a 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -90,6 +90,13 @@ extern bool static_key_initialized;
 struct static_key {
 	atomic_t enabled;
 /*
+ * Note:
+ *   To make anonymous unions work with old compilers, the static
+ *   initialization of them requires brackets. This creates a dependency
+ *   on the order of the struct with the initializers. If any fields
+ *   are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need
+ *   to be modified.
+ *
  * bit 0 => 1 if key is initially true
  *	    0 if initially false
  * bit 1 => 1 if points to struct static_key_mod
-- 
cgit v1.2.3


From bf7165cfa23695c51998231c4efa080fe1d3548d Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Wed, 28 Sep 2016 22:55:54 -0400
Subject: tracing: Add #undef to fix compile error

There are several trace include files that define TRACE_INCLUDE_FILE.

Include several of them in the same .c file (as I currently have in
some code I am working on), and the compile will blow up with a
"warning: "TRACE_INCLUDE_FILE" redefined #define TRACE_INCLUDE_FILE syscalls"

Every other include file in include/trace/events/ avoids that issue
by having a #undef TRACE_INCLUDE_FILE before the #define; syscalls.h
should have one, too.

Link: http://lkml.kernel.org/r/20160928225554.13bd7ac6@annuminas.surriel.com

Cc: stable@vger.kernel.org
Fixes: b8007ef74222 ("tracing: Separate raw syscall from syscall tracer")
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/trace/events/syscalls.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h
index 14e49c798135..b35533b94277 100644
--- a/include/trace/events/syscalls.h
+++ b/include/trace/events/syscalls.h
@@ -1,5 +1,6 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM raw_syscalls
+#undef TRACE_INCLUDE_FILE
 #define TRACE_INCLUDE_FILE syscalls
 
 #if !defined(_TRACE_EVENTS_SYSCALLS_H) || defined(TRACE_HEADER_MULTI_READ)
-- 
cgit v1.2.3


From 65a50c656276b0846bea09dd011c0a3d35b77f3e Mon Sep 17 00:00:00 2001
From: Todd Brandt <todd.e.brandt@linux.intel.com>
Date: Thu, 2 Mar 2017 16:12:15 -0800
Subject: ftrace/graph: Add ftrace_graph_max_depth kernel parameter

Early trace callgraphs can be extremely large on systems with
several seconds of boot time. The max_depth parameter limits how
deep the graph trace goes and reduces the output size. This
parameter is the same as the max_graph_depth file in tracefs.

Link: http://lkml.kernel.org/r/1488499935-23216-1-git-send-email-todd.e.brandt@linux.intel.com

Signed-off-by: Todd Brandt <todd.e.brandt@linux.intel.com>
[ changed comments about debugfs to tracefs ]
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 Documentation/admin-guide/kernel-parameters.txt | 6 ++++++
 kernel/trace/ftrace.c                           | 9 +++++++++
 2 files changed, 15 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 21e2d8863705..1c9016b27ee9 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1173,6 +1173,12 @@
 			functions that can be changed at run time by the
 			set_graph_notrace file in the debugfs tracing directory.
 
+	ftrace_graph_max_depth=<uint>
+			[FTRACE] Used with the function graph tracer. This is
+			the max depth it will trace into a function. This value
+			can be changed at run time by the max_graph_depth file
+			in the tracefs tracing directory. default: 0 (no limit)
+
 	gamecon.map[2|3]=
 			[HW,JOY] Multisystem joystick and NES/SNES/PSX pad
 			support via parallel port (up to 5 devices per port)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 44122e7a6418..d129ae51329a 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4415,6 +4415,15 @@ static int __init set_graph_notrace_function(char *str)
 }
 __setup("ftrace_graph_notrace=", set_graph_notrace_function);
 
+static int __init set_graph_max_depth_function(char *str)
+{
+	if (!str)
+		return 0;
+	fgraph_max_depth = simple_strtoul(str, NULL, 0);
+	return 1;
+}
+__setup("ftrace_graph_max_depth=", set_graph_max_depth_function);
+
 static void __init set_ftrace_early_graph(char *buf, int enable)
 {
 	int ret;
-- 
cgit v1.2.3


From 2bc756e7dde20972300bb8e2e46dd430d6d2d5db Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 2 Mar 2017 23:22:29 +0100
Subject: cpufreq: intel_pstate: Do not use performance_limits in passive mode

Using performance_limits in the passive mode doesn't make
sense, because in that mode the global limits are applied to the
frequency selected by the scaling governor.

The maximum and minimum P-state limits in performance_limits are both
set to 100 percent which will put all CPUs into the turbo range
regardless of what governor is used and what frequencies are
selected by it (that is particularly undesirable on CPUs with the
generic powersave governor attached).

For this reason, make intel_pstate_register_driver() always point
limits to powersave_limits in the passive mode.

Fixes: 001c76f05b01 (cpufreq: intel_pstate: Generic governors support)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 9ee13f195c37..7fc1f8a0ef44 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2442,8 +2442,11 @@ static int intel_pstate_register_driver(void)
 
 	intel_pstate_init_limits(&powersave_limits);
 	intel_pstate_set_performance_limits(&performance_limits);
-	limits = IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE) ?
-			&performance_limits : &powersave_limits;
+	if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE) &&
+	    intel_pstate_driver == &intel_pstate)
+		limits = &performance_limits;
+	else
+		limits = &powersave_limits;
 
 	ret = cpufreq_register_driver(intel_pstate_driver);
 	if (ret) {
-- 
cgit v1.2.3


From 7f17326fc0b69afbda7aed6c4ce8e2328b74203b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 2 Mar 2017 23:24:36 +0100
Subject: cpufreq: intel_pstate: Fix intel_cpufreq_verify_policy()

The intel_pstate_update_perf_limits() called from
intel_cpufreq_verify_policy() may cause global P-state limits
to change which is generally confusing and unnecessary.

In the passive mode the global limits are only applied to the
frequency selected by the scaling governor (they are not taken
into account by governors when making decisions anyway), so making
them follow the per-policy limits serves no purpose and may go
against user expectations (as it generally causes the global
attributes in sysfs to change even though they have not been
written to in some cases).

Fix that by dropping the intel_pstate_update_perf_limits()
invocation from intel_cpufreq_verify_policy() (which also
reduces the code size by a few lines).

This change does not affect the per-CPU limits case, because those
limits allow any P-state to be set by default in the passive mode
and it removes the only piece of code updating them in that mode,
so the per-policy settings will be the only ones taken into account
in that case as expected.

Fixes: 001c76f05b01 (cpufreq: intel_pstate: Generic governors support)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 7fc1f8a0ef44..04b2aa162276 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2306,7 +2306,6 @@ static struct cpufreq_driver intel_pstate = {
 static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
 {
 	struct cpudata *cpu = all_cpu_data[policy->cpu];
-	struct perf_limits *perf_limits = limits;
 
 	update_turbo_state();
 	policy->cpuinfo.max_freq = limits->turbo_disabled ?
@@ -2314,15 +2313,6 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
 
 	cpufreq_verify_within_cpu_limits(policy);
 
-	if (per_cpu_limits)
-		perf_limits = cpu->perf_limits;
-
-	mutex_lock(&intel_pstate_limits_lock);
-
-	intel_pstate_update_perf_limits(policy, perf_limits);
-
-	mutex_unlock(&intel_pstate_limits_lock);
-
 	return 0;
 }
 
-- 
cgit v1.2.3


From 6407829901f074cf6beef566d6af9a0b0e238f0d Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 3 Mar 2017 23:51:31 +0100
Subject: cpufreq: intel_pstate: Avoid triggering cpu_frequency tracepoint
 unnecessarily

In the passive mode the cpu_frequency trace event is already
triggered by the cpufreq core or by scaling governors, so
intel_pstate should not trigger it once again for the same
P-state updates.

In addition to that, the frequency returned by
intel_cpufreq_fast_switch() and passed via freqs.new from
intel_cpufreq_target() to cpufreq_freq_transition_end() should
reflect the P-state actually set, so make that happen.

Fixes: 001c76f05b01 (cpufreq: intel_pstate: Generic governors support)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 04b2aa162276..5e066b332598 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1879,13 +1879,11 @@ static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate)
 
 	intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
 	pstate = clamp_t(int, pstate, min_perf, max_perf);
-	trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
 	return pstate;
 }
 
 static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
 {
-	pstate = intel_pstate_prepare_request(cpu, pstate);
 	if (pstate == cpu->pstate.current_pstate)
 		return;
 
@@ -1905,6 +1903,8 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 
 	update_turbo_state();
 
+	target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
+	trace_cpu_frequency(target_pstate * cpu->pstate.scaling, cpu->cpu);
 	intel_pstate_update_pstate(cpu, target_pstate);
 
 	sample = &cpu->sample;
@@ -2365,6 +2365,7 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy,
 		wrmsrl_on_cpu(policy->cpu, MSR_IA32_PERF_CTL,
 			      pstate_funcs.get_val(cpu, target_pstate));
 	}
+	freqs.new = target_pstate * cpu->pstate.scaling;
 	cpufreq_freq_transition_end(policy, &freqs, false);
 
 	return 0;
@@ -2378,8 +2379,9 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
 
 	target_freq = intel_cpufreq_turbo_update(cpu, policy, target_freq);
 	target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling);
+	target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
 	intel_pstate_update_pstate(cpu, target_pstate);
-	return target_freq;
+	return target_pstate * cpu->pstate.scaling;
 }
 
 static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
-- 
cgit v1.2.3


From 2a9c4f40ab2c281f95d41577ff0f7f78668feb73 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Thu, 2 Mar 2017 17:41:24 +1100
Subject: powerpc/powernv: Fix opal tracepoints with JUMP_LABEL=n

The recent commit to allow calling OPAL calls in real mode, commit
ab9bad0ead9a ("powerpc/powernv: Remove separate entry for OPAL real mode
calls"), introduced a bug when CONFIG_JUMP_LABEL=n.

The commit moved the "mfmsr r12" prior to the call to OPAL_BRANCH, but
we missed that OPAL_BRANCH clobbers r12 when jump labels are disabled.
This leads to us using the tracepoint refcount as the MSR value,
typically zero, and saving that into PACASAVEDMSR. When we return from
OPAL we use that value as the MSR value for rfid, meaning we switch to
32-bit BE real mode - hilarity ensues.

Fix it by using r11 in OPAL_BRANCH, which is not live at the time the
macro is used in OPAL_CALL.

Fixes: ab9bad0ead9a ("powerpc/powernv: Remove separate entry for OPAL real mode calls")
Suggested-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/opal-wrappers.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 6693f75e93d1..da8a0f7a035c 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -39,8 +39,8 @@ opal_tracepoint_refcount:
 BEGIN_FTR_SECTION;						\
 	b	1f;						\
 END_FTR_SECTION(0, 1);						\
-	ld	r12,opal_tracepoint_refcount@toc(r2);		\
-	cmpdi	r12,0;						\
+	ld	r11,opal_tracepoint_refcount@toc(r2);		\
+	cmpdi	r11,0;						\
 	bne-	LABEL;						\
 1:
 
-- 
cgit v1.2.3


From 6ad966d7303b70165228dba1ee8da1a05c10eefe Mon Sep 17 00:00:00 2001
From: Shile Zhang <shile.zhang@nokia.com>
Date: Sat, 4 Feb 2017 17:03:40 +0800
Subject: powerpc/64: Fix checksum folding in csum_add()

Paul's patch to fix checksum folding, commit b492f7e4e07a ("powerpc/64:
Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold")
missed a case in csum_add(). Fix it.

Signed-off-by: Shile Zhang <shile.zhang@nokia.com>
Acked-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/checksum.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
index 4e63787dc3be..842124b199b5 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -112,7 +112,7 @@ static inline __wsum csum_add(__wsum csum, __wsum addend)
 
 #ifdef __powerpc64__
 	res += (__force u64)addend;
-	return (__force __wsum)((u32)res + (res >> 32));
+	return (__force __wsum) from64to32(res);
 #else
 	asm("addc %0,%0,%1;"
 	    "addze %0,%0;"
-- 
cgit v1.2.3


From 10e5778f54765c96fe0c8f104b7a030e5b35bc72 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sat, 4 Mar 2017 07:02:10 -0800
Subject: ARM: OMAP2+: Fix device node reference counts

After commit 0549bde0fcb1 ("of: fix of_node leak caused in
of_find_node_opts_by_path"), the following error may be
reported when running omap images.

OF: ERROR: Bad of_node_put() on /ocp@68000000
CPU: 0 PID: 0 Comm: swapper Not tainted 4.10.0-rc7-next-20170210 #1
Hardware name: Generic OMAP3-GP (Flattened Device Tree)
[<c0310604>] (unwind_backtrace) from [<c030bbf4>] (show_stack+0x10/0x14)
[<c030bbf4>] (show_stack) from [<c05add8c>] (dump_stack+0x98/0xac)
[<c05add8c>] (dump_stack) from [<c05af1b0>] (kobject_release+0x48/0x7c)
[<c05af1b0>] (kobject_release)
	from [<c0ad1aa4>] (of_find_node_by_name+0x74/0x94)
[<c0ad1aa4>] (of_find_node_by_name)
	from [<c1215bd4>] (omap3xxx_hwmod_is_hs_ip_block_usable+0x24/0x2c)
[<c1215bd4>] (omap3xxx_hwmod_is_hs_ip_block_usable) from
[<c1215d5c>] (omap3xxx_hwmod_init+0x180/0x274)
[<c1215d5c>] (omap3xxx_hwmod_init)
	from [<c120faa8>] (omap3_init_early+0xa0/0x11c)
[<c120faa8>] (omap3_init_early)
	from [<c120fb2c>] (omap3430_init_early+0x8/0x30)
[<c120fb2c>] (omap3430_init_early)
	from [<c1204710>] (setup_arch+0xc04/0xc34)
[<c1204710>] (setup_arch) from [<c1200948>] (start_kernel+0x68/0x38c)
[<c1200948>] (start_kernel) from [<8020807c>] (0x8020807c)

of_find_node_by_name() drops the reference to the passed device node.
The commit referenced above exposes this problem.

To fix the problem, use of_get_child_by_name() instead of
of_find_node_by_name(); of_get_child_by_name() does not drop
the reference count of passed device nodes. While semantically
different, we only look for immediate children of the passed
device node, so of_get_child_by_name() is a more appropriate
function to use anyway.

Release the reference to the device node obtained with
of_get_child_by_name() after it is no longer needed to avoid
another device node leak.

While at it, clean up the code and change the return type of
omap3xxx_hwmod_is_hs_ip_block_usable() to bool to match its use
and the return type of of_device_is_available().

Cc: Qi Hou <qi.hou@windriver.com>
Cc: Peter Rosin <peda@axentia.se>
Cc: Rob Herring <robh@kernel.org>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index 507ff0795a8e..0fa08c1e4701 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -3131,16 +3131,20 @@ static struct omap_hwmod_ocp_if *omap3xxx_dss_hwmod_ocp_ifs[] __initdata = {
  * Return: 0 if device named @dev_name is not likely to be accessible,
  * or 1 if it is likely to be accessible.
  */
-static int __init omap3xxx_hwmod_is_hs_ip_block_usable(struct device_node *bus,
-						       const char *dev_name)
+static bool __init omap3xxx_hwmod_is_hs_ip_block_usable(struct device_node *bus,
+							const char *dev_name)
 {
+	struct device_node *node;
+	bool available;
+
 	if (!bus)
-		return (omap_type() == OMAP2_DEVICE_TYPE_GP) ? 1 : 0;
+		return omap_type() == OMAP2_DEVICE_TYPE_GP;
 
-	if (of_device_is_available(of_find_node_by_name(bus, dev_name)))
-		return 1;
+	node = of_get_child_by_name(bus, dev_name);
+	available = of_device_is_available(node);
+	of_node_put(node);
 
-	return 0;
+	return available;
 }
 
 int __init omap3xxx_hwmod_init(void)
-- 
cgit v1.2.3


From b92675d998a9fa37fe9e0e35053a95b4a23c158b Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sat, 4 Mar 2017 07:02:11 -0800
Subject: ARM: OMAP2+: Release device node after it is no longer needed.

The device node returned by of_find_node_by_name() needs to be released
after it is no longer needed to avoid a device node leak.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index 0fa08c1e4701..1435fee39a89 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -3213,15 +3213,20 @@ int __init omap3xxx_hwmod_init(void)
 
 	if (h_sham && omap3xxx_hwmod_is_hs_ip_block_usable(bus, "sham")) {
 		r = omap_hwmod_register_links(h_sham);
-		if (r < 0)
+		if (r < 0) {
+			of_node_put(bus);
 			return r;
+		}
 	}
 
 	if (h_aes && omap3xxx_hwmod_is_hs_ip_block_usable(bus, "aes")) {
 		r = omap_hwmod_register_links(h_aes);
-		if (r < 0)
+		if (r < 0) {
+			of_node_put(bus);
 			return r;
+		}
 	}
+	of_node_put(bus);
 
 	/*
 	 * Register hwmod links specific to certain ES levels of a
-- 
cgit v1.2.3


From 1c2bcc766be44467809f1798cd4ceacafe20a852 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Wed, 22 Feb 2017 17:25:42 +0100
Subject: batman-adv: Keep fragments equally sized
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The batman-adv fragmentation packets have the design problem that they
cannot be refragmented and cannot handle padding by the underlying link.
The latter often leads to problems when networks are incorrectly configured
and don't use a common MTU.

The sender could for example fragment a 1271 byte frame (plus external
ethernet header (14) and batadv unicast header (10)) to fit in a 1280 bytes
large MTU of the underlying link (max. 1294 byte frames). This would create
a 1294 bytes large frame (fragment 2) and a 55 bytes large frame
(fragment 1). The extra 54 bytes are the fragment header (20) added to each
fragment and the external ethernet header (14) for the second fragment.

Let us assume that the next hop is then not able to transport 1294 bytes to
its next hop. The 1294 byte large frame will be dropped but the 55 bytes
large fragment will still be forwarded to its destination.

Or let us assume that the underlying hardware requires that each frame has
a minimum size (e.g. 60 bytes). Then it will pad the 55 bytes frame to 60
bytes. The receiver of the 60 bytes frame will no longer be able to
correctly assemble the two frames together because it is not aware that 5
bytes of the 60 bytes frame are padding and don't belong to the reassembled
frame.

This can partly be avoided by splitting frames more equally. In this
example, the 675 and 674 bytes large fragment frames could both potentially
reach its destination without being too large or too small.

Reported-by: Martin Weinelt <martin@darmstadt.freifunk.net>
Fixes: ee75ed88879a ("batman-adv: Fragment and send skbs larger than mtu")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Acked-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/fragmentation.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 11149e5be4e0..106bda56ec98 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -404,7 +404,7 @@ out:
  * batadv_frag_create - create a fragment from skb
  * @skb: skb to create fragment from
  * @frag_head: header to use in new fragment
- * @mtu: size of new fragment
+ * @fragment_size: size of new fragment
  *
  * Split the passed skb into two fragments: A new one with size matching the
  * passed mtu and the old one with the rest. The new skb contains data from the
@@ -414,11 +414,11 @@ out:
  */
 static struct sk_buff *batadv_frag_create(struct sk_buff *skb,
 					  struct batadv_frag_packet *frag_head,
-					  unsigned int mtu)
+					  unsigned int fragment_size)
 {
 	struct sk_buff *skb_fragment;
 	unsigned int header_size = sizeof(*frag_head);
-	unsigned int fragment_size = mtu - header_size;
+	unsigned int mtu = fragment_size + header_size;
 
 	skb_fragment = netdev_alloc_skb(NULL, mtu + ETH_HLEN);
 	if (!skb_fragment)
@@ -456,7 +456,7 @@ int batadv_frag_send_packet(struct sk_buff *skb,
 	struct sk_buff *skb_fragment;
 	unsigned int mtu = neigh_node->if_incoming->net_dev->mtu;
 	unsigned int header_size = sizeof(frag_header);
-	unsigned int max_fragment_size, max_packet_size;
+	unsigned int max_fragment_size, num_fragments;
 	int ret;
 
 	/* To avoid merge and refragmentation at next-hops we never send
@@ -464,10 +464,15 @@ int batadv_frag_send_packet(struct sk_buff *skb,
 	 */
 	mtu = min_t(unsigned int, mtu, BATADV_FRAG_MAX_FRAG_SIZE);
 	max_fragment_size = mtu - header_size;
-	max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS;
+
+	if (skb->len == 0 || max_fragment_size == 0)
+		return -EINVAL;
+
+	num_fragments = (skb->len - 1) / max_fragment_size + 1;
+	max_fragment_size = (skb->len - 1) / num_fragments + 1;
 
 	/* Don't even try to fragment, if we need more than 16 fragments */
-	if (skb->len > max_packet_size) {
+	if (num_fragments > BATADV_FRAG_MAX_FRAGMENTS) {
 		ret = -EAGAIN;
 		goto free_skb;
 	}
@@ -507,7 +512,8 @@ int batadv_frag_send_packet(struct sk_buff *skb,
 			goto put_primary_if;
 		}
 
-		skb_fragment = batadv_frag_create(skb, &frag_header, mtu);
+		skb_fragment = batadv_frag_create(skb, &frag_header,
+						  max_fragment_size);
 		if (!skb_fragment) {
 			ret = -ENOMEM;
 			goto put_primary_if;
-- 
cgit v1.2.3


From 1a9070ec91b37234fe915849b767c61584c64a44 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Sat, 4 Mar 2017 15:48:50 +0100
Subject: batman-adv: Initialize gw sel_class via batadv_algo

The gateway selection class variable is shared between different algorithm
versions. But the interpretation of the content is algorithm specific. The
initialization is therefore also algorithm specific.

But this was implemented incorrectly and the initialization for BATMAN_V
always overwrote the value previously written for BATMAN_IV. This could
only be avoided when BATMAN_V was disabled during compile time.

Using a special batadv_algo hook for this initialization avoids this
problem.

Fixes: 50164d8f500f ("batman-adv: B.A.T.M.A.N. V - implement GW selection logic")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/bat_iv_ogm.c     | 11 +++++++++++
 net/batman-adv/bat_v.c          | 14 +++++++++++---
 net/batman-adv/gateway_common.c |  5 +++++
 net/batman-adv/soft-interface.c |  1 -
 net/batman-adv/types.h          |  2 ++
 5 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index f00f666e2ccd..7bfd0d7ef49d 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -2477,6 +2477,16 @@ static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface)
 	batadv_iv_ogm_schedule(hard_iface);
 }
 
+/**
+ * batadv_iv_init_sel_class - initialize GW selection class
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+static void batadv_iv_init_sel_class(struct batadv_priv *bat_priv)
+{
+	/* set default TQ difference threshold to 20 */
+	atomic_set(&bat_priv->gw.sel_class, 20);
+}
+
 static struct batadv_gw_node *
 batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
 {
@@ -2823,6 +2833,7 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
 		.del_if = batadv_iv_ogm_orig_del_if,
 	},
 	.gw = {
+		.init_sel_class = batadv_iv_init_sel_class,
 		.get_best_gw_node = batadv_iv_gw_get_best_gw_node,
 		.is_eligible = batadv_iv_gw_is_eligible,
 #ifdef CONFIG_BATMAN_ADV_DEBUGFS
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 2ac612d7bab4..2e2471ca84e3 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -668,6 +668,16 @@ err_ifinfo1:
 	return ret;
 }
 
+/**
+ * batadv_v_init_sel_class - initialize GW selection class
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+static void batadv_v_init_sel_class(struct batadv_priv *bat_priv)
+{
+	/* set default throughput difference threshold to 5Mbps */
+	atomic_set(&bat_priv->gw.sel_class, 50);
+}
+
 static ssize_t batadv_v_store_sel_class(struct batadv_priv *bat_priv,
 					char *buff, size_t count)
 {
@@ -1052,6 +1062,7 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = {
 		.dump = batadv_v_orig_dump,
 	},
 	.gw = {
+		.init_sel_class = batadv_v_init_sel_class,
 		.store_sel_class = batadv_v_store_sel_class,
 		.show_sel_class = batadv_v_show_sel_class,
 		.get_best_gw_node = batadv_v_gw_get_best_gw_node,
@@ -1092,9 +1103,6 @@ int batadv_v_mesh_init(struct batadv_priv *bat_priv)
 	if (ret < 0)
 		return ret;
 
-	/* set default throughput difference threshold to 5Mbps */
-	atomic_set(&bat_priv->gw.sel_class, 50);
-
 	return 0;
 }
 
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 21184810d89f..3e3f91ab694f 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -253,6 +253,11 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
  */
 void batadv_gw_init(struct batadv_priv *bat_priv)
 {
+	if (bat_priv->algo_ops->gw.init_sel_class)
+		bat_priv->algo_ops->gw.init_sel_class(bat_priv);
+	else
+		atomic_set(&bat_priv->gw.sel_class, 1);
+
 	batadv_tvlv_handler_register(bat_priv, batadv_gw_tvlv_ogm_handler_v1,
 				     NULL, BATADV_TVLV_GW, 1,
 				     BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 7b3494ae6ad9..2e0b3463ab4a 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -820,7 +820,6 @@ static int batadv_softif_init_late(struct net_device *dev)
 	atomic_set(&bat_priv->mcast.num_want_all_ipv6, 0);
 #endif
 	atomic_set(&bat_priv->gw.mode, BATADV_GW_MODE_OFF);
-	atomic_set(&bat_priv->gw.sel_class, 20);
 	atomic_set(&bat_priv->gw.bandwidth_down, 100);
 	atomic_set(&bat_priv->gw.bandwidth_up, 20);
 	atomic_set(&bat_priv->orig_interval, 1000);
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index e913aee28c98..5137d859694c 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1489,6 +1489,7 @@ struct batadv_algo_orig_ops {
 
 /**
  * struct batadv_algo_gw_ops - mesh algorithm callbacks (GW specific)
+ * @init_sel_class: initialize GW selection class (optional)
  * @store_sel_class: parse and stores a new GW selection class (optional)
  * @show_sel_class: prints the current GW selection class (optional)
  * @get_best_gw_node: select the best GW from the list of available nodes
@@ -1499,6 +1500,7 @@ struct batadv_algo_orig_ops {
  * @dump: dump gateways to a netlink socket (optional)
  */
 struct batadv_algo_gw_ops {
+	void (*init_sel_class)(struct batadv_priv *bat_priv);
 	ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff,
 				   size_t count);
 	ssize_t (*show_sel_class)(struct batadv_priv *bat_priv, char *buff);
-- 
cgit v1.2.3


From 3bec247474469f769af41e8c80d3a100dd97dd76 Mon Sep 17 00:00:00 2001
From: Song Hongyan <hongyan.song@intel.com>
Date: Wed, 22 Feb 2017 17:17:38 +0800
Subject: iio: hid-sensor-trigger: Change get poll value function order to
 avoid sensor properties losing after resume from S3

In function _hid_sensor_power_state(), when hid_sensor_read_poll_value()
is called, sensor's all properties will be updated by the value from
sensor hardware/firmware.
In some implementation, sensor hardware/firmware will do a power cycle
during S3. In this case, after resume, once hid_sensor_read_poll_value()
is called, sensor's all properties which are kept by driver during S3
will be changed to default value.
But instead, if a set feature function is called first, sensor
hardware/firmware will be recovered to the last status. So change the
sensor_hub_set_feature() calling order to behind of set feature function
to avoid sensor properties lose.

Signed-off-by: Song Hongyan <hongyan.song@intel.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/common/hid-sensors/hid-sensor-trigger.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
index a3cce3a38300..ecf592d69043 100644
--- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
+++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
@@ -51,8 +51,6 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state)
 			st->report_state.report_id,
 			st->report_state.index,
 			HID_USAGE_SENSOR_PROP_REPORTING_STATE_ALL_EVENTS_ENUM);
-
-		poll_value = hid_sensor_read_poll_value(st);
 	} else {
 		int val;
 
@@ -89,7 +87,9 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state)
 	sensor_hub_get_feature(st->hsdev, st->power_state.report_id,
 			       st->power_state.index,
 			       sizeof(state_val), &state_val);
-	if (state && poll_value)
+	if (state)
+		poll_value = hid_sensor_read_poll_value(st);
+	if (poll_value > 0)
 		msleep_interruptible(poll_value * 2);
 
 	return 0;
-- 
cgit v1.2.3


From 3ff861f59f6c1f5bf2bc03d2cd36ac3f992cbc06 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Wed, 1 Mar 2017 15:37:57 -0800
Subject: iio: magnetometer: ak8974: remove incorrect __exit markups

Even if bus is not hot-pluggable, devices can be unbound from the
driver via sysfs, so we should not be using __exit annotations on
remove() methods. The only exception is drivers registered with
platform_driver_probe() which specifically disables sysfs bind/unbind
attributes.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/magnetometer/ak8974.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/magnetometer/ak8974.c b/drivers/iio/magnetometer/ak8974.c
index 6dd8cbd7ce95..e13370dc9b1c 100644
--- a/drivers/iio/magnetometer/ak8974.c
+++ b/drivers/iio/magnetometer/ak8974.c
@@ -763,7 +763,7 @@ power_off:
 	return ret;
 }
 
-static int __exit ak8974_remove(struct i2c_client *i2c)
+static int ak8974_remove(struct i2c_client *i2c)
 {
 	struct iio_dev *indio_dev = i2c_get_clientdata(i2c);
 	struct ak8974 *ak8974 = iio_priv(indio_dev);
@@ -845,7 +845,7 @@ static struct i2c_driver ak8974_driver = {
 		.of_match_table = of_match_ptr(ak8974_of_match),
 	},
 	.probe	  = ak8974_probe,
-	.remove	  = __exit_p(ak8974_remove),
+	.remove	  = ak8974_remove,
 	.id_table = ak8974_id,
 };
 module_i2c_driver(ak8974_driver);
-- 
cgit v1.2.3


From 9e10889a3177340dcda7d29c6d8fbd97247b007b Mon Sep 17 00:00:00 2001
From: Romain Izard <romain.izard.pro@gmail.com>
Date: Fri, 17 Feb 2017 16:12:50 +0100
Subject: Revert "ARM: at91/dt: sama5d2: Use new compatible for ohci node"

This reverts commit cab43282682e ("ARM: at91/dt: sama5d2: Use new
compatible for ohci node")

It depends from commit 7150bc9b4d43 ("usb: ohci-at91: Forcibly suspend
ports while USB suspend") which was reverted and implemented
differently. With the new implementation, the compatible string must
remain the same.

The compatible string introduced by this commit has been used in the
default SAMA5D2 dtsi starting from Linux 4.8. As it has never been
working correctly in an official release, removing it should not be
breaking the stability rules.

Fixes: cab43282682e ("ARM: at91/dt: sama5d2: Use new compatible for ohci node")
Signed-off-by: Romain Izard <romain.izard.pro@gmail.com>
cc: <stable@vger.kernel.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 arch/arm/boot/dts/sama5d2.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi
index 22332be72140..528b4e9c6d3d 100644
--- a/arch/arm/boot/dts/sama5d2.dtsi
+++ b/arch/arm/boot/dts/sama5d2.dtsi
@@ -266,7 +266,7 @@
 		};
 
 		usb1: ohci@00400000 {
-			compatible = "atmel,sama5d2-ohci", "usb-ohci";
+			compatible = "atmel,at91rm9200-ohci", "usb-ohci";
 			reg = <0x00400000 0x100000>;
 			interrupts = <41 IRQ_TYPE_LEVEL_HIGH 2>;
 			clocks = <&uhphs_clk>, <&uhphs_clk>, <&uhpck>;
-- 
cgit v1.2.3


From 6c4f0fa643cb9e775dcc976e3db00d649468ff1d Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 2 Mar 2017 14:03:20 +0530
Subject: cpufreq: schedutil: move cached_raw_freq to struct sugov_policy

cached_raw_freq applies to the entire cpufreq policy and not individual
CPUs. Apart from wasting per-cpu memory, it is actually wrong to keep it
in struct sugov_cpu as we may end up comparing next_freq with a stale
cached_raw_freq of a random CPU.

Move cached_raw_freq to struct sugov_policy.

Fixes: 5cbea46984d6 (cpufreq: schedutil: map raw required frequency to driver frequency)
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/sched/cpufreq_schedutil.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 8f8de3d4d6b7..3ab2aeaec6ec 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -36,6 +36,7 @@ struct sugov_policy {
 	u64 last_freq_update_time;
 	s64 freq_update_delay_ns;
 	unsigned int next_freq;
+	unsigned int cached_raw_freq;
 
 	/* The next fields are only needed if fast switch cannot be used. */
 	struct irq_work irq_work;
@@ -52,7 +53,6 @@ struct sugov_cpu {
 	struct update_util_data update_util;
 	struct sugov_policy *sg_policy;
 
-	unsigned int cached_raw_freq;
 	unsigned long iowait_boost;
 	unsigned long iowait_boost_max;
 	u64 last_update;
@@ -146,9 +146,9 @@ static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util,
 
 	freq = (freq + (freq >> 2)) * util / max;
 
-	if (freq == sg_cpu->cached_raw_freq && sg_policy->next_freq != UINT_MAX)
+	if (freq == sg_policy->cached_raw_freq && sg_policy->next_freq != UINT_MAX)
 		return sg_policy->next_freq;
-	sg_cpu->cached_raw_freq = freq;
+	sg_policy->cached_raw_freq = freq;
 	return cpufreq_driver_resolve_freq(policy, freq);
 }
 
@@ -580,6 +580,7 @@ static int sugov_start(struct cpufreq_policy *policy)
 	sg_policy->next_freq = UINT_MAX;
 	sg_policy->work_in_progress = false;
 	sg_policy->need_freq_update = false;
+	sg_policy->cached_raw_freq = 0;
 
 	for_each_cpu(cpu, policy->cpus) {
 		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
@@ -590,7 +591,6 @@ static int sugov_start(struct cpufreq_policy *policy)
 			sg_cpu->max = 0;
 			sg_cpu->flags = SCHED_CPUFREQ_RT;
 			sg_cpu->last_update = 0;
-			sg_cpu->cached_raw_freq = 0;
 			sg_cpu->iowait_boost = 0;
 			sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
 			cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
-- 
cgit v1.2.3


From 655cb1ebff4b7918fc560502c3297af2d3c7d114 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 2 Mar 2017 14:03:21 +0530
Subject: cpufreq: schedutil: Pass sg_policy to get_next_freq()

get_next_freq() uses sg_cpu only to get sg_policy, which the callers of
get_next_freq() already have. Pass sg_policy instead of sg_cpu to
get_next_freq(), to make it more efficient.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/sched/cpufreq_schedutil.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 3ab2aeaec6ec..cd7cd489f739 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -116,7 +116,7 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
 
 /**
  * get_next_freq - Compute a new frequency for a given cpufreq policy.
- * @sg_cpu: schedutil cpu object to compute the new frequency for.
+ * @sg_policy: schedutil policy object to compute the new frequency for.
  * @util: Current CPU utilization.
  * @max: CPU capacity.
  *
@@ -136,10 +136,9 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
  * next_freq (as calculated above) is returned, subject to policy min/max and
  * cpufreq driver limitations.
  */
-static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util,
-				  unsigned long max)
+static unsigned int get_next_freq(struct sugov_policy *sg_policy,
+				  unsigned long util, unsigned long max)
 {
-	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 	struct cpufreq_policy *policy = sg_policy->policy;
 	unsigned int freq = arch_scale_freq_invariant() ?
 				policy->cpuinfo.max_freq : policy->cur;
@@ -213,7 +212,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 	} else {
 		sugov_get_util(&util, &max);
 		sugov_iowait_boost(sg_cpu, &util, &max);
-		next_f = get_next_freq(sg_cpu, util, max);
+		next_f = get_next_freq(sg_policy, util, max);
 	}
 	sugov_update_commit(sg_policy, time, next_f);
 }
@@ -267,7 +266,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
 		sugov_iowait_boost(j_sg_cpu, &util, &max);
 	}
 
-	return get_next_freq(sg_cpu, util, max);
+	return get_next_freq(sg_policy, util, max);
 }
 
 static void sugov_update_shared(struct update_util_data *hook, u64 time,
-- 
cgit v1.2.3


From d82f26925599cae83c38d17d07ae982356e81318 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Tue, 28 Feb 2017 16:44:16 -0500
Subject: cpufreq: Add the "cpufreq.off=1" cmdline option

Add the "cpufreq.off=1" cmdline option.

At boot-time, this allows a user to request CONFIG_CPU_FREQ=n
behavior from a kernel built with CONFIG_CPU_FREQ=y.

This is analogous to the existing "cpuidle.off=1" option
and CONFIG_CPU_IDLE=y

This capability is valuable when we need to debug end-user
issues in the BIOS or in Linux.  It is also convenient
for enabling comparisons, which may otherwise require a new kernel,
or help from BIOS SETUP, which may be buggy or unavailable.

Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/admin-guide/kernel-parameters.txt | 3 +++
 drivers/cpufreq/cpufreq.c                       | 1 +
 2 files changed, 4 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index be7c0d9506b1..3988d2311f97 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -662,6 +662,9 @@
 	cpuidle.off=1	[CPU_IDLE]
 			disable the cpuidle sub-system
 
+	cpufreq.off=1	[CPU_FREQ]
+			disable the cpufreq sub-system
+
 	cpu_init_udelay=N
 			[X86] Delay for N microsec between assert and de-assert
 			of APIC INIT to start processors.  This delay occurs
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 80a785ad17e8..7790db2645d7 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2532,4 +2532,5 @@ static int __init cpufreq_core_init(void)
 
 	return 0;
 }
+module_param(off, int, 0444);
 core_initcall(cpufreq_core_init);
-- 
cgit v1.2.3


From cd59b4bed9d11a2aefc4bb44eed9de0e6c1eea06 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 1 Mar 2017 00:07:36 +0100
Subject: cpufreq: intel_pstate: Fix global settings in active mode

Commit 111b8b3fe4fa (cpufreq: intel_pstate: Always keep all
limits settings in sync) changed intel_pstate to invoke
cpufreq_update_policy() for every registered CPU on global sysfs
attributes updates, but that led to undesirable effects in the
active mode if the "performance" P-state selection algorithm is
configufred for one CPU and the "powersave" one is chosen for
all of the other CPUs.

Namely, in that case, the following is possible:

 # cd /sys/devices/system/cpu/
 # cat intel_pstate/max_perf_pct
 100
 # cat intel_pstate/min_perf_pct
 26
 # echo performance > cpufreq/policy0/scaling_governor
 # cat intel_pstate/max_perf_pct
 100
 # cat intel_pstate/min_perf_pct
 100
 # echo 94 > intel_pstate/min_perf_pct
 # cat intel_pstate/min_perf_pct
 26

The reason why this happens is because intel_pstate attempts to
maintain two sets of global limits in the active mode, one for
the "performance" P-state selection algorithm and one for the
"powersave"  P-state selection algorithm, but the P-state selection
algorithms are set per policy, so the global limits cannot reflect
all of them at the same time if they are different for different
policies.

In the particular situation above, the attempt to change
min_perf_pct to 94 caused cpufreq_update_policy() to be run
for a CPU with the "powersave"  P-state selection algorithm
and intel_pstate_set_policy() called by it silently switched the
global limits to the "powersave" set which finally was reflected
by the sysfs interface.

To prevent that from happening, modify intel_pstate_update_policies()
to always switch back to the set of limits that was used right before
it has been invoked.

Fixes: 111b8b3fe4fa (cpufreq: intel_pstate: Always keep all limits settings in sync)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 5e066b332598..436a4c5ba70d 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -973,11 +973,20 @@ static int intel_pstate_resume(struct cpufreq_policy *policy)
 }
 
 static void intel_pstate_update_policies(void)
+	__releases(&intel_pstate_limits_lock)
+	__acquires(&intel_pstate_limits_lock)
 {
+	struct perf_limits *saved_limits = limits;
 	int cpu;
 
+	mutex_unlock(&intel_pstate_limits_lock);
+
 	for_each_possible_cpu(cpu)
 		cpufreq_update_policy(cpu);
+
+	mutex_lock(&intel_pstate_limits_lock);
+
+	limits = saved_limits;
 }
 
 /************************** debugfs begin ************************/
@@ -1185,10 +1194,10 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
 
 	limits->no_turbo = clamp_t(int, input, 0, 1);
 
-	mutex_unlock(&intel_pstate_limits_lock);
-
 	intel_pstate_update_policies();
 
+	mutex_unlock(&intel_pstate_limits_lock);
+
 	mutex_unlock(&intel_pstate_driver_lock);
 
 	return count;
@@ -1222,10 +1231,10 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
 				   limits->max_perf_pct);
 	limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
 
-	mutex_unlock(&intel_pstate_limits_lock);
-
 	intel_pstate_update_policies();
 
+	mutex_unlock(&intel_pstate_limits_lock);
+
 	mutex_unlock(&intel_pstate_driver_lock);
 
 	return count;
@@ -1259,10 +1268,10 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
 				   limits->min_perf_pct);
 	limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
 
-	mutex_unlock(&intel_pstate_limits_lock);
-
 	intel_pstate_update_policies();
 
+	mutex_unlock(&intel_pstate_limits_lock);
+
 	mutex_unlock(&intel_pstate_driver_lock);
 
 	return count;
-- 
cgit v1.2.3


From d74b19929130c9b5395a497030dcf161353cd526 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 1 Mar 2017 00:11:05 +0100
Subject: cpufreq: intel_pstate: Fix intel_pstate_verify_policy()

The code added to intel_pstate_verify_policy() by commit 1443ebbacfd7
(cpufreq: intel_pstate: Fix sysfs limits enforcement for performance
policy) should use perf_limits instead of limits, because otherwise
setting global limits via sysfs may affect policies inconsistently.

For example, in the sequence of shell commands below, the
scaling_min_freq attribute for policy1 and policy2 should be
affected in the same way, because scaling_governor is set in
the same way for both of them:

 # cat cpufreq/policy1/scaling_governor
 powersave
 # cat cpufreq/policy2/scaling_governor
 powersave
 # echo performance > cpufreq/policy0/scaling_governor
 # echo 94 > intel_pstate/min_perf_pct
 # cat cpufreq/policy0/scaling_min_freq
 2914000
 # cat cpufreq/policy1/scaling_min_freq
 2914000
 # cat cpufreq/policy2/scaling_min_freq
 800000

The are affected differently, because intel_pstate_verify_policy()
is invoked with limits set to &performance_limits (left behind by
policy0) for policy1 and with limits set to &powersave_limits (left
behind by policy1) for policy2.  Since perf_limits is set to the
set of limits matching the policy being updated, using it instead
of limits fixes the inconsistency.

Fixes: 1443ebbacfd7 (cpufreq: intel_pstate: Fix sysfs limits enforcement for performance policy)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 436a4c5ba70d..3dc546601c88 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2212,9 +2212,9 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
 		unsigned int max_freq, min_freq;
 
 		max_freq = policy->cpuinfo.max_freq *
-						limits->max_sysfs_pct / 100;
+					perf_limits->max_sysfs_pct / 100;
 		min_freq = policy->cpuinfo.max_freq *
-						limits->min_sysfs_pct / 100;
+					perf_limits->min_sysfs_pct / 100;
 		cpufreq_verify_within_limits(policy, min_freq, max_freq);
 	}
 
-- 
cgit v1.2.3


From a240c4aa5d0f9c8bb0db380ab9c1ec1f68b923ad Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 2 Mar 2017 23:29:12 +0100
Subject: cpufreq: intel_pstate: Do not reinit performance limits in
 ->setpolicy

If the current P-state selection algorithm is set to "performance"
in intel_pstate_set_policy(), the limits may be initialized from
scratch, but only if no_turbo is not set and the maximum frequency
allowed for the given CPU (i.e. the policy object representing it)
is at least equal to the max frequency supported by the CPU.  In all
of the other cases, the limits will not be updated.

For example, the following can happen:

 # cat intel_pstate/status
 active
 # echo performance > cpufreq/policy0/scaling_governor
 # cat intel_pstate/min_perf_pct
 100
 # echo 94 > intel_pstate/min_perf_pct
 # cat intel_pstate/min_perf_pct
 100
 # cat cpufreq/policy0/scaling_max_freq
 3100000
 echo 3000000 > cpufreq/policy0/scaling_max_freq
 # cat intel_pstate/min_perf_pct
 94
 # echo 95 > intel_pstate/min_perf_pct
 # cat intel_pstate/min_perf_pct
 95

That is confusing for two reasons.  First, the initial attempt to
change min_perf_pct to 94 seems to have no effect, even though
setting the global limits should always work.  Second, after
changing scaling_max_freq for policy0 the global min_perf_pct
attribute shows 94, even though it should have not been affected
by that operation in principle.

Moreover, the final attempt to change min_perf_pct to 95 worked
as expected, because scaling_max_freq for the only policy with
scaling_governor equal to "performance" was different from the
maximum at that time.

To make all that confusion go away, modify intel_pstate_set_policy()
so that it doesn't reinitialize the limits at all.

At the same time, change intel_pstate_set_performance_limits() to
set min_sysfs_pct to 100 in the "performance" limits set so that
switching the P-state selection algorithm to "performance" causes
intel_pstate/min_perf_pct in sysfs to go to 100 (or whatever value
min_sysfs_pct in the "performance" limits is set to later).

That requires per-CPU limits to be initialized explicitly rather
than by copying the global limits to avoid setting min_sysfs_pct
in the per-CPU limits to 100.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 3dc546601c88..f1f8fbe0b4c4 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -382,6 +382,7 @@ static void intel_pstate_set_performance_limits(struct perf_limits *limits)
 	intel_pstate_init_limits(limits);
 	limits->min_perf_pct = 100;
 	limits->min_perf = int_ext_tofp(1);
+	limits->min_sysfs_pct = 100;
 }
 
 static DEFINE_MUTEX(intel_pstate_driver_lock);
@@ -2146,16 +2147,11 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 	mutex_lock(&intel_pstate_limits_lock);
 
 	if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
+		pr_debug("set performance\n");
 		if (!perf_limits) {
 			limits = &performance_limits;
 			perf_limits = limits;
 		}
-		if (policy->max >= policy->cpuinfo.max_freq &&
-		    !limits->no_turbo) {
-			pr_debug("set performance\n");
-			intel_pstate_set_performance_limits(perf_limits);
-			goto out;
-		}
 	} else {
 		pr_debug("set powersave\n");
 		if (!perf_limits) {
@@ -2166,7 +2162,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 	}
 
 	intel_pstate_update_perf_limits(policy, perf_limits);
- out:
+
 	if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
 		/*
 		 * NOHZ_FULL CPUs need this as the governor callback may not
@@ -2257,13 +2253,8 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
 
 	cpu = all_cpu_data[policy->cpu];
 
-	/*
-	 * We need sane value in the cpu->perf_limits, so inherit from global
-	 * perf_limits limits, which are seeded with values based on the
-	 * CONFIG_CPU_FREQ_DEFAULT_GOV_*, during boot up.
-	 */
 	if (per_cpu_limits)
-		memcpy(cpu->perf_limits, limits, sizeof(struct perf_limits));
+		intel_pstate_init_limits(cpu->perf_limits);
 
 	policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
 	policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
-- 
cgit v1.2.3


From e51b999e11b84a766d78347afe9287e2c5b91c97 Mon Sep 17 00:00:00 2001
From: Jon Mason <jon.mason@broadcom.com>
Date: Wed, 8 Feb 2017 15:37:11 -0500
Subject: ARM: dts: BCM5301X: Fix UARTs on bcm953012k

The UARTs are outputting garbage on the console.  This is due to a speed
issue.  We can simply use the clock speed (which is now defined in the
DTSI file) and everything works fine.

Signed-off-by: Jon Mason <jon.mason@broadcom.com>
Fixes: cdc36b22 ("ARM: dts: enable clock support for BCM5301X")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 arch/arm/boot/dts/bcm953012k.dts | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/bcm953012k.dts b/arch/arm/boot/dts/bcm953012k.dts
index bfd923096a8c..da5aa8f5ffa7 100644
--- a/arch/arm/boot/dts/bcm953012k.dts
+++ b/arch/arm/boot/dts/bcm953012k.dts
@@ -53,10 +53,9 @@
 };
 
 &uart0 {
-	clock-frequency = <62499840>;
+	status = "okay";
 };
 
 &uart1 {
-	clock-frequency = <62499840>;
 	status = "okay";
 };
-- 
cgit v1.2.3


From 88d1fa70c21d7b431386cfe70cdc514d98b0c9c4 Mon Sep 17 00:00:00 2001
From: Jon Mason <jon.mason@broadcom.com>
Date: Wed, 8 Feb 2017 15:37:12 -0500
Subject: ARM: dts: BCM5301X: Fix memory start address

Memory starts at 0x80000000, not 0.  0 "works" due to mirrior of the
first 128M of RAM to that address.  Anything greater than 128M will
quickly find nothing there.  Correcting the starting address has
everything working again.

Signed-off-by: Jon Mason <jon.mason@broadcom.com>
Fixes: 7eb05f6d ("ARM: dts: bcm5301x: Add BCM SVK DT files")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 arch/arm/boot/dts/bcm953012k.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/bcm953012k.dts b/arch/arm/boot/dts/bcm953012k.dts
index da5aa8f5ffa7..ae31a5826e91 100644
--- a/arch/arm/boot/dts/bcm953012k.dts
+++ b/arch/arm/boot/dts/bcm953012k.dts
@@ -48,7 +48,7 @@
 	};
 
 	memory {
-		reg = <0x00000000 0x10000000>;
+		reg = <0x80000000 0x10000000>;
 	};
 };
 
-- 
cgit v1.2.3


From 0c2bf9f95983fe30aa2f6463cb761cd42c2d521a Mon Sep 17 00:00:00 2001
From: Jon Mason <jon.mason@broadcom.com>
Date: Thu, 2 Mar 2017 19:21:32 -0500
Subject: ARM: dts: BCM5301X: Correct GIC_PPI interrupt flags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GIC_PPI flags were misconfigured for the timers, resulting in errors
like:
[    0.000000] GIC: PPI11 is secure or misconfigured

Changing them to being edge triggered corrects the issue

Suggested-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: Jon Mason <jon.mason@broadcom.com>
Fixes: d27509f1 ("ARM: BCM5301X: add dts files for BCM4708 SoC")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 arch/arm/boot/dts/bcm5301x.dtsi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi
index 4fbb089cf5ad..00de62dc0042 100644
--- a/arch/arm/boot/dts/bcm5301x.dtsi
+++ b/arch/arm/boot/dts/bcm5301x.dtsi
@@ -66,14 +66,14 @@
 		timer@20200 {
 			compatible = "arm,cortex-a9-global-timer";
 			reg = <0x20200 0x100>;
-			interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>;
 			clocks = <&periph_clk>;
 		};
 
 		local-timer@20600 {
 			compatible = "arm,cortex-a9-twd-timer";
 			reg = <0x20600 0x100>;
-			interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_PPI 13 IRQ_TYPE_EDGE_RISING>;
 			clocks = <&periph_clk>;
 		};
 
-- 
cgit v1.2.3


From f38837b08d23e66de17d46d030e0d9ac5172ad1f Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Sun, 5 Mar 2017 09:41:08 -0800
Subject: bpf: add get_next_key callback to LPM map

map_get_next_key callback is mandatory. Supply dummy handler.

Fixes: b95a5c4db09b ("bpf: add a longest prefix match trie map implementation")
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/lpm_trie.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 8bfe0afaee10..b37bd9ab7f57 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -500,9 +500,15 @@ unlock:
 	raw_spin_unlock(&trie->lock);
 }
 
+static int trie_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+	return -ENOTSUPP;
+}
+
 static const struct bpf_map_ops trie_ops = {
 	.map_alloc = trie_alloc,
 	.map_free = trie_free,
+	.map_get_next_key = trie_get_next_key,
 	.map_lookup_elem = trie_lookup_elem,
 	.map_update_elem = trie_update_elem,
 	.map_delete_elem = trie_delete_elem,
-- 
cgit v1.2.3


From 0878fff1f42c18e448ab5b8b4f6a3eb32365b5b6 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Sun, 5 Mar 2017 12:34:49 -0800
Subject: net: phy: Do not perform software reset for Generic PHY

The Generic PHY driver is a catch-all PHY driver and it should preserve
whatever prior initialization has been done by boot loader or firmware
agents. For specific PHY device configuration it is expected that a
specialized PHY driver would take over that role.

Resetting the generic PHY was a bad idea that has lead to several
complaints and downstream workarounds e.g: in OpenWrt/LEDE so restore
the behavior prior to 87aa9f9c61ad ("net: phy: consolidate PHY
reset in phy_init_hw()").

Reported-by: Felix Fietkau <nbd@nbd.name>
Fixes: 87aa9f9c61ad ("net: phy: consolidate PHY reset in phy_init_hw()")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 2 +-
 include/linux/phy.h          | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index daec6555f3b1..5198ccfa347f 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1864,7 +1864,7 @@ static struct phy_driver genphy_driver[] = {
 	.phy_id		= 0xffffffff,
 	.phy_id_mask	= 0xffffffff,
 	.name		= "Generic PHY",
-	.soft_reset	= genphy_soft_reset,
+	.soft_reset	= genphy_no_soft_reset,
 	.config_init	= genphy_config_init,
 	.features	= PHY_GBIT_FEATURES | SUPPORTED_MII |
 			  SUPPORTED_AUI | SUPPORTED_FIBRE |
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 772476028a65..43a774873aa9 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -837,6 +837,10 @@ int genphy_read_status(struct phy_device *phydev);
 int genphy_suspend(struct phy_device *phydev);
 int genphy_resume(struct phy_device *phydev);
 int genphy_soft_reset(struct phy_device *phydev);
+static inline int genphy_no_soft_reset(struct phy_device *phydev)
+{
+	return 0;
+}
 void phy_driver_unregister(struct phy_driver *drv);
 void phy_drivers_unregister(struct phy_driver *drv, int n);
 int phy_driver_register(struct phy_driver *new_driver, struct module *owner);
-- 
cgit v1.2.3


From ac8616e4c81dded650dfade49a7da283565d37ce Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Tue, 14 Feb 2017 11:35:22 +0800
Subject: clk: sunxi-ng: mp: Adjust parent rate for pre-dividers

The MP style clocks support an mux with pre-dividers. While the driver
correctly accounted for them in the .determine_rate callback, it did
not in the .recalc_rate and .set_rate callbacks.

This means when calculating the factors in the .set_rate callback, they
would be off by a factor of the active pre-divider. Same goes for
reading back the clock rate after it is set.

Cc: stable@vger.kernel.org
Fixes: 2ab836db5097 ("clk: sunxi-ng: Add M-P factor clock support")
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 drivers/clk/sunxi-ng/ccu_mp.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/clk/sunxi-ng/ccu_mp.c b/drivers/clk/sunxi-ng/ccu_mp.c
index 22c2ca7a2a22..b583f186a804 100644
--- a/drivers/clk/sunxi-ng/ccu_mp.c
+++ b/drivers/clk/sunxi-ng/ccu_mp.c
@@ -85,6 +85,10 @@ static unsigned long ccu_mp_recalc_rate(struct clk_hw *hw,
 	unsigned int m, p;
 	u32 reg;
 
+	/* Adjust parent_rate according to pre-dividers */
+	ccu_mux_helper_adjust_parent_for_prediv(&cmp->common, &cmp->mux,
+						-1, &parent_rate);
+
 	reg = readl(cmp->common.base + cmp->common.reg);
 
 	m = reg >> cmp->m.shift;
@@ -117,6 +121,10 @@ static int ccu_mp_set_rate(struct clk_hw *hw, unsigned long rate,
 	unsigned int m, p;
 	u32 reg;
 
+	/* Adjust parent_rate according to pre-dividers */
+	ccu_mux_helper_adjust_parent_for_prediv(&cmp->common, &cmp->mux,
+						-1, &parent_rate);
+
 	max_m = cmp->m.max ?: 1 << cmp->m.width;
 	max_p = cmp->p.max ?: 1 << ((1 << cmp->p.width) - 1);
 
-- 
cgit v1.2.3


From 69c9ae5041309553472ee798d7683be31bcdc434 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 14 Feb 2017 22:29:45 +0100
Subject: clk: sunxi: ccu-sun5i needs nkmp

A randconfig build ran into this rare link error:

drivers/clk/sunxi-ng/ccu-sun5i.o:(.data.__compound_literal.1+0x4): undefined reference to `ccu_nkmp_ops'
drivers/clk/sunxi-ng/ccu-sun5i.o:(.data.__compound_literal.7+0x4): undefined reference to `ccu_nkmp_ops'

This adds the missing 'select'.

Fixes: 5e73761786d6 ("clk: sunxi-ng: Add sun5i CCU driver")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 drivers/clk/sunxi-ng/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/clk/sunxi-ng/Kconfig b/drivers/clk/sunxi-ng/Kconfig
index 695bbf9ef428..72109d2cf41b 100644
--- a/drivers/clk/sunxi-ng/Kconfig
+++ b/drivers/clk/sunxi-ng/Kconfig
@@ -80,6 +80,7 @@ config SUN6I_A31_CCU
 	select SUNXI_CCU_DIV
 	select SUNXI_CCU_NK
 	select SUNXI_CCU_NKM
+	select SUNXI_CCU_NKMP
 	select SUNXI_CCU_NM
 	select SUNXI_CCU_MP
 	select SUNXI_CCU_PHASE
-- 
cgit v1.2.3


From 9ad0bb39fce319d7b92c17d306ed0a9f70a02e7d Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Tue, 14 Feb 2017 10:23:32 +0800
Subject: clk: sunxi-ng: sun6i: Fix enable bit offset for hdmi-ddc module clock

The enable bit offset for the hdmi-ddc module clock is wrong. It is
pointing to the main hdmi module clock enable bit.

Reported-by: Bob Ham <rah@settrans.net>
Fixes: c6e6c96d8fa6 ("clk: sunxi-ng: Add A31/A31s clocks")
Cc: stable@vger.kernel.org # 4.9.x-
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 drivers/clk/sunxi-ng/ccu-sun6i-a31.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
index 4c9a920ff4ab..89e68d29bf45 100644
--- a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
+++ b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
@@ -608,7 +608,7 @@ static SUNXI_CCU_M_WITH_MUX_GATE(hdmi_clk, "hdmi", lcd_ch1_parents,
 				 0x150, 0, 4, 24, 2, BIT(31),
 				 CLK_SET_RATE_PARENT);
 
-static SUNXI_CCU_GATE(hdmi_ddc_clk, "hdmi-ddc", "osc24M", 0x150, BIT(31), 0);
+static SUNXI_CCU_GATE(hdmi_ddc_clk, "hdmi-ddc", "osc24M", 0x150, BIT(30), 0);
 
 static SUNXI_CCU_GATE(ps_clk, "ps", "lcd1-ch1", 0x140, BIT(31), 0);
 
-- 
cgit v1.2.3


From 39319f504b5d91e853f5ec7753a56e43915fcaf4 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Wed, 30 Nov 2016 12:05:03 +0100
Subject: ARM: sun8i: Fix the mali clock rate

The Mali clock rate was improperly assumed to be 408MHz, while it was
really 384Mhz, 408MHz being the "extreme" frequency, and definitely not
stable.

Switch for the stable, correct frequency for the GPU.

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 arch/arm/boot/dts/sun8i-a23-a33.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/sun8i-a23-a33.dtsi b/arch/arm/boot/dts/sun8i-a23-a33.dtsi
index a952cc0703cc..8a3ed21cb7bc 100644
--- a/arch/arm/boot/dts/sun8i-a23-a33.dtsi
+++ b/arch/arm/boot/dts/sun8i-a23-a33.dtsi
@@ -495,7 +495,7 @@
 			resets = <&ccu RST_BUS_GPU>;
 
 			assigned-clocks = <&ccu CLK_GPU>;
-			assigned-clock-rates = <408000000>;
+			assigned-clock-rates = <384000000>;
 		};
 
 		gic: interrupt-controller@01c81000 {
-- 
cgit v1.2.3


From 36fc579761b50784b63dafd0f2e796b659e0f5ee Mon Sep 17 00:00:00 2001
From: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Date: Mon, 20 Feb 2017 16:25:45 +0100
Subject: drm/edid: Add EDID_QUIRK_FORCE_8BPC quirk for Rotel RSX-1058
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rotel RSX-1058 is a receiver with 4 HDMI inputs and a HDMI output, all
1.1.

When a sink that supports deep color is connected to the output, the
receiver will send EDIDs that advertise this capability, even if it
isn't possible with HDMI versions earlier than 1.3.

Currently the kernel is assuming that deep color is possible and the
sink displays an error.

This quirk will make sure that deep color isn't used with this
particular receiver.

Fixes: 7a0baa623446 ("Revert "drm/i915: Disable 12bpc hdmi for now"")
Signed-off-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170220152545.13153-1-tomeu.vizoso@collabora.com
Cc: stable@vger.kernel.org
Cc: Matt Horan <matt@matthoran.com>
Tested-by: Matt Horan <matt@matthoran.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99869
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/drm_edid.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index c8baab9bee0d..ba58f1b11d1e 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -148,6 +148,9 @@ static const struct edid_quirk {
 
 	/* Panel in Samsung NP700G7A-S01PL notebook reports 6bpc */
 	{ "SEC", 0xd033, EDID_QUIRK_FORCE_8BPC },
+
+	/* Rotel RSX-1058 forwards sink's EDID but only does HDMI 1.1*/
+	{ "ETR", 13896, EDID_QUIRK_FORCE_8BPC },
 };
 
 /*
-- 
cgit v1.2.3


From 7369090a9fb57c3fc705ce355d2e4523a5a24716 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <felipe.balbi@linux.intel.com>
Date: Tue, 31 Jan 2017 13:24:54 +0200
Subject: usb: dwc3: gadget: make Set Endpoint Configuration macros safe

Some gadget drivers are bad, bad boys. We notice
that ADB was passing bad Burst Size which caused top
bits of param0 to be overwritten which confused DWC3
when running this command.

In order to avoid future issues, we're going to make
sure values passed by macros are always safe for the
controller. Note that ADB still needs a fix to *not*
pass bad values.

Cc: <stable@vger.kernel.org> # v3.2+
Reported-by: Mohamed Abbas <mohamed.abbas@intel.com>
Sugested-by: Adam Andruszak <adam.andruszak@intel.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/dwc3/gadget.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/usb/dwc3/gadget.h b/drivers/usb/dwc3/gadget.h
index 3129bcf74d7d..265e223ab645 100644
--- a/drivers/usb/dwc3/gadget.h
+++ b/drivers/usb/dwc3/gadget.h
@@ -28,23 +28,23 @@ struct dwc3;
 #define gadget_to_dwc(g)	(container_of(g, struct dwc3, gadget))
 
 /* DEPCFG parameter 1 */
-#define DWC3_DEPCFG_INT_NUM(n)		((n) << 0)
+#define DWC3_DEPCFG_INT_NUM(n)		(((n) & 0x1f) << 0)
 #define DWC3_DEPCFG_XFER_COMPLETE_EN	(1 << 8)
 #define DWC3_DEPCFG_XFER_IN_PROGRESS_EN	(1 << 9)
 #define DWC3_DEPCFG_XFER_NOT_READY_EN	(1 << 10)
 #define DWC3_DEPCFG_FIFO_ERROR_EN	(1 << 11)
 #define DWC3_DEPCFG_STREAM_EVENT_EN	(1 << 13)
-#define DWC3_DEPCFG_BINTERVAL_M1(n)	((n) << 16)
+#define DWC3_DEPCFG_BINTERVAL_M1(n)	(((n) & 0xff) << 16)
 #define DWC3_DEPCFG_STREAM_CAPABLE	(1 << 24)
-#define DWC3_DEPCFG_EP_NUMBER(n)	((n) << 25)
+#define DWC3_DEPCFG_EP_NUMBER(n)	(((n) & 0x1f) << 25)
 #define DWC3_DEPCFG_BULK_BASED		(1 << 30)
 #define DWC3_DEPCFG_FIFO_BASED		(1 << 31)
 
 /* DEPCFG parameter 0 */
-#define DWC3_DEPCFG_EP_TYPE(n)		((n) << 1)
-#define DWC3_DEPCFG_MAX_PACKET_SIZE(n)	((n) << 3)
-#define DWC3_DEPCFG_FIFO_NUMBER(n)	((n) << 17)
-#define DWC3_DEPCFG_BURST_SIZE(n)	((n) << 22)
+#define DWC3_DEPCFG_EP_TYPE(n)		(((n) & 0x3) << 1)
+#define DWC3_DEPCFG_MAX_PACKET_SIZE(n)	(((n) & 0x7ff) << 3)
+#define DWC3_DEPCFG_FIFO_NUMBER(n)	(((n) & 0x1f) << 17)
+#define DWC3_DEPCFG_BURST_SIZE(n)	(((n) & 0xf) << 22)
 #define DWC3_DEPCFG_DATA_SEQ_NUM(n)	((n) << 26)
 /* This applies for core versions earlier than 1.94a */
 #define DWC3_DEPCFG_IGN_SEQ_NUM		(1 << 31)
-- 
cgit v1.2.3


From 2bfa0719ac2a9b2f3c91345873d3cdebd0296ba9 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <felipe.balbi@linux.intel.com>
Date: Tue, 31 Jan 2017 14:54:45 +0200
Subject: usb: gadget: function: f_fs: pass companion descriptor along

If we're dealing with SuperSpeed endpoints, we need
to make sure to pass along the companion descriptor
and initialize fields needed by the Gadget
API. Eventually, f_fs.c should be converted to use
config_ep_by_speed() like all other functions,
though.

Cc: <stable@vger.kernel.org>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/function/f_fs.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index a5b7cd615698..7e976f00cb02 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1834,11 +1834,14 @@ static int ffs_func_eps_enable(struct ffs_function *func)
 	spin_lock_irqsave(&func->ffs->eps_lock, flags);
 	while(count--) {
 		struct usb_endpoint_descriptor *ds;
+		struct usb_ss_ep_comp_descriptor *comp_desc = NULL;
+		int needs_comp_desc = false;
 		int desc_idx;
 
-		if (ffs->gadget->speed == USB_SPEED_SUPER)
+		if (ffs->gadget->speed == USB_SPEED_SUPER) {
 			desc_idx = 2;
-		else if (ffs->gadget->speed == USB_SPEED_HIGH)
+			needs_comp_desc = true;
+		} else if (ffs->gadget->speed == USB_SPEED_HIGH)
 			desc_idx = 1;
 		else
 			desc_idx = 0;
@@ -1855,6 +1858,14 @@ static int ffs_func_eps_enable(struct ffs_function *func)
 
 		ep->ep->driver_data = ep;
 		ep->ep->desc = ds;
+
+		comp_desc = (struct usb_ss_ep_comp_descriptor *)(ds +
+				USB_DT_ENDPOINT_SIZE);
+		ep->ep->maxburst = comp_desc->bMaxBurst + 1;
+
+		if (needs_comp_desc)
+			ep->ep->comp_desc = comp_desc;
+
 		ret = usb_ep_enable(ep->ep);
 		if (likely(!ret)) {
 			epfile->ep = ep;
-- 
cgit v1.2.3


From cf3113d893d4427b166ec8695460efa7aa660923 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <felipe.balbi@linux.intel.com>
Date: Fri, 17 Feb 2017 11:12:44 +0200
Subject: usb: dwc3: gadget: properly increment dequeue pointer on ep_dequeue

If request was already started, this means we had to
stop the transfer. With that we also need to ignore
all TRBs used by the request, however TRBs can only
be modified after completion of END_TRANSFER
command. So what we have to do here is wait for
END_TRANSFER completion and only after that jump
over TRBs by clearing HWO and incrementing dequeue
pointer.

Note that we have 2 possible types of transfers
here:

i) Linear buffer request
ii) SG-list based request

SG-list based requests will have r->num_pending_sgs
set to a valid number (> 0). Linear requests,
normally use a single TRB.

For each of these two cases, if r->unaligned flag is
set, one extra TRB has been used to align transfer
size to wMaxPacketSize.

All of these cases need to be taken into
consideration so we don't mess up our TRB ring
pointers.

Tested-by: Janusz Dziedzic <januszx.dziedzic@intel.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/dwc3/gadget.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 4db97ecae885..f875b3f4b0ec 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1342,6 +1342,68 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep,
 		if (r == req) {
 			/* wait until it is processed */
 			dwc3_stop_active_transfer(dwc, dep->number, true);
+
+			/*
+			 * If request was already started, this means we had to
+			 * stop the transfer. With that we also need to ignore
+			 * all TRBs used by the request, however TRBs can only
+			 * be modified after completion of END_TRANSFER
+			 * command. So what we do here is that we wait for
+			 * END_TRANSFER completion and only after that, we jump
+			 * over TRBs by clearing HWO and incrementing dequeue
+			 * pointer.
+			 *
+			 * Note that we have 2 possible types of transfers here:
+			 *
+			 * i) Linear buffer request
+			 * ii) SG-list based request
+			 *
+			 * SG-list based requests will have r->num_pending_sgs
+			 * set to a valid number (> 0). Linear requests,
+			 * normally use a single TRB.
+			 *
+			 * For each of these two cases, if r->unaligned flag is
+			 * set, one extra TRB has been used to align transfer
+			 * size to wMaxPacketSize.
+			 *
+			 * All of these cases need to be taken into
+			 * consideration so we don't mess up our TRB ring
+			 * pointers.
+			 */
+			wait_event_lock_irq(dep->wait_end_transfer,
+					!(dep->flags & DWC3_EP_END_TRANSFER_PENDING),
+					dwc->lock);
+
+			if (!r->trb)
+				goto out1;
+
+			if (r->num_pending_sgs) {
+				struct dwc3_trb *trb;
+				int i = 0;
+
+				for (i = 0; i < r->num_pending_sgs; i++) {
+					trb = r->trb + i;
+					trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
+					dwc3_ep_inc_deq(dep);
+				}
+
+				if (r->unaligned) {
+					trb = r->trb + r->num_pending_sgs + 1;
+					trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
+					dwc3_ep_inc_deq(dep);
+				}
+			} else {
+				struct dwc3_trb *trb = r->trb;
+
+				trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
+				dwc3_ep_inc_deq(dep);
+
+				if (r->unaligned) {
+					trb = r->trb + 1;
+					trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
+					dwc3_ep_inc_deq(dep);
+				}
+			}
 			goto out1;
 		}
 		dev_err(dwc->dev, "request %p was not queued to %s\n",
@@ -1352,6 +1414,7 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep,
 
 out1:
 	/* giveback the request */
+	dep->queued_requests--;
 	dwc3_gadget_giveback(dep, req, -ECONNRESET);
 
 out0:
-- 
cgit v1.2.3


From 2e46565cf622dd0534a9d8bffe152a577b48d7aa Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 24 Feb 2017 19:11:28 +0100
Subject: USB: serial: digi_acceleport: fix OOB-event processing

A recent change claimed to fix an off-by-one error in the OOB-port
completion handler, but instead introduced such an error. This could
specifically led to modem-status changes going unnoticed, effectively
breaking TIOCMGET.

Note that the offending commit fixes a loop-condition underflow and is
marked for stable, but should not be backported without this fix.

Reported-by: Ben Hutchings <ben@decadent.org.uk>
Fixes: 2d380889215f ("USB: serial: digi_acceleport: fix OOB data sanity
check")
Cc: stable <stable@vger.kernel.org>	# v2.6.30: 2d380889215f
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/digi_acceleport.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c
index ab78111e0968..6537d3ca2797 100644
--- a/drivers/usb/serial/digi_acceleport.c
+++ b/drivers/usb/serial/digi_acceleport.c
@@ -1500,7 +1500,7 @@ static int digi_read_oob_callback(struct urb *urb)
 		return -1;
 
 	/* handle each oob command */
-	for (i = 0; i < urb->actual_length - 4; i += 4) {
+	for (i = 0; i < urb->actual_length - 3; i += 4) {
 		opcode = buf[i];
 		line = buf[i + 1];
 		status = buf[i + 2];
-- 
cgit v1.2.3


From 8f1117abb408808af9cc4c948925c726bec4755a Mon Sep 17 00:00:00 2001
From: Chuanxiao Dong <chuanxiao.dong@intel.com>
Date: Mon, 6 Mar 2017 13:05:24 +0800
Subject: drm/i915/gvt: handle workload lifecycle properly

Currently i915 has a request replay mechanism which can make sure
the request can be replayed after a GPU reset. With this mechanism,
gvt should wait until the GVT request seqno passed before complete
the current workload. So that there should be a context switch interrupt
come before gvt free the workload. In this way, workload lifecylce
matches with the i915 request lifecycle. The workload can only be freed
after the request is completed.

v2: use gvt_dbg_sched instead of gvt_err to print when wait again

Signed-off-by: Chuanxiao Dong <chuanxiao.dong@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/scheduler.c | 49 ++++++++++++++++++++++++------------
 1 file changed, 33 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index e355a82ccabd..d3a56c949025 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -151,6 +151,15 @@ static int shadow_context_status_change(struct notifier_block *nb,
 	case INTEL_CONTEXT_SCHEDULE_OUT:
 		intel_gvt_restore_render_mmio(workload->vgpu,
 					      workload->ring_id);
+		/* If the status is -EINPROGRESS means this workload
+		 * doesn't meet any issue during dispatching so when
+		 * get the SCHEDULE_OUT set the status to be zero for
+		 * good. If the status is NOT -EINPROGRESS means there
+		 * is something wrong happened during dispatching and
+		 * the status should not be set to zero
+		 */
+		if (workload->status == -EINPROGRESS)
+			workload->status = 0;
 		atomic_set(&workload->shadow_ctx_active, 0);
 		break;
 	default:
@@ -362,15 +371,23 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
 	workload = scheduler->current_workload[ring_id];
 	vgpu = workload->vgpu;
 
-	if (!workload->status && !vgpu->resetting) {
+	/* For the workload w/ request, needs to wait for the context
+	 * switch to make sure request is completed.
+	 * For the workload w/o request, directly complete the workload.
+	 */
+	if (workload->req) {
 		wait_event(workload->shadow_ctx_status_wq,
 			   !atomic_read(&workload->shadow_ctx_active));
 
-		update_guest_context(workload);
+		i915_gem_request_put(fetch_and_zero(&workload->req));
 
-		for_each_set_bit(event, workload->pending_events,
-				 INTEL_GVT_EVENT_MAX)
-			intel_vgpu_trigger_virtual_event(vgpu, event);
+		if (!workload->status && !vgpu->resetting) {
+			update_guest_context(workload);
+
+			for_each_set_bit(event, workload->pending_events,
+					 INTEL_GVT_EVENT_MAX)
+				intel_vgpu_trigger_virtual_event(vgpu, event);
+		}
 	}
 
 	gvt_dbg_sched("ring id %d complete workload %p status %d\n",
@@ -400,7 +417,6 @@ static int workload_thread(void *priv)
 	int ring_id = p->ring_id;
 	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
 	struct intel_vgpu_workload *workload = NULL;
-	long lret;
 	int ret;
 	bool need_force_wake = IS_SKYLAKE(gvt->dev_priv);
 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
@@ -449,23 +465,24 @@ static int workload_thread(void *priv)
 
 		gvt_dbg_sched("ring id %d wait workload %p\n",
 				workload->ring_id, workload);
-
-		lret = i915_wait_request(workload->req,
+retry:
+		i915_wait_request(workload->req,
 					 0, MAX_SCHEDULE_TIMEOUT);
-		if (lret < 0) {
-			workload->status = lret;
-			gvt_err("fail to wait workload, skip\n");
-		} else {
-			workload->status = 0;
+		/* I915 has replay mechanism and a request will be replayed
+		 * if there is i915 reset. So the seqno will be updated anyway.
+		 * If the seqno is not updated yet after waiting, which means
+		 * the replay may still be in progress and we can wait again.
+		 */
+		if (!i915_gem_request_completed(workload->req)) {
+			gvt_dbg_sched("workload %p not completed, wait again\n",
+					workload);
+			goto retry;
 		}
 
 complete:
 		gvt_dbg_sched("will complete workload %p, status: %d\n",
 				workload, workload->status);
 
-		if (workload->req)
-			i915_gem_request_put(fetch_and_zero(&workload->req));
-
 		complete_current_workload(gvt, ring_id);
 
 		if (need_force_wake)
-- 
cgit v1.2.3


From c2e04fdab33181b53b5a2f9662b7b607b720f79f Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@intel.com>
Date: Mon, 6 Mar 2017 17:08:30 +0800
Subject: drm/i915/gvt: protect RO and Rsvd bits of virtual vgpu configuration
 space

Per PCI specification, Configuration Register has different types (RO,
RW, RW1C, Rsvd). For RO Register bits are read-only and cannot be
altered by software. For RW1C Register bits indicate status when read.
A Set bit indicates a status event which is Cleared by writing a 1b.
Writing a 0b to RW1C bits has no effect. Reserved Register is for future
implementations, and they are read-only and must return zero when read.

Current vGPU configuration write emulation just copy the value as it is.
So we haven't emulated RO, RW1C and Rsvd Registers correctly. This patch
is following the Spec to correct emulation logic. We add a function
vgpu_cfg_mem_write to wrap the access to vGPU configuration memory.
The write function uses a RW Register bitmap to avoid RO bits be
overwritten, and emulate RW1C behavior for the particular status Register.

v2:
  new = src[i] --> new = src[i] & mask (zhenyu)

Signed-off-by: Changbin Du <changbin.du@intel.com>
Cc: Xiaoguang Chen <xiaoguang.chen@intel.com>
Cc: Zhiyuan Lv <zhiyuan.lv@intel.com>
Cc: Min He <min.he@intel.com>
Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/cfg_space.c | 54 ++++++++++++++++++++++++++++++++++--
 1 file changed, 51 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c
index a77e050b85a3..b7d7721e72fa 100644
--- a/drivers/gpu/drm/i915/gvt/cfg_space.c
+++ b/drivers/gpu/drm/i915/gvt/cfg_space.c
@@ -41,6 +41,54 @@ enum {
 	INTEL_GVT_PCI_BAR_MAX,
 };
 
+/* bitmap for writable bits (RW or RW1C bits, but cannot co-exist in one
+ * byte) byte by byte in standard pci configuration space. (not the full
+ * 256 bytes.)
+ */
+static const u8 pci_cfg_space_rw_bmp[PCI_INTERRUPT_LINE + 4] = {
+	[PCI_COMMAND]		= 0xff, 0x07,
+	[PCI_STATUS]		= 0x00, 0xf9, /* the only one RW1C byte */
+	[PCI_CACHE_LINE_SIZE]	= 0xff,
+	[PCI_BASE_ADDRESS_0 ... PCI_CARDBUS_CIS - 1] = 0xff,
+	[PCI_ROM_ADDRESS]	= 0x01, 0xf8, 0xff, 0xff,
+	[PCI_INTERRUPT_LINE]	= 0xff,
+};
+
+/**
+ * vgpu_pci_cfg_mem_write - write virtual cfg space memory
+ *
+ * Use this function to write virtual cfg space memory.
+ * For standard cfg space, only RW bits can be changed,
+ * and we emulates the RW1C behavior of PCI_STATUS register.
+ */
+static void vgpu_pci_cfg_mem_write(struct intel_vgpu *vgpu, unsigned int off,
+				   u8 *src, unsigned int bytes)
+{
+	u8 *cfg_base = vgpu_cfg_space(vgpu);
+	u8 mask, new, old;
+	int i = 0;
+
+	for (; i < bytes && (off + i < sizeof(pci_cfg_space_rw_bmp)); i++) {
+		mask = pci_cfg_space_rw_bmp[off + i];
+		old = cfg_base[off + i];
+		new = src[i] & mask;
+
+		/**
+		 * The PCI_STATUS high byte has RW1C bits, here
+		 * emulates clear by writing 1 for these bits.
+		 * Writing a 0b to RW1C bits has no effect.
+		 */
+		if (off + i == PCI_STATUS + 1)
+			new = (~new & old) & mask;
+
+		cfg_base[off + i] = (old & ~mask) | new;
+	}
+
+	/* For other configuration space directly copy as it is. */
+	if (i < bytes)
+		memcpy(cfg_base + off + i, src + i, bytes - i);
+}
+
 /**
  * intel_vgpu_emulate_cfg_read - emulate vGPU configuration space read
  *
@@ -123,7 +171,7 @@ static int emulate_pci_command_write(struct intel_vgpu *vgpu,
 	u8 changed = old ^ new;
 	int ret;
 
-	memcpy(vgpu_cfg_space(vgpu) + offset, p_data, bytes);
+	vgpu_pci_cfg_mem_write(vgpu, offset, p_data, bytes);
 	if (!(changed & PCI_COMMAND_MEMORY))
 		return 0;
 
@@ -277,10 +325,10 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset,
 		if (ret)
 			return ret;
 
-		memcpy(vgpu_cfg_space(vgpu) + offset, p_data, bytes);
+		vgpu_pci_cfg_mem_write(vgpu, offset, p_data, bytes);
 		break;
 	default:
-		memcpy(vgpu_cfg_space(vgpu) + offset, p_data, bytes);
+		vgpu_pci_cfg_mem_write(vgpu, offset, p_data, bytes);
 		break;
 	}
 	return 0;
-- 
cgit v1.2.3


From 68925176296a8b995e503349200e256674bfe5ac Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 17 Feb 2017 14:32:18 +0000
Subject: arm64: KVM: VHE: Clear HCR_TGE when invalidating guest TLBs

When invalidating guest TLBs, special care must be taken to
actually shoot the guest TLBs and not the host ones if we're
running on a VHE system.  This is controlled by the HCR_EL2.TGE
bit, which we forget to clear before invalidating TLBs.

Address the issue by introducing two wrappers (__tlb_switch_to_guest
and __tlb_switch_to_host) that take care of both the VTTBR_EL2
and HCR_EL2.TGE switching.

Reported-by: Tomasz Nowicki <tnowicki@caviumnetworks.com>
Tested-by: Tomasz Nowicki <tnowicki@caviumnetworks.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Cc: stable@vger.kernel.org
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/kvm/hyp/tlb.c | 64 +++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 55 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index e8e7ba2bc11f..9e1d2b75eecd 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -18,14 +18,62 @@
 #include <asm/kvm_hyp.h>
 #include <asm/tlbflush.h>
 
+static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
+{
+	u64 val;
+
+	/*
+	 * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and
+	 * most TLB operations target EL2/EL0. In order to affect the
+	 * guest TLBs (EL1/EL0), we need to change one of these two
+	 * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
+	 * let's flip TGE before executing the TLB operation.
+	 */
+	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	val = read_sysreg(hcr_el2);
+	val &= ~HCR_TGE;
+	write_sysreg(val, hcr_el2);
+	isb();
+}
+
+static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm)
+{
+	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	isb();
+}
+
+static hyp_alternate_select(__tlb_switch_to_guest,
+			    __tlb_switch_to_guest_nvhe,
+			    __tlb_switch_to_guest_vhe,
+			    ARM64_HAS_VIRT_HOST_EXTN);
+
+static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm)
+{
+	/*
+	 * We're done with the TLB operation, let's restore the host's
+	 * view of HCR_EL2.
+	 */
+	write_sysreg(0, vttbr_el2);
+	write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+}
+
+static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm)
+{
+	write_sysreg(0, vttbr_el2);
+}
+
+static hyp_alternate_select(__tlb_switch_to_host,
+			    __tlb_switch_to_host_nvhe,
+			    __tlb_switch_to_host_vhe,
+			    ARM64_HAS_VIRT_HOST_EXTN);
+
 void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 {
 	dsb(ishst);
 
 	/* Switch to requested VMID */
 	kvm = kern_hyp_va(kvm);
-	write_sysreg(kvm->arch.vttbr, vttbr_el2);
-	isb();
+	__tlb_switch_to_guest()(kvm);
 
 	/*
 	 * We could do so much better if we had the VA as well.
@@ -46,7 +94,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 	dsb(ish);
 	isb();
 
-	write_sysreg(0, vttbr_el2);
+	__tlb_switch_to_host()(kvm);
 }
 
 void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
@@ -55,14 +103,13 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
 
 	/* Switch to requested VMID */
 	kvm = kern_hyp_va(kvm);
-	write_sysreg(kvm->arch.vttbr, vttbr_el2);
-	isb();
+	__tlb_switch_to_guest()(kvm);
 
 	__tlbi(vmalls12e1is);
 	dsb(ish);
 	isb();
 
-	write_sysreg(0, vttbr_el2);
+	__tlb_switch_to_host()(kvm);
 }
 
 void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
@@ -70,14 +117,13 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
 	struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
 
 	/* Switch to requested VMID */
-	write_sysreg(kvm->arch.vttbr, vttbr_el2);
-	isb();
+	__tlb_switch_to_guest()(kvm);
 
 	__tlbi(vmalle1);
 	dsb(nsh);
 	isb();
 
-	write_sysreg(0, vttbr_el2);
+	__tlb_switch_to_host()(kvm);
 }
 
 void __hyp_text __kvm_flush_vm_context(void)
-- 
cgit v1.2.3


From 4dfc050571523ac2bc02cbf948dd47621f7dd83f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 21 Feb 2017 11:32:47 +0000
Subject: KVM: arm/arm64: vgic-v3: Don't pretend to support IRQ/FIQ bypass

Our GICv3 emulation always presents ICC_SRE_EL1 with DIB/DFB set to
zero, which implies that there is a way to bypass the GIC and
inject raw IRQ/FIQ by driving the CPU pins.

Of course, we don't allow that when the GIC is configured, but
we fail to indicate that to the guest. The obvious fix is to
set these bits (and never let them being changed again).

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Acked-by: Christoffer Dall <cdall@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irqchip/arm-gic-v3.h | 2 ++
 virt/kvm/arm/vgic/vgic-v3.c        | 5 ++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 672cfef72fc8..97cbca19430d 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -373,6 +373,8 @@
 #define ICC_IGRPEN0_EL1_MASK		(1 << ICC_IGRPEN0_EL1_SHIFT)
 #define ICC_IGRPEN1_EL1_SHIFT		0
 #define ICC_IGRPEN1_EL1_MASK		(1 << ICC_IGRPEN1_EL1_SHIFT)
+#define ICC_SRE_EL1_DIB			(1U << 2)
+#define ICC_SRE_EL1_DFB			(1U << 1)
 #define ICC_SRE_EL1_SRE			(1U << 0)
 
 /*
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index edc6ee2dc852..be0f4c3e0142 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -229,10 +229,13 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
 	/*
 	 * If we are emulating a GICv3, we do it in an non-GICv2-compatible
 	 * way, so we force SRE to 1 to demonstrate this to the guest.
+	 * Also, we don't support any form of IRQ/FIQ bypass.
 	 * This goes with the spec allowing the value to be RAO/WI.
 	 */
 	if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
-		vgic_v3->vgic_sre = ICC_SRE_EL1_SRE;
+		vgic_v3->vgic_sre = (ICC_SRE_EL1_DIB |
+				     ICC_SRE_EL1_DFB |
+				     ICC_SRE_EL1_SRE);
 		vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE;
 	} else {
 		vgic_v3->vgic_sre = 0;
-- 
cgit v1.2.3


From a69e2fb70350a66f91175cd2625f1e8215c5b6e9 Mon Sep 17 00:00:00 2001
From: Balbir Singh <bsingharora@gmail.com>
Date: Fri, 3 Mar 2017 11:58:44 +1100
Subject: powerpc/xics: Work around limitations of OPAL XICS priority handling

The CPPR (Current Processor Priority Register) of a XICS interrupt
presentation controller contains a value N, such that only interrupts
with a priority "more favoured" than N will be received by the CPU,
where "more favoured" means "less than". So if the CPPR has the value 5
then only interrupts with a priority of 0-4 inclusive will be received.

In theory the CPPR can support a value of 0 to 255 inclusive.
In practice Linux only uses values of 0, 4, 5 and 0xff. Setting the CPPR
to 0 rejects all interrupts, setting it to 0xff allows all interrupts.
The values 4 and 5 are used to differentiate IPIs from external
interrupts. Setting the CPPR to 5 allows IPIs to be received but not
external interrupts.

The CPPR emulation in the OPAL XICS implementation only directly
supports priorities 0 and 0xff. All other priorities are considered
equivalent, and mapped to a single priority value internally. This means
when using icp-opal we can not allow IPIs but not externals.

This breaks Linux's use of priority values when a CPU is hot unplugged.
After migrating IRQs away from the CPU that is being offlined, we set
the priority to 5, meaning we still want the offline CPU to receive
IPIs. But the effect of the OPAL XICS emulation's use of a single
priority value is that all interrupts are rejected by the CPU. With the
CPU offline, and not receiving IPIs, we may not be able to wake it up to
bring it back online.

The first part of the fix is in icp_opal_set_cpu_priority(). CPPR values
of 0 to 4 inclusive will correctly cause all interrupts to be rejected,
so we pass those CPPR values through to OPAL. However if we are called
with a CPPR of 5 or greater, the caller is expecting to be able to allow
IPIs but not external interrupts. We know this doesn't work, so instead
of rejecting all interrupts we choose the opposite which is to allow all
interrupts. This is still not correct behaviour, but we know for the
only existing caller (xics_migrate_irqs_away()), that it is the better
option.

The other part of the fix is in xics_migrate_irqs_away(). Instead of
setting priority (CPPR) to 0, and then back to 5 before migrating IRQs,
we migrate the IRQs before setting the priority back to 5. This should
have no effect on an ICP backend with a working set_priority(), and on
icp-opal it means we will keep all interrupts blocked until after we've
finished doing the IRQ migration. Additionally we wait for 5ms after
doing the migration to make sure there are no IRQs in flight.

Fixes: d74361881f0d ("powerpc/xics: Add ICP OPAL backend")
Cc: stable@vger.kernel.org # v4.8+
Suggested-by: Michael Ellerman <mpe@ellerman.id.au>
Reported-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Tested-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Balbir Singh <bsingharora@gmail.com>
[mpe: Rewrote comments and change log, change delay to 5ms]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/sysdev/xics/icp-opal.c    | 10 ++++++++++
 arch/powerpc/sysdev/xics/xics-common.c | 17 ++++++++++++++---
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c
index f9670eabfcfa..b53f80f0b4d8 100644
--- a/arch/powerpc/sysdev/xics/icp-opal.c
+++ b/arch/powerpc/sysdev/xics/icp-opal.c
@@ -91,6 +91,16 @@ static unsigned int icp_opal_get_irq(void)
 
 static void icp_opal_set_cpu_priority(unsigned char cppr)
 {
+	/*
+	 * Here be dragons. The caller has asked to allow only IPI's and not
+	 * external interrupts. But OPAL XIVE doesn't support that. So instead
+	 * of allowing no interrupts allow all. That's still not right, but
+	 * currently the only caller who does this is xics_migrate_irqs_away()
+	 * and it works in that case.
+	 */
+	if (cppr >= DEFAULT_PRIORITY)
+		cppr = LOWEST_PRIORITY;
+
 	xics_set_base_cppr(cppr);
 	opal_int_set_cppr(cppr);
 	iosync();
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
index 69d858e51ac7..23efe4e42172 100644
--- a/arch/powerpc/sysdev/xics/xics-common.c
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -20,6 +20,7 @@
 #include <linux/of.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/delay.h>
 
 #include <asm/prom.h>
 #include <asm/io.h>
@@ -198,9 +199,6 @@ void xics_migrate_irqs_away(void)
 	/* Remove ourselves from the global interrupt queue */
 	xics_set_cpu_giq(xics_default_distrib_server, 0);
 
-	/* Allow IPIs again... */
-	icp_ops->set_priority(DEFAULT_PRIORITY);
-
 	for_each_irq_desc(virq, desc) {
 		struct irq_chip *chip;
 		long server;
@@ -255,6 +253,19 @@ void xics_migrate_irqs_away(void)
 unlock:
 		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
+
+	/* Allow "sufficient" time to drop any inflight IRQ's */
+	mdelay(5);
+
+	/*
+	 * Allow IPIs again. This is done at the very end, after migrating all
+	 * interrupts, the expectation is that we'll only get woken up by an IPI
+	 * interrupt beyond this point, but leave externals masked just to be
+	 * safe. If we're using icp-opal this may actually allow all
+	 * interrupts anyway, but that should be OK.
+	 */
+	icp_ops->set_priority(DEFAULT_PRIORITY);
+
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-- 
cgit v1.2.3


From 12cc9fd6b2d8ee307a735b3b9faed0d17b719463 Mon Sep 17 00:00:00 2001
From: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Date: Tue, 28 Feb 2017 17:03:47 +1100
Subject: powerpc: Parse the command line before calling CAS

On POWER9 the hypervisor requires the guest to decide whether it would
like to use a hash or radix mmu model at the time it calls
ibm,client-architecture-support (CAS) based on what the hypervisor has
said it's allowed to do. It is possible to disable radix by passing
"disable_radix" on the command line. The next patch will add support for
the new CAS format, thus we need to parse the command line before calling
CAS so we can correctly select which mmu we would like to use.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Reviewed-by: Paul Mackerras <paulus@ozlabs.org>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/prom_init.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index a3944540fe0d..bf3966d12565 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -2993,6 +2993,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
 	 */
 	prom_check_initrd(r3, r4);
 
+	/*
+	 * Do early parsing of command line
+	 */
+	early_cmdline_parse();
+
 #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
 	/*
 	 * On pSeries, inform the firmware about our capabilities
@@ -3008,11 +3013,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
 	if (of_platform != PLATFORM_POWERMAC)
 		copy_and_flush(0, kbase, 0x100, 0);
 
-	/*
-	 * Do early parsing of command line
-	 */
-	early_cmdline_parse();
-
 	/*
 	 * Initialize memory management within prom_init
 	 */
-- 
cgit v1.2.3


From 014d02cbf16b3106dc8e93281d2a9c189751ed5e Mon Sep 17 00:00:00 2001
From: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Date: Tue, 28 Feb 2017 17:03:48 +1100
Subject: powerpc: Update to new option-vector-5 format for CAS

On POWER9 the ibm,client-architecture-support (CAS) negotiation process
has been updated to change how the host to guest negotiation is done for
the new hash/radix mmu as well as the nest mmu, process tables and guest
translation shootdown (GTSE).

This is documented in the unreleased PAPR ACR "CAS option vector
additions for P9".

The host tells the guest which options it supports in
ibm,arch-vec-5-platform-support. The guest then chooses a subset of these
to request in the CAS call and these are agreed to in the
ibm,architecture-vec-5 property of the chosen node.

Thus we read ibm,arch-vec-5-platform-support and make our selection before
calling CAS. We then parse the ibm,architecture-vec-5 property of the
chosen node to check whether we should run as hash or radix.

ibm,arch-vec-5-platform-support format:

index value pairs: <index, val> ... <index, val>

index: Option vector 5 byte number
val:   Some representation of supported values

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Acked-by: Paul Mackerras <paulus@ozlabs.org>
[mpe: Don't print about unknown options, be consistent with OV5_FEAT]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/prom.h |  18 ++++---
 arch/powerpc/kernel/prom_init.c | 110 +++++++++++++++++++++++++++++++++++++++-
 arch/powerpc/mm/init_64.c       |  36 ++++++++++---
 3 files changed, 150 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 4a90634e8322..35c00d7a0cf8 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -160,12 +160,18 @@ struct of_drconf_cell {
 #define OV5_PFO_HW_ENCR		0x1120	/* PFO Encryption Accelerator */
 #define OV5_SUB_PROCESSORS	0x1501	/* 1,2,or 4 Sub-Processors supported */
 #define OV5_XIVE_EXPLOIT	0x1701	/* XIVE exploitation supported */
-#define OV5_MMU_RADIX_300	0x1880	/* ISA v3.00 radix MMU supported */
-#define OV5_MMU_HASH_300	0x1840	/* ISA v3.00 hash MMU supported */
-#define OV5_MMU_SEGM_RADIX	0x1820	/* radix mode (no segmentation) */
-#define OV5_MMU_PROC_TBL	0x1810	/* hcall selects SLB or proc table */
-#define OV5_MMU_SLB		0x1800	/* always use SLB */
-#define OV5_MMU_GTSE		0x1808	/* Guest translation shootdown */
+/* MMU Base Architecture */
+#define OV5_MMU_SUPPORT		0x18C0	/* MMU Mode Support Mask */
+#define OV5_MMU_HASH		0x1800	/* Hash MMU Only */
+#define OV5_MMU_RADIX		0x1840	/* Radix MMU Only */
+#define OV5_MMU_EITHER		0x1880	/* Hash or Radix Supported */
+#define OV5_MMU_DYNAMIC		0x18C0	/* Hash or Radix Can Switch Later */
+#define OV5_NMMU		0x1820	/* Nest MMU Available */
+/* Hash Table Extensions */
+#define OV5_HASH_SEG_TBL	0x1980	/* In Memory Segment Tables Available */
+#define OV5_HASH_GTSE		0x1940	/* Guest Translation Shoot Down Avail */
+/* Radix Table Extensions */
+#define OV5_RADIX_GTSE		0x1A40	/* Guest Translation Shoot Down Avail */
 
 /* Option Vector 6: IBM PAPR hints */
 #define OV6_LINUX		0x02	/* Linux is our OS */
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index bf3966d12565..1c1b44ec7642 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -168,6 +168,14 @@ static unsigned long __initdata prom_tce_alloc_start;
 static unsigned long __initdata prom_tce_alloc_end;
 #endif
 
+static bool __initdata prom_radix_disable;
+
+struct platform_support {
+	bool hash_mmu;
+	bool radix_mmu;
+	bool radix_gtse;
+};
+
 /* Platforms codes are now obsolete in the kernel. Now only used within this
  * file and ultimately gone too. Feel free to change them if you need, they
  * are not shared with anything outside of this file anymore
@@ -626,6 +634,12 @@ static void __init early_cmdline_parse(void)
 		prom_memory_limit = ALIGN(prom_memory_limit, 0x1000000);
 #endif
 	}
+
+	opt = strstr(prom_cmd_line, "disable_radix");
+	if (opt) {
+		prom_debug("Radix disabled from cmdline\n");
+		prom_radix_disable = true;
+	}
 }
 
 #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
@@ -695,6 +709,8 @@ struct option_vector5 {
 	u8 byte22;
 	u8 intarch;
 	u8 mmu;
+	u8 hash_ext;
+	u8 radix_ext;
 } __packed;
 
 struct option_vector6 {
@@ -850,8 +866,9 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
 		.reserved3 = 0,
 		.subprocessors = 1,
 		.intarch = 0,
-		.mmu = OV5_FEAT(OV5_MMU_RADIX_300) | OV5_FEAT(OV5_MMU_HASH_300) |
-			OV5_FEAT(OV5_MMU_PROC_TBL) | OV5_FEAT(OV5_MMU_GTSE),
+		.mmu = 0,
+		.hash_ext = 0,
+		.radix_ext = 0,
 	},
 
 	/* option vector 6: IBM PAPR hints */
@@ -990,6 +1007,92 @@ static int __init prom_count_smt_threads(void)
 
 }
 
+static void __init prom_parse_mmu_model(u8 val,
+					struct platform_support *support)
+{
+	switch (val) {
+	case OV5_FEAT(OV5_MMU_DYNAMIC):
+	case OV5_FEAT(OV5_MMU_EITHER): /* Either Available */
+		prom_debug("MMU - either supported\n");
+		support->radix_mmu = !prom_radix_disable;
+		support->hash_mmu = true;
+		break;
+	case OV5_FEAT(OV5_MMU_RADIX): /* Only Radix */
+		prom_debug("MMU - radix only\n");
+		if (prom_radix_disable) {
+			/*
+			 * If we __have__ to do radix, we're better off ignoring
+			 * the command line rather than not booting.
+			 */
+			prom_printf("WARNING: Ignoring cmdline option disable_radix\n");
+		}
+		support->radix_mmu = true;
+		break;
+	case OV5_FEAT(OV5_MMU_HASH):
+		prom_debug("MMU - hash only\n");
+		support->hash_mmu = true;
+		break;
+	default:
+		prom_debug("Unknown mmu support option: 0x%x\n", val);
+		break;
+	}
+}
+
+static void __init prom_parse_platform_support(u8 index, u8 val,
+					       struct platform_support *support)
+{
+	switch (index) {
+	case OV5_INDX(OV5_MMU_SUPPORT): /* MMU Model */
+		prom_parse_mmu_model(val & OV5_FEAT(OV5_MMU_SUPPORT), support);
+		break;
+	case OV5_INDX(OV5_RADIX_GTSE): /* Radix Extensions */
+		if (val & OV5_FEAT(OV5_RADIX_GTSE)) {
+			prom_debug("Radix - GTSE supported\n");
+			support->radix_gtse = true;
+		}
+		break;
+	}
+}
+
+static void __init prom_check_platform_support(void)
+{
+	struct platform_support supported = {
+		.hash_mmu = false,
+		.radix_mmu = false,
+		.radix_gtse = false
+	};
+	int prop_len = prom_getproplen(prom.chosen,
+				       "ibm,arch-vec-5-platform-support");
+	if (prop_len > 1) {
+		int i;
+		u8 vec[prop_len];
+		prom_debug("Found ibm,arch-vec-5-platform-support, len: %d\n",
+			   prop_len);
+		prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support",
+			     &vec, sizeof(vec));
+		for (i = 0; i < prop_len; i += 2) {
+			prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2
+								  , vec[i]
+								  , vec[i + 1]);
+			prom_parse_platform_support(vec[i], vec[i + 1],
+						    &supported);
+		}
+	}
+
+	if (supported.radix_mmu && supported.radix_gtse) {
+		/* Radix preferred - but we require GTSE for now */
+		prom_debug("Asking for radix with GTSE\n");
+		ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX);
+		ibm_architecture_vec.vec5.radix_ext = OV5_FEAT(OV5_RADIX_GTSE);
+	} else if (supported.hash_mmu) {
+		/* Default to hash mmu (if we can) */
+		prom_debug("Asking for hash\n");
+		ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_HASH);
+	} else {
+		/* We're probably on a legacy hypervisor */
+		prom_debug("Assuming legacy hash support\n");
+	}
+}
 
 static void __init prom_send_capabilities(void)
 {
@@ -997,6 +1100,9 @@ static void __init prom_send_capabilities(void)
 	prom_arg_t ret;
 	u32 cores;
 
+	/* Check ibm,arch-vec-5-platform-support and fixup vec5 if required */
+	prom_check_platform_support();
+
 	root = call_prom("open", 1, 1, ADDR("/"));
 	if (root != 0) {
 		/* We need to tell the FW about the number of cores we support.
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 6aa3b76aa0d6..9be992083d2a 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -356,18 +356,42 @@ static void early_check_vec5(void)
 	unsigned long root, chosen;
 	int size;
 	const u8 *vec5;
+	u8 mmu_supported;
 
 	root = of_get_flat_dt_root();
 	chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
-	if (chosen == -FDT_ERR_NOTFOUND)
+	if (chosen == -FDT_ERR_NOTFOUND) {
+		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
 		return;
+	}
 	vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size);
-	if (!vec5)
+	if (!vec5) {
+		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
 		return;
-	if (size <= OV5_INDX(OV5_MMU_RADIX_300) ||
-	    !(vec5[OV5_INDX(OV5_MMU_RADIX_300)] & OV5_FEAT(OV5_MMU_RADIX_300)))
-		/* Hypervisor doesn't support radix */
+	}
+	if (size <= OV5_INDX(OV5_MMU_SUPPORT)) {
 		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+		return;
+	}
+
+	/* Check for supported configuration */
+	mmu_supported = vec5[OV5_INDX(OV5_MMU_SUPPORT)] &
+			OV5_FEAT(OV5_MMU_SUPPORT);
+	if (mmu_supported == OV5_FEAT(OV5_MMU_RADIX)) {
+		/* Hypervisor only supports radix - check enabled && GTSE */
+		if (!early_radix_enabled()) {
+			pr_warn("WARNING: Ignoring cmdline option disable_radix\n");
+		}
+		if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] &
+						OV5_FEAT(OV5_RADIX_GTSE))) {
+			pr_warn("WARNING: Hypervisor doesn't support RADIX with GTSE\n");
+		}
+		/* Do radix anyway - the hypervisor said we had to */
+		cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
+	} else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) {
+		/* Hypervisor only supports hash - disable radix */
+		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+	}
 }
 
 void __init mmu_early_init_devtree(void)
@@ -383,7 +407,7 @@ void __init mmu_early_init_devtree(void)
 	 * even though the ibm,architecture-vec-5 property created by
 	 * skiboot doesn't have the necessary bits set.
 	 */
-	if (early_radix_enabled() && !(mfmsr() & MSR_HV))
+	if (!(mfmsr() & MSR_HV))
 		early_check_vec5();
 
 	if (early_radix_enabled())
-- 
cgit v1.2.3


From 6ba422c75facb1b1e0e206c464ee121b8073f7e0 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Sun, 5 Mar 2017 10:54:34 +1100
Subject: powerpc/64: Avoid panic during boot due to divide by zero in
 init_cache_info()

I see a panic in early boot when building with a recent gcc toolchain.
The issue is a divide by zero, which is undefined. Older toolchains
let us get away with it:

int foo(int a) { return a / 0; }

foo:
	li 9,0
	divw 3,3,9
	extsw 3,3
	blr

But newer ones catch it:

foo:
	trap

Add a check to avoid the divide by zero.

Fixes: e2827fe5c156 ("powerpc/64: Clean up ppc64_caches using a struct per cache")
Signed-off-by: Anton Blanchard <anton@samba.org>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/setup_64.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index adf2084f214b..9cfaa8b69b5f 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -408,7 +408,10 @@ static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize,
 	info->line_size = lsize;
 	info->block_size = bsize;
 	info->log_block_size = __ilog2(bsize);
-	info->blocks_per_page = PAGE_SIZE / bsize;
+	if (bsize)
+		info->blocks_per_page = PAGE_SIZE / bsize;
+	else
+		info->blocks_per_page = 0;
 
 	if (sets == 0)
 		info->assoc = 0xffff;
-- 
cgit v1.2.3


From 3b3e78552d3077ec70d2640e629e07e3ab416a6a Mon Sep 17 00:00:00 2001
From: Matjaz Hegedic <matjaz.hegedic@gmail.com>
Date: Sun, 5 Mar 2017 19:16:44 +0100
Subject: x86/reboot/quirks: Add ASUS EeeBook X205TA/W reboot quirk

Without the parameter reboot=a, ASUS EeeBook X205TA/W will hang
when it should reboot. This adds the appropriate quirk, thus
fixing the problem.

Signed-off-by: Matjaz Hegedic <matjaz.hegedic@gmail.com>
Link: http://lkml.kernel.org/r/1488737804-20681-1-git-send-email-matjaz.hegedic@gmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/reboot.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 01c9cda3e1b7..4194d6f9bb29 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -231,6 +231,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"),
 		},
 	},
+	{	/* Handle problems with rebooting on ASUS EeeBook X205TAW */
+		.callback = set_acpi_reboot,
+		.ident = "ASUS EeeBook X205TAW",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"),
+		},
+	},
 
 	/* Certec */
 	{       /* Handle problems with rebooting on Certec BPC600 */
-- 
cgit v1.2.3


From f2853308b6409b97799b0beceacd9da43a82fe43 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 6 Mar 2017 10:57:48 +0200
Subject: x86/build/x86_64_defconfig: Enable CONFIG_R8169

Very common PCIe ethernet card. Already enabled in i386_defconfig.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Link: http://lkml.kernel.org/r/20170306085748.85957-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/configs/x86_64_defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 7ef4a099defc..6205d3b81e6d 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -176,6 +176,7 @@ CONFIG_E1000E=y
 CONFIG_SKY2=y
 CONFIG_FORCEDETH=y
 CONFIG_8139TOO=y
+CONFIG_R8169=y
 CONFIG_FDDI=y
 CONFIG_INPUT_POLLDEV=y
 # CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-- 
cgit v1.2.3


From 9c7a00868c3a77c86ab07a2c51f3bb4897bd8550 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 6 Mar 2017 21:51:32 +1100
Subject: powerpc/64: Fix L1D cache shape vector reporting L1I values

It seems we didn't pay quite enough attention when testing the new cache
shape vectors, which means we didn't notice the bug where the vector for
the L1D was using the L1I values. Fix it, resulting in eg:

  L1I  cache size:     0x8000      32768B         32K
  L1I  line size:        0x80       8-way associative
  L1D  cache size:    0x10000      65536B         64K
  L1D  line size:        0x80       8-way associative

Fixes: 98a5f361b862 ("powerpc: Add new cache geometry aux vectors")
Cut-and-paste-bug-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Badly-reviewed-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/elf.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
index 93b9b84568e8..09bde6e34f5d 100644
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h
@@ -144,8 +144,8 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
 #define ARCH_DLINFO_CACHE_GEOMETRY					\
 	NEW_AUX_ENT(AT_L1I_CACHESIZE, ppc64_caches.l1i.size);		\
 	NEW_AUX_ENT(AT_L1I_CACHEGEOMETRY, get_cache_geometry(l1i));	\
-	NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1i.size);		\
-	NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1i));	\
+	NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1d.size);		\
+	NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1d));	\
 	NEW_AUX_ENT(AT_L2_CACHESIZE, ppc64_caches.l2.size);		\
 	NEW_AUX_ENT(AT_L2_CACHEGEOMETRY, get_cache_geometry(l2));	\
 	NEW_AUX_ENT(AT_L3_CACHESIZE, ppc64_caches.l3.size);		\
-- 
cgit v1.2.3


From a7d2475af7aedcb9b5c6343989a8bfadbf84429b Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 6 Mar 2017 22:53:59 +1100
Subject: powerpc: Sort the selects under CONFIG_PPC

We have a big list of selects under CONFIG_PPC, and currently they're
completely unsorted. This means people tend to add new selects at the
bottom of the list, and so two commits which both add a new select will
often conflict.

Instead sort it alphabetically. This is nicer in and of itself, but also
means two commits that add a new select will have a greater chance of
not conflicting.

Add a note at the top and bottom asking people to keep it sorted.

And while we're here pad out the 'if' expressions to make them stand
out.

Suggested-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/Kconfig | 138 +++++++++++++++++++++++++++------------------------
 1 file changed, 72 insertions(+), 66 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 494091762bd7..97a8bc8a095c 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -80,93 +80,99 @@ config ARCH_HAS_DMA_SET_COHERENT_MASK
 config PPC
 	bool
 	default y
-	select BUILDTIME_EXTABLE_SORT
+	#
+	# Please keep this list sorted alphabetically.
+	#
+	select ARCH_HAS_DEVMEM_IS_ALLOWED
+	select ARCH_HAS_DMA_SET_COHERENT_MASK
+	select ARCH_HAS_ELF_RANDOMIZE
+	select ARCH_HAS_GCOV_PROFILE_ALL
+	select ARCH_HAS_SCALED_CPUTIME		if VIRT_CPU_ACCOUNTING_NATIVE
+	select ARCH_HAS_SG_CHAIN
+	select ARCH_HAS_TICK_BROADCAST		if GENERIC_CLOCKEVENTS_BROADCAST
+	select ARCH_HAS_UBSAN_SANITIZE_ALL
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
+	select ARCH_SUPPORTS_ATOMIC_RMW
+	select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
+	select ARCH_USE_BUILTIN_BSWAP
+	select ARCH_USE_CMPXCHG_LOCKREF		if PPC64
+	select ARCH_WANT_IPC_PARSE_VERSION
 	select BINFMT_ELF
-	select ARCH_HAS_ELF_RANDOMIZE
-	select OF
-	select OF_EARLY_FLATTREE
-	select OF_RESERVED_MEM
-	select HAVE_FTRACE_MCOUNT_RECORD
+	select BUILDTIME_EXTABLE_SORT
+	select CLONE_BACKWARDS
+	select DCACHE_WORD_ACCESS		if PPC64 && CPU_LITTLE_ENDIAN
+	select EDAC_ATOMIC_SCRUB
+	select EDAC_SUPPORT
+	select GENERIC_ATOMIC64			if PPC32
+	select GENERIC_CLOCKEVENTS
+	select GENERIC_CLOCKEVENTS_BROADCAST	if SMP
+	select GENERIC_CMOS_UPDATE
+	select GENERIC_CPU_AUTOPROBE
+	select GENERIC_IRQ_SHOW
+	select GENERIC_IRQ_SHOW_LEVEL
+	select GENERIC_SMP_IDLE_THREAD
+	select GENERIC_STRNCPY_FROM_USER
+	select GENERIC_STRNLEN_USER
+	select GENERIC_TIME_VSYSCALL_OLD
+	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_HARDENED_USERCOPY
+	select HAVE_ARCH_JUMP_LABEL
+	select HAVE_ARCH_KGDB
+	select HAVE_ARCH_SECCOMP_FILTER
+	select HAVE_ARCH_TRACEHOOK
+	select HAVE_CBPF_JIT			if !PPC64
+	select HAVE_CONTEXT_TRACKING		if PPC64
+	select HAVE_DEBUG_KMEMLEAK
+	select HAVE_DEBUG_STACKOVERFLOW
+	select HAVE_DMA_API_DEBUG
 	select HAVE_DYNAMIC_FTRACE
-	select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL
-	select HAVE_FUNCTION_TRACER
+	select HAVE_DYNAMIC_FTRACE_WITH_REGS	if MPROFILE_KERNEL
+	select HAVE_EBPF_JIT			if PPC64
+	select HAVE_EFFICIENT_UNALIGNED_ACCESS	if !(CPU_LITTLE_ENDIAN && POWER7_CPU)
+	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_FUNCTION_TRACER
 	select HAVE_GCC_PLUGINS
-	select SYSCTL_EXCEPTION_TRACE
-	select VIRT_TO_BUS if !PPC64
+	select HAVE_GENERIC_RCU_GUP
+	select HAVE_HW_BREAKPOINT		if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
 	select HAVE_IDE
 	select HAVE_IOREMAP_PROT
-	select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU)
+	select HAVE_IRQ_EXIT_ON_IRQ_STACK
+	select HAVE_KERNEL_GZIP
 	select HAVE_KPROBES
-	select HAVE_OPTPROBES if PPC64
-	select HAVE_ARCH_KGDB
 	select HAVE_KRETPROBES
-	select HAVE_ARCH_TRACEHOOK
+	select HAVE_LIVEPATCH			if HAVE_DYNAMIC_FTRACE_WITH_REGS
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
-	select HAVE_DMA_API_DEBUG
+	select HAVE_MOD_ARCH_SPECIFIC
+	select HAVE_NMI				if PERF_EVENTS
 	select HAVE_OPROFILE
-	select HAVE_DEBUG_KMEMLEAK
-	select ARCH_HAS_SG_CHAIN
-	select GENERIC_ATOMIC64 if PPC32
+	select HAVE_OPTPROBES			if PPC64
 	select HAVE_PERF_EVENTS
+	select HAVE_PERF_EVENTS_NMI		if PPC64
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
+	select HAVE_RCU_TABLE_FREE		if SMP
 	select HAVE_REGS_AND_STACK_ACCESS_API
-	select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
-	select ARCH_WANT_IPC_PARSE_VERSION
-	select SPARSE_IRQ
+	select HAVE_SYSCALL_TRACEPOINTS
+	select HAVE_VIRT_CPU_ACCOUNTING
 	select IRQ_DOMAIN
-	select GENERIC_IRQ_SHOW
-	select GENERIC_IRQ_SHOW_LEVEL
 	select IRQ_FORCED_THREADING
-	select HAVE_RCU_TABLE_FREE if SMP
-	select HAVE_SYSCALL_TRACEPOINTS
-	select HAVE_CBPF_JIT if !PPC64
-	select HAVE_EBPF_JIT if PPC64
-	select HAVE_ARCH_JUMP_LABEL
-	select ARCH_HAVE_NMI_SAFE_CMPXCHG
-	select ARCH_HAS_GCOV_PROFILE_ALL
-	select GENERIC_SMP_IDLE_THREAD
-	select GENERIC_CMOS_UPDATE
-	select GENERIC_TIME_VSYSCALL_OLD
-	select GENERIC_CLOCKEVENTS
-	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
-	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
-	select GENERIC_STRNCPY_FROM_USER
-	select GENERIC_STRNLEN_USER
-	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
-	select CLONE_BACKWARDS
-	select ARCH_USE_BUILTIN_BSWAP
-	select OLD_SIGSUSPEND
-	select OLD_SIGACTION if PPC32
-	select HAVE_DEBUG_STACKOVERFLOW
-	select HAVE_IRQ_EXIT_ON_IRQ_STACK
-	select ARCH_USE_CMPXCHG_LOCKREF if PPC64
-	select HAVE_ARCH_AUDITSYSCALL
-	select ARCH_SUPPORTS_ATOMIC_RMW
-	select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN
 	select NO_BOOTMEM
-	select HAVE_GENERIC_RCU_GUP
-	select HAVE_PERF_EVENTS_NMI if PPC64
-	select HAVE_NMI if PERF_EVENTS
-	select EDAC_SUPPORT
-	select EDAC_ATOMIC_SCRUB
-	select ARCH_HAS_DMA_SET_COHERENT_MASK
-	select ARCH_HAS_DEVMEM_IS_ALLOWED
-	select HAVE_ARCH_SECCOMP_FILTER
-	select ARCH_HAS_UBSAN_SANITIZE_ALL
-	select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
-	select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS
-	select GENERIC_CPU_AUTOPROBE
-	select HAVE_VIRT_CPU_ACCOUNTING
-	select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE
-	select HAVE_ARCH_HARDENED_USERCOPY
-	select HAVE_KERNEL_GZIP
-	select HAVE_CONTEXT_TRACKING if PPC64
+	select OF
+	select OF_EARLY_FLATTREE
+	select OF_RESERVED_MEM
+	select OLD_SIGACTION			if PPC32
+	select OLD_SIGSUSPEND
+	select SPARSE_IRQ
+	select SYSCTL_EXCEPTION_TRACE
+	select VIRT_TO_BUS			if !PPC64
+	#
+	# Please keep this list sorted alphabetically.
+	#
 
 config GENERIC_CSUM
 	def_bool n
-- 
cgit v1.2.3


From 606142af57dad981b78707234cfbd15f9f7b7125 Mon Sep 17 00:00:00 2001
From: Jonathan McDowell <noodles@earth.li>
Date: Wed, 15 Feb 2017 18:29:15 -0200
Subject: [media] dw2102: don't do DMA on stack

On Kernel 4.9, WARNINGs about doing DMA on stack are hit at
the dw2102 driver: one in su3000_power_ctrl() and the other in tt_s2_4600_frontend_attach().

Both were due to the use of buffers on the stack as parameters to
dvb_usb_generic_rw() and the resulting attempt to do DMA with them.

The device was non-functional as a result.

So, switch this driver over to use a buffer within the device state
structure, as has been done with other DVB-USB drivers.

Tested with TechnoTrend TT-connect S2-4600.

[mchehab@osg.samsung.com: fixed a warning at su3000_i2c_transfer() that
 state var were dereferenced before check 'd']
Signed-off-by: Jonathan McDowell <noodles@earth.li>
Cc: <stable@vger.kernel.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 drivers/media/usb/dvb-usb/dw2102.c | 244 ++++++++++++++++++++++---------------
 1 file changed, 145 insertions(+), 99 deletions(-)

diff --git a/drivers/media/usb/dvb-usb/dw2102.c b/drivers/media/usb/dvb-usb/dw2102.c
index 6ca502d834b4..4f42d57f81d9 100644
--- a/drivers/media/usb/dvb-usb/dw2102.c
+++ b/drivers/media/usb/dvb-usb/dw2102.c
@@ -68,6 +68,7 @@
 struct dw2102_state {
 	u8 initialized;
 	u8 last_lock;
+	u8 data[MAX_XFER_SIZE + 4];
 	struct i2c_client *i2c_client_demod;
 	struct i2c_client *i2c_client_tuner;
 
@@ -661,62 +662,72 @@ static int su3000_i2c_transfer(struct i2c_adapter *adap, struct i2c_msg msg[],
 								int num)
 {
 	struct dvb_usb_device *d = i2c_get_adapdata(adap);
-	u8 obuf[0x40], ibuf[0x40];
+	struct dw2102_state *state;
 
 	if (!d)
 		return -ENODEV;
+
+	state = d->priv;
+
 	if (mutex_lock_interruptible(&d->i2c_mutex) < 0)
 		return -EAGAIN;
+	if (mutex_lock_interruptible(&d->data_mutex) < 0) {
+		mutex_unlock(&d->i2c_mutex);
+		return -EAGAIN;
+	}
 
 	switch (num) {
 	case 1:
 		switch (msg[0].addr) {
 		case SU3000_STREAM_CTRL:
-			obuf[0] = msg[0].buf[0] + 0x36;
-			obuf[1] = 3;
-			obuf[2] = 0;
-			if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 0, 0) < 0)
+			state->data[0] = msg[0].buf[0] + 0x36;
+			state->data[1] = 3;
+			state->data[2] = 0;
+			if (dvb_usb_generic_rw(d, state->data, 3,
+					state->data, 0, 0) < 0)
 				err("i2c transfer failed.");
 			break;
 		case DW2102_RC_QUERY:
-			obuf[0] = 0x10;
-			if (dvb_usb_generic_rw(d, obuf, 1, ibuf, 2, 0) < 0)
+			state->data[0] = 0x10;
+			if (dvb_usb_generic_rw(d, state->data, 1,
+					state->data, 2, 0) < 0)
 				err("i2c transfer failed.");
-			msg[0].buf[1] = ibuf[0];
-			msg[0].buf[0] = ibuf[1];
+			msg[0].buf[1] = state->data[0];
+			msg[0].buf[0] = state->data[1];
 			break;
 		default:
 			/* always i2c write*/
-			obuf[0] = 0x08;
-			obuf[1] = msg[0].addr;
-			obuf[2] = msg[0].len;
+			state->data[0] = 0x08;
+			state->data[1] = msg[0].addr;
+			state->data[2] = msg[0].len;
 
-			memcpy(&obuf[3], msg[0].buf, msg[0].len);
+			memcpy(&state->data[3], msg[0].buf, msg[0].len);
 
-			if (dvb_usb_generic_rw(d, obuf, msg[0].len + 3,
-						ibuf, 1, 0) < 0)
+			if (dvb_usb_generic_rw(d, state->data, msg[0].len + 3,
+						state->data, 1, 0) < 0)
 				err("i2c transfer failed.");
 
 		}
 		break;
 	case 2:
 		/* always i2c read */
-		obuf[0] = 0x09;
-		obuf[1] = msg[0].len;
-		obuf[2] = msg[1].len;
-		obuf[3] = msg[0].addr;
-		memcpy(&obuf[4], msg[0].buf, msg[0].len);
-
-		if (dvb_usb_generic_rw(d, obuf, msg[0].len + 4,
-					ibuf, msg[1].len + 1, 0) < 0)
+		state->data[0] = 0x09;
+		state->data[1] = msg[0].len;
+		state->data[2] = msg[1].len;
+		state->data[3] = msg[0].addr;
+		memcpy(&state->data[4], msg[0].buf, msg[0].len);
+
+		if (dvb_usb_generic_rw(d, state->data, msg[0].len + 4,
+					state->data, msg[1].len + 1, 0) < 0)
 			err("i2c transfer failed.");
 
-		memcpy(msg[1].buf, &ibuf[1], msg[1].len);
+		memcpy(msg[1].buf, &state->data[1], msg[1].len);
 		break;
 	default:
 		warn("more than 2 i2c messages at a time is not handled yet.");
 		break;
 	}
+	mutex_unlock(&d->data_mutex);
 	mutex_unlock(&d->i2c_mutex);
 	return num;
 }
@@ -844,17 +855,23 @@ static int su3000_streaming_ctrl(struct dvb_usb_adapter *adap, int onoff)
 static int su3000_power_ctrl(struct dvb_usb_device *d, int i)
 {
 	struct dw2102_state *state = (struct dw2102_state *)d->priv;
-	u8 obuf[] = {0xde, 0};
+	int ret = 0;
 
 	info("%s: %d, initialized %d", __func__, i, state->initialized);
 
 	if (i && !state->initialized) {
+		mutex_lock(&d->data_mutex);
+
+		state->data[0] = 0xde;
+		state->data[1] = 0;
+
 		state->initialized = 1;
 		/* reset board */
-		return dvb_usb_generic_rw(d, obuf, 2, NULL, 0, 0);
+		ret = dvb_usb_generic_rw(d, state->data, 2, NULL, 0, 0);
+		mutex_unlock(&d->data_mutex);
 	}
 
-	return 0;
+	return ret;
 }
 
 static int su3000_read_mac_address(struct dvb_usb_device *d, u8 mac[6])
@@ -1309,49 +1326,57 @@ static int prof_7500_frontend_attach(struct dvb_usb_adapter *d)
 	return 0;
 }
 
-static int su3000_frontend_attach(struct dvb_usb_adapter *d)
+static int su3000_frontend_attach(struct dvb_usb_adapter *adap)
 {
-	u8 obuf[3] = { 0xe, 0x80, 0 };
-	u8 ibuf[] = { 0 };
+	struct dvb_usb_device *d = adap->dev;
+	struct dw2102_state *state = d->priv;
+
+	mutex_lock(&d->data_mutex);
+
+	state->data[0] = 0xe;
+	state->data[1] = 0x80;
+	state->data[2] = 0;
 
-	if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0)
+	if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0)
 		err("command 0x0e transfer failed.");
 
-	obuf[0] = 0xe;
-	obuf[1] = 0x02;
-	obuf[2] = 1;
+	state->data[0] = 0xe;
+	state->data[1] = 0x02;
+	state->data[2] = 1;
 
-	if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0)
+	if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0)
 		err("command 0x0e transfer failed.");
 	msleep(300);
 
-	obuf[0] = 0xe;
-	obuf[1] = 0x83;
-	obuf[2] = 0;
+	state->data[0] = 0xe;
+	state->data[1] = 0x83;
+	state->data[2] = 0;
 
-	if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0)
+	if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0)
 		err("command 0x0e transfer failed.");
 
-	obuf[0] = 0xe;
-	obuf[1] = 0x83;
-	obuf[2] = 1;
+	state->data[0] = 0xe;
+	state->data[1] = 0x83;
+	state->data[2] = 1;
 
-	if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0)
+	if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0)
 		err("command 0x0e transfer failed.");
 
-	obuf[0] = 0x51;
+	state->data[0] = 0x51;
 
-	if (dvb_usb_generic_rw(d->dev, obuf, 1, ibuf, 1, 0) < 0)
+	if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0)
 		err("command 0x51 transfer failed.");
 
-	d->fe_adap[0].fe = dvb_attach(ds3000_attach, &su3000_ds3000_config,
-					&d->dev->i2c_adap);
-	if (d->fe_adap[0].fe == NULL)
+	mutex_unlock(&d->data_mutex);
+
+	adap->fe_adap[0].fe = dvb_attach(ds3000_attach, &su3000_ds3000_config,
+					&d->i2c_adap);
+	if (adap->fe_adap[0].fe == NULL)
 		return -EIO;
 
-	if (dvb_attach(ts2020_attach, d->fe_adap[0].fe,
+	if (dvb_attach(ts2020_attach, adap->fe_adap[0].fe,
 				&dw2104_ts2020_config,
-				&d->dev->i2c_adap)) {
+				&d->i2c_adap)) {
 		info("Attached DS3000/TS2020!");
 		return 0;
 	}
@@ -1360,47 +1385,55 @@ static int su3000_frontend_attach(struct dvb_usb_adapter *d)
 	return -EIO;
 }
 
-static int t220_frontend_attach(struct dvb_usb_adapter *d)
+static int t220_frontend_attach(struct dvb_usb_adapter *adap)
 {
-	u8 obuf[3] = { 0xe, 0x87, 0 };
-	u8 ibuf[] = { 0 };
+	struct dvb_usb_device *d = adap->dev;
+	struct dw2102_state *state = d->priv;
+
+	mutex_lock(&d->data_mutex);
 
-	if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0)
+	state->data[0] = 0xe;
+	state->data[1] = 0x87;
+	state->data[2] = 0x0;
+
+	if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0)
 		err("command 0x0e transfer failed.");
 
-	obuf[0] = 0xe;
-	obuf[1] = 0x86;
-	obuf[2] = 1;
+	state->data[0] = 0xe;
+	state->data[1] = 0x86;
+	state->data[2] = 1;
 
-	if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0)
+	if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0)
 		err("command 0x0e transfer failed.");
 
-	obuf[0] = 0xe;
-	obuf[1] = 0x80;
-	obuf[2] = 0;
+	state->data[0] = 0xe;
+	state->data[1] = 0x80;
+	state->data[2] = 0;
 
-	if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0)
+	if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0)
 		err("command 0x0e transfer failed.");
 
 	msleep(50);
 
-	obuf[0] = 0xe;
-	obuf[1] = 0x80;
-	obuf[2] = 1;
+	state->data[0] = 0xe;
+	state->data[1] = 0x80;
+	state->data[2] = 1;
 
-	if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0)
+	if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0)
 		err("command 0x0e transfer failed.");
 
-	obuf[0] = 0x51;
+	state->data[0] = 0x51;
 
-	if (dvb_usb_generic_rw(d->dev, obuf, 1, ibuf, 1, 0) < 0)
+	if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0)
 		err("command 0x51 transfer failed.");
 
-	d->fe_adap[0].fe = dvb_attach(cxd2820r_attach, &cxd2820r_config,
-					&d->dev->i2c_adap, NULL);
-	if (d->fe_adap[0].fe != NULL) {
-		if (dvb_attach(tda18271_attach, d->fe_adap[0].fe, 0x60,
-					&d->dev->i2c_adap, &tda18271_config)) {
+	mutex_unlock(&d->data_mutex);
+
+	adap->fe_adap[0].fe = dvb_attach(cxd2820r_attach, &cxd2820r_config,
+					&d->i2c_adap, NULL);
+	if (adap->fe_adap[0].fe != NULL) {
+		if (dvb_attach(tda18271_attach, adap->fe_adap[0].fe, 0x60,
+					&d->i2c_adap, &tda18271_config)) {
 			info("Attached TDA18271HD/CXD2820R!");
 			return 0;
 		}
@@ -1410,23 +1443,30 @@ static int t220_frontend_attach(struct dvb_usb_adapter *d)
 	return -EIO;
 }
 
-static int m88rs2000_frontend_attach(struct dvb_usb_adapter *d)
+static int m88rs2000_frontend_attach(struct dvb_usb_adapter *adap)
 {
-	u8 obuf[] = { 0x51 };
-	u8 ibuf[] = { 0 };
+	struct dvb_usb_device *d = adap->dev;
+	struct dw2102_state *state = d->priv;
+
+	mutex_lock(&d->data_mutex);
 
-	if (dvb_usb_generic_rw(d->dev, obuf, 1, ibuf, 1, 0) < 0)
+	state->data[0] = 0x51;
+
+	if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0)
 		err("command 0x51 transfer failed.");
 
-	d->fe_adap[0].fe = dvb_attach(m88rs2000_attach, &s421_m88rs2000_config,
-					&d->dev->i2c_adap);
+	mutex_unlock(&d->data_mutex);
 
-	if (d->fe_adap[0].fe == NULL)
+	adap->fe_adap[0].fe = dvb_attach(m88rs2000_attach,
+					&s421_m88rs2000_config,
+					&d->i2c_adap);
+
+	if (adap->fe_adap[0].fe == NULL)
 		return -EIO;
 
-	if (dvb_attach(ts2020_attach, d->fe_adap[0].fe,
+	if (dvb_attach(ts2020_attach, adap->fe_adap[0].fe,
 				&dw2104_ts2020_config,
-				&d->dev->i2c_adap)) {
+				&d->i2c_adap)) {
 		info("Attached RS2000/TS2020!");
 		return 0;
 	}
@@ -1439,44 +1479,50 @@ static int tt_s2_4600_frontend_attach(struct dvb_usb_adapter *adap)
 {
 	struct dvb_usb_device *d = adap->dev;
 	struct dw2102_state *state = d->priv;
-	u8 obuf[3] = { 0xe, 0x80, 0 };
-	u8 ibuf[] = { 0 };
 	struct i2c_adapter *i2c_adapter;
 	struct i2c_client *client;
 	struct i2c_board_info board_info;
 	struct m88ds3103_platform_data m88ds3103_pdata = {};
 	struct ts2020_config ts2020_config = {};
 
-	if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0)
+	mutex_lock(&d->data_mutex);
+
+	state->data[0] = 0xe;
+	state->data[1] = 0x80;
+	state->data[2] = 0x0;
+
+	if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0)
 		err("command 0x0e transfer failed.");
 
-	obuf[0] = 0xe;
-	obuf[1] = 0x02;
-	obuf[2] = 1;
+	state->data[0] = 0xe;
+	state->data[1] = 0x02;
+	state->data[2] = 1;
 
-	if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0)
+	if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0)
 		err("command 0x0e transfer failed.");
 	msleep(300);
 
-	obuf[0] = 0xe;
-	obuf[1] = 0x83;
-	obuf[2] = 0;
+	state->data[0] = 0xe;
+	state->data[1] = 0x83;
+	state->data[2] = 0;
 
-	if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0)
+	if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0)
 		err("command 0x0e transfer failed.");
 
-	obuf[0] = 0xe;
-	obuf[1] = 0x83;
-	obuf[2] = 1;
+	state->data[0] = 0xe;
+	state->data[1] = 0x83;
+	state->data[2] = 1;
 
-	if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0)
+	if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0)
 		err("command 0x0e transfer failed.");
 
-	obuf[0] = 0x51;
+	state->data[0] = 0x51;
 
-	if (dvb_usb_generic_rw(d, obuf, 1, ibuf, 1, 0) < 0)
+	if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0)
 		err("command 0x51 transfer failed.");
 
+	mutex_unlock(&d->data_mutex);
+
 	/* attach demod */
 	m88ds3103_pdata.clk = 27000000;
 	m88ds3103_pdata.i2c_wr_max = 33;
-- 
cgit v1.2.3


From 9ce9f7999741f342eeffd036ab09213a2fa93040 Mon Sep 17 00:00:00 2001
From: Thor Thayer <thor.thayer@linux.intel.com>
Date: Mon, 13 Feb 2017 13:49:58 -0600
Subject: gpio: altera-a10sr: Set gpio_chip parent property

Set the gpio_chip parent property since some recent functions
such as devprop_gpiochip_set_names() can use it.

Signed-off-by: Thor Thayer <thor.thayer@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-altera-a10sr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-altera-a10sr.c b/drivers/gpio/gpio-altera-a10sr.c
index 9e1a138fed53..16a8951b2bed 100644
--- a/drivers/gpio/gpio-altera-a10sr.c
+++ b/drivers/gpio/gpio-altera-a10sr.c
@@ -96,7 +96,7 @@ static int altr_a10sr_gpio_probe(struct platform_device *pdev)
 	gpio->regmap = a10sr->regmap;
 
 	gpio->gp = altr_a10sr_gc;
-
+	gpio->gp.parent = pdev->dev.parent;
 	gpio->gp.of_node = pdev->dev.of_node;
 
 	ret = devm_gpiochip_add_data(&pdev->dev, &gpio->gp, gpio);
-- 
cgit v1.2.3


From fa6256db033067b57318decdc1c583512a1f8f68 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 15 Feb 2017 02:02:06 +0300
Subject: gpio: mockup: return -EFAULT if copy_from_user() fails

copy_from_user() returns the number of bytes remaining to be copied but
we want to return negative error codes on failue.

Fixes: 9202ba2397d1 ("gpio: mockup: implement event injecting over debugfs")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-mockup.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c
index 06dac72cb69c..d99338689213 100644
--- a/drivers/gpio/gpio-mockup.c
+++ b/drivers/gpio/gpio-mockup.c
@@ -197,7 +197,7 @@ static ssize_t gpio_mockup_event_write(struct file *file,
 	struct seq_file *sfile;
 	struct gpio_desc *desc;
 	struct gpio_chip *gc;
-	int status, val;
+	int val;
 	char buf;
 
 	sfile = file->private_data;
@@ -206,9 +206,8 @@ static ssize_t gpio_mockup_event_write(struct file *file,
 	chip = priv->chip;
 	gc = &chip->gc;
 
-	status = copy_from_user(&buf, usr_buf, 1);
-	if (status)
-		return status;
+	if (copy_from_user(&buf, usr_buf, 1))
+		return -EFAULT;
 
 	if (buf == '0')
 		val = 0;
-- 
cgit v1.2.3


From b115bebc07f282067eccc06fd5aa3060ab1426da Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 17 Feb 2017 16:13:44 +0100
Subject: gpio: xgene: mark PM functions as __maybe_unused

When CONFIG_PM_SLEEP is disabled, we get a warning about unused functions:

drivers/gpio/gpio-xgene.c:155:12: warning: 'xgene_gpio_resume' defined but not used [-Wunused-function]
 static int xgene_gpio_resume(struct device *dev)
            ^~~~~~~~~~~~~~~~~
drivers/gpio/gpio-xgene.c:142:12: warning: 'xgene_gpio_suspend' defined but not used [-Wunused-function]
 static int xgene_gpio_suspend(struct device *dev)

The warnings are harmless and can be avoided by simplifying the code and marking
the functions as __maybe_unused.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-xgene.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/drivers/gpio/gpio-xgene.c b/drivers/gpio/gpio-xgene.c
index 40a8881c2ce8..f1c6ec17b90a 100644
--- a/drivers/gpio/gpio-xgene.c
+++ b/drivers/gpio/gpio-xgene.c
@@ -42,9 +42,7 @@ struct xgene_gpio {
 	struct gpio_chip	chip;
 	void __iomem		*base;
 	spinlock_t		lock;
-#ifdef CONFIG_PM
 	u32			set_dr_val[XGENE_MAX_GPIO_BANKS];
-#endif
 };
 
 static int xgene_gpio_get(struct gpio_chip *gc, unsigned int offset)
@@ -138,8 +136,7 @@ static int xgene_gpio_dir_out(struct gpio_chip *gc,
 	return 0;
 }
 
-#ifdef CONFIG_PM
-static int xgene_gpio_suspend(struct device *dev)
+static __maybe_unused int xgene_gpio_suspend(struct device *dev)
 {
 	struct xgene_gpio *gpio = dev_get_drvdata(dev);
 	unsigned long bank_offset;
@@ -152,7 +149,7 @@ static int xgene_gpio_suspend(struct device *dev)
 	return 0;
 }
 
-static int xgene_gpio_resume(struct device *dev)
+static __maybe_unused int xgene_gpio_resume(struct device *dev)
 {
 	struct xgene_gpio *gpio = dev_get_drvdata(dev);
 	unsigned long bank_offset;
@@ -166,10 +163,6 @@ static int xgene_gpio_resume(struct device *dev)
 }
 
 static SIMPLE_DEV_PM_OPS(xgene_gpio_pm, xgene_gpio_suspend, xgene_gpio_resume);
-#define XGENE_GPIO_PM_OPS	(&xgene_gpio_pm)
-#else
-#define XGENE_GPIO_PM_OPS	NULL
-#endif
 
 static int xgene_gpio_probe(struct platform_device *pdev)
 {
@@ -241,7 +234,7 @@ static struct platform_driver xgene_gpio_driver = {
 		.name = "xgene-gpio",
 		.of_match_table = xgene_gpio_of_match,
 		.acpi_match_table = ACPI_PTR(xgene_gpio_acpi_match),
-		.pm     = XGENE_GPIO_PM_OPS,
+		.pm     = &xgene_gpio_pm,
 	},
 	.probe = xgene_gpio_probe,
 };
-- 
cgit v1.2.3


From f759921cfbf4847319d197a6ed7c9534d593f8bc Mon Sep 17 00:00:00 2001
From: Phil Reid <preid@electromag.com.au>
Date: Mon, 20 Feb 2017 09:41:45 +0800
Subject: gpio: altera: Use handle_level_irq when configured as a level_high

When a threaded irq handler is chained attached to one of the gpio
pins when configure for level irq the altera_gpio_irq_leveL_high_handler
does not mask the interrupt while being handled by the chained irq.
This resulting in the threaded irq not getting enough cycles to complete
quickly enough before the irq was disabled as faulty. handle_level_irq
should be used in this situation instead of handle_simple_irq.

In gpiochip_irqchip_add set default handler to handle_bad_irq as
per Documentation/gpio/driver.txt. Then set the correct handler in
the set_type callback.

Signed-off-by: Phil Reid <preid@electromag.com.au>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-altera.c | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/drivers/gpio/gpio-altera.c b/drivers/gpio/gpio-altera.c
index 5bddbd507ca9..3fe6a21e05a5 100644
--- a/drivers/gpio/gpio-altera.c
+++ b/drivers/gpio/gpio-altera.c
@@ -90,21 +90,18 @@ static int altera_gpio_irq_set_type(struct irq_data *d,
 
 	altera_gc = gpiochip_get_data(irq_data_get_irq_chip_data(d));
 
-	if (type == IRQ_TYPE_NONE)
+	if (type == IRQ_TYPE_NONE) {
+		irq_set_handler_locked(d, handle_bad_irq);
 		return 0;
-	if (type == IRQ_TYPE_LEVEL_HIGH &&
-		altera_gc->interrupt_trigger == IRQ_TYPE_LEVEL_HIGH)
-		return 0;
-	if (type == IRQ_TYPE_EDGE_RISING &&
-		altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_RISING)
-		return 0;
-	if (type == IRQ_TYPE_EDGE_FALLING &&
-		altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_FALLING)
-		return 0;
-	if (type == IRQ_TYPE_EDGE_BOTH &&
-		altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_BOTH)
+	}
+	if (type == altera_gc->interrupt_trigger) {
+		if (type == IRQ_TYPE_LEVEL_HIGH)
+			irq_set_handler_locked(d, handle_level_irq);
+		else
+			irq_set_handler_locked(d, handle_simple_irq);
 		return 0;
-
+	}
+	irq_set_handler_locked(d, handle_bad_irq);
 	return -EINVAL;
 }
 
@@ -230,7 +227,6 @@ static void altera_gpio_irq_edge_handler(struct irq_desc *desc)
 	chained_irq_exit(chip, desc);
 }
 
-
 static void altera_gpio_irq_leveL_high_handler(struct irq_desc *desc)
 {
 	struct altera_gpio_chip *altera_gc;
@@ -310,7 +306,7 @@ static int altera_gpio_probe(struct platform_device *pdev)
 	altera_gc->interrupt_trigger = reg;
 
 	ret = gpiochip_irqchip_add(&altera_gc->mmchip.gc, &altera_irq_chip, 0,
-		handle_simple_irq, IRQ_TYPE_NONE);
+		handle_bad_irq, IRQ_TYPE_NONE);
 
 	if (ret) {
 		dev_err(&pdev->dev, "could not add irqchip\n");
-- 
cgit v1.2.3


From 8e51533780ba223a3562ff4382c6b6f350c7e9a4 Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@codeaurora.org>
Date: Fri, 10 Feb 2017 17:21:00 -0600
Subject: pinctrl: qcom: add get_direction function

The get_direction callback function allows gpiolib to know the current
direction (input vs output) for a given GPIO.

This is particularly useful on ACPI systems, where the GPIOs are
configured only by firmware (typically UEFI), so the only way to
know the initial values to query the hardware directly.  Without
this function, gpiolib thinks that all GPIOs are configured for
input.

Signed-off-by: Timur Tabi <timur@codeaurora.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/qcom/pinctrl-msm.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c
index f8e9e1c2b2f6..c978be5eb9eb 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm.c
@@ -422,6 +422,20 @@ static int msm_gpio_direction_output(struct gpio_chip *chip, unsigned offset, in
 	return 0;
 }
 
+static int msm_gpio_get_direction(struct gpio_chip *chip, unsigned int offset)
+{
+	struct msm_pinctrl *pctrl = gpiochip_get_data(chip);
+	const struct msm_pingroup *g;
+	u32 val;
+
+	g = &pctrl->soc->groups[offset];
+
+	val = readl(pctrl->regs + g->ctl_reg);
+
+	/* 0 = output, 1 = input */
+	return val & BIT(g->oe_bit) ? 0 : 1;
+}
+
 static int msm_gpio_get(struct gpio_chip *chip, unsigned offset)
 {
 	const struct msm_pingroup *g;
@@ -510,6 +524,7 @@ static void msm_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip)
 static struct gpio_chip msm_gpio_template = {
 	.direction_input  = msm_gpio_direction_input,
 	.direction_output = msm_gpio_direction_output,
+	.get_direction    = msm_gpio_get_direction,
 	.get              = msm_gpio_get,
 	.set              = msm_gpio_set,
 	.request          = gpiochip_generic_request,
-- 
cgit v1.2.3


From 96e1ce8fcd66d850196758c038bd9d6f7857c518 Mon Sep 17 00:00:00 2001
From: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Date: Mon, 20 Feb 2017 21:00:42 +0900
Subject: pinctrl: uniphier: change pin names of aio/xirq for LD11

This patch changes pin names of AIO and XIRQ according to updated
specification.

Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Acked-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c
index 77a0236ee781..83f8864fa76a 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c
@@ -390,22 +390,22 @@ static const struct pinctrl_pin_desc uniphier_ld11_pins[] = {
 	UNIPHIER_PINCTRL_PIN(140, "AO1D0", 140,
 			     140, UNIPHIER_PIN_DRV_1BIT,
 			     140, UNIPHIER_PIN_PULL_DOWN),
-	UNIPHIER_PINCTRL_PIN(141, "TCON0", 141,
+	UNIPHIER_PINCTRL_PIN(141, "AO1D1", 141,
 			     141, UNIPHIER_PIN_DRV_1BIT,
 			     141, UNIPHIER_PIN_PULL_DOWN),
-	UNIPHIER_PINCTRL_PIN(142, "TCON1", 142,
+	UNIPHIER_PINCTRL_PIN(142, "AO1D2", 142,
 			     142, UNIPHIER_PIN_DRV_1BIT,
 			     142, UNIPHIER_PIN_PULL_DOWN),
-	UNIPHIER_PINCTRL_PIN(143, "TCON2", 143,
+	UNIPHIER_PINCTRL_PIN(143, "XIRQ9", 143,
 			     143, UNIPHIER_PIN_DRV_1BIT,
 			     143, UNIPHIER_PIN_PULL_DOWN),
-	UNIPHIER_PINCTRL_PIN(144, "TCON3", 144,
+	UNIPHIER_PINCTRL_PIN(144, "XIRQ10", 144,
 			     144, UNIPHIER_PIN_DRV_1BIT,
 			     144, UNIPHIER_PIN_PULL_DOWN),
-	UNIPHIER_PINCTRL_PIN(145, "TCON4", 145,
+	UNIPHIER_PINCTRL_PIN(145, "XIRQ11", 145,
 			     145, UNIPHIER_PIN_DRV_1BIT,
 			     145, UNIPHIER_PIN_PULL_DOWN),
-	UNIPHIER_PINCTRL_PIN(146, "TCON5", 146,
+	UNIPHIER_PINCTRL_PIN(146, "XIRQ13", 146,
 			     146, UNIPHIER_PIN_DRV_1BIT,
 			     146, UNIPHIER_PIN_PULL_DOWN),
 	UNIPHIER_PINCTRL_PIN(147, "PWMA", 147,
-- 
cgit v1.2.3


From b3bd0f2849f4480fc4f40bb954d27e58f4f0786b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 1 Mar 2017 23:50:19 +0100
Subject: staging/vc04_services: add CONFIG_OF dependency

After several hours of debugging this obviously bogus but elaborate
gcc-7.0.1 warning,

drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c: In function 'vchiq_complete_bulk':
drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c:603:4: error: argument 2 null where non-null expected [-Werror=nonnull]
    memcpy((char *)page_address(pages[0]) +
    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     pagelist->offset,
     ~~~~~~~~~~~~~~~~~
     fragments,
     ~~~~~~~~~~
     head_bytes);
     ~~~~~~~~~~~
In file included from include/linux/string.h:18:0,
                 from include/linux/bitmap.h:8,
                 from include/linux/cpumask.h:11,
                 from include/linux/interrupt.h:9,
                 from drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c:37:
arch/arm/include/asm/string.h:16:15: note: in a call to function 'memcpy' declared here
 extern void * memcpy(void *, const void *, __kernel_size_t) __nocapture(2);
               ^~~~~~

I have concluded that gcc was technically right in the first place:

vchiq_complete_bulk is an externally visible function that calls
free_pagelist(), which in turn derives a pointer from the global
g_fragments_base variable.

g_fragments_base is initialized in vchiq_platform_init(), but
we only get there if of_property_read_u32() successfully reads the
cache line size. When CONFIG_OF is disabled, this always fails, and
g_fragments_base is guaranteed to be NULL when vchiq_complete_bulk()
gets called.

This adds a CONFIG_OF Kconfig dependency, which is also technically correct
but nonobvious, and thus seems like a good fit for the warning.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/vc04_services/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/staging/vc04_services/Kconfig b/drivers/staging/vc04_services/Kconfig
index e61e4ca064a8..74094fff4367 100644
--- a/drivers/staging/vc04_services/Kconfig
+++ b/drivers/staging/vc04_services/Kconfig
@@ -1,6 +1,7 @@
 config BCM2835_VCHIQ
 	tristate "Videocore VCHIQ"
 	depends on HAS_DMA
+	depends on OF
 	depends on RASPBERRYPI_FIRMWARE || (COMPILE_TEST && !RASPBERRYPI_FIRMWARE)
 	default y
 	help
-- 
cgit v1.2.3


From f2f10b7e722a75c6d75a7f7cd06b0eee3ae20f7c Mon Sep 17 00:00:00 2001
From: Daniel Drake <drake@endlessm.com>
Date: Fri, 17 Feb 2017 07:40:52 -0600
Subject: HID: chicony: Add support for another ASUS Zen AiO keyboard

Add support for media keys on the keyboard that comes with the
Asus V221ID and ZN241IC All In One computers.

The keys to support here are WLAN, BRIGHTNESSDOWN and BRIGHTNESSUP.

This device is not visibly branded as Chicony, and the USB Vendor ID
suggests that it is a JESS device. However this seems like the right place
to put it: the usage codes are identical to the currently supported
devices, and this driver already supports the ASUS AIO keyboard AK1D.

Signed-off-by: Daniel Drake <drake@endlessm.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/Kconfig       | 4 ++--
 drivers/hid/hid-chicony.c | 1 +
 drivers/hid/hid-core.c    | 1 +
 drivers/hid/hid-ids.h     | 1 +
 4 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 1aeb80e52424..8eab3200ac9a 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -175,11 +175,11 @@ config HID_CHERRY
 	Support for Cherry Cymotion keyboard.
 
 config HID_CHICONY
-	tristate "Chicony Tactical pad"
+	tristate "Chicony devices"
 	depends on HID
 	default !EXPERT
 	---help---
-	Support for Chicony Tactical pad.
+	Support for Chicony Tactical pad and special keys on Chicony keyboards.
 
 config HID_CORSAIR
 	tristate "Corsair devices"
diff --git a/drivers/hid/hid-chicony.c b/drivers/hid/hid-chicony.c
index bc3cec199fee..f04ed9aabc3f 100644
--- a/drivers/hid/hid-chicony.c
+++ b/drivers/hid/hid-chicony.c
@@ -86,6 +86,7 @@ static const struct hid_device_id ch_devices[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) },
 	{ }
 };
 MODULE_DEVICE_TABLE(hid, ch_devices);
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index e9e87d337446..ae01ae601d74 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1910,6 +1910,7 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A0C2) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ION, USB_DEVICE_ID_ICADE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KENSINGTON, USB_DEVICE_ID_KS_SLIMBLADE) },
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 86c95d30ac80..b3df60da0297 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -557,6 +557,7 @@
 
 #define USB_VENDOR_ID_JESS		0x0c45
 #define USB_DEVICE_ID_JESS_YUREX	0x1010
+#define USB_DEVICE_ID_JESS_ZEN_AIO_KBD	0x5112
 
 #define USB_VENDOR_ID_JESS2		0x0f30
 #define USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD 0x0111
-- 
cgit v1.2.3


From a687c5765b5ae19fe559e14615ddc87ebb46d409 Mon Sep 17 00:00:00 2001
From: Roderick Colenbrander <roderick.colenbrander@sony.com>
Date: Fri, 24 Feb 2017 16:14:15 -0800
Subject: HID: sony: Fix input device leak when connecting a DS4 twice using
 USB/BT

When a user connects a DS4 twice using USB and BT, we reject the
second device connection after the setup work. We then perform
a cleanup, but during cleanup we are not removing the touchpad
device. This leads to leakage of an input device, which we would
never remove. It can likely result into a kernel oops as well
when the touchpad evdev node is accessed and the underlaying HID
device has been removed from the system.

[jkosina@suse.cz: added stable annotation]
Fixes: ac797b95f532 ("HID: sony: Make the DS4 touchpad a separate device")
Cc: stable@vger.kernel.org
Signed-off-by: Roderick Colenbrander <roderick.colenbrander@sony.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-sony.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
index f405b07d0381..740996f9bdd4 100644
--- a/drivers/hid/hid-sony.c
+++ b/drivers/hid/hid-sony.c
@@ -2632,6 +2632,8 @@ err_stop:
 		sony_leds_remove(sc);
 	if (sc->quirks & SONY_BATTERY_SUPPORT)
 		sony_battery_remove(sc);
+	if (sc->touchpad)
+		sony_unregister_touchpad(sc);
 	sony_cancel_work_sync(sc);
 	kfree(sc->output_report_dmabuf);
 	sony_remove_dev_list(sc);
-- 
cgit v1.2.3


From eb38d913c27f32f4df173791051fecf6aca34173 Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Thu, 2 Mar 2017 10:44:58 +0200
Subject: Revert "usb: gadget: uvc: Add missing call for additional setup data"

This reverts commit 4fbac5206afd01b717d4bdc58793d471f3391b4b.

This commit breaks g_webcam when used with uvc-gadget [1].

The user space application (e.g. uvc-gadget) is responsible for
sending response to UVC class specific requests on control endpoint
in uvc_send_response() in uvc_v4l2.c.

The bad commit was causing a duplicate response to be sent with
incorrect response data thus causing UVC probe to fail at the host
and broken control transfer endpoint at the gadget.

[1] - git://git.ideasonboard.org/uvc-gadget.git

Cc: <stable@vger.kernel.org> # v4.9+
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/function/f_uvc.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c
index 27ed51b5082f..29b41b5dee04 100644
--- a/drivers/usb/gadget/function/f_uvc.c
+++ b/drivers/usb/gadget/function/f_uvc.c
@@ -258,13 +258,6 @@ uvc_function_setup(struct usb_function *f, const struct usb_ctrlrequest *ctrl)
 	memcpy(&uvc_event->req, ctrl, sizeof(uvc_event->req));
 	v4l2_event_queue(&uvc->vdev, &v4l2_event);
 
-	/* Pass additional setup data to userspace */
-	if (uvc->event_setup_out && uvc->event_length) {
-		uvc->control_req->length = uvc->event_length;
-		return usb_ep_queue(uvc->func.config->cdev->gadget->ep0,
-			uvc->control_req, GFP_ATOMIC);
-	}
-
 	return 0;
 }
 
-- 
cgit v1.2.3


From 5bbc852676ae08e818241cf66a3ffe4be44225c4 Mon Sep 17 00:00:00 2001
From: Peter Chen <peter.chen@nxp.com>
Date: Tue, 28 Feb 2017 14:25:45 +0800
Subject: usb: gadget: dummy_hcd: clear usb_gadget region before registration

When the user does device unbind and rebind test, the kernel will
show below dump due to usb_gadget memory region is dirty after unbind.
Clear usb_gadget region for every new probe.

root@imx6qdlsolo:/sys/bus/platform/drivers/dummy_udc# echo dummy_udc.0 > bind
[  102.523312] kobject (eddd78b0): tried to init an initialized object, something is seriously wrong.
[  102.532447] CPU: 0 PID: 734 Comm: sh Not tainted 4.10.0-rc7-00872-g1b2b8e9 #1298
[  102.539866] Hardware name: Freescale i.MX6 SoloX (Device Tree)
[  102.545717] Backtrace:
[  102.548225] [<c010d090>] (dump_backtrace) from [<c010d338>] (show_stack+0x18/0x1c)
[  102.555822]  r7:ede34000 r6:60010013 r5:00000000 r4:c0f29418
[  102.561512] [<c010d320>] (show_stack) from [<c040c2a4>] (dump_stack+0xb4/0xe8)
[  102.568764] [<c040c1f0>] (dump_stack) from [<c040e6d4>] (kobject_init+0x80/0x9c)
[  102.576187]  r10:0000001f r9:eddd7000 r8:eeaf8c10 r7:eddd78a8 r6:c177891c r5:c0f3b060
[  102.584036]  r4:eddd78b0 r3:00000000
[  102.587641] [<c040e654>] (kobject_init) from [<c05359a4>] (device_initialize+0x28/0xf8)
[  102.595665]  r5:eebc4800 r4:eddd78a8
[  102.599268] [<c053597c>] (device_initialize) from [<c05382ac>] (device_register+0x14/0x20)
[  102.607556]  r7:eddd78a8 r6:00000000 r5:eebc4800 r4:eddd78a8
[  102.613256] [<c0538298>] (device_register) from [<c0668ef4>] (usb_add_gadget_udc_release+0x8c/0x1ec)
[  102.622410]  r5:eebc4800 r4:eddd7860
[  102.626015] [<c0668e68>] (usb_add_gadget_udc_release) from [<c0669068>] (usb_add_gadget_udc+0x14/0x18)
[  102.635351]  r10:0000001f r9:eddd7000 r8:eddd788c r7:bf003770 r6:eddd77f8 r5:eddd7818
[  102.643198]  r4:eddd785c r3:eddd7b24
[  102.646834] [<c0669054>] (usb_add_gadget_udc) from [<bf003428>] (dummy_udc_probe+0x170/0x1c4 [dummy_hcd])
[  102.656458] [<bf0032b8>] (dummy_udc_probe [dummy_hcd]) from [<c053d114>] (platform_drv_probe+0x54/0xb8)
[  102.665881]  r10:00000008 r9:c1778960 r8:bf004128 r7:fffffdfb r6:bf004128 r5:eeaf8c10
[  102.673727]  r4:eeaf8c10
[  102.676293] [<c053d0c0>] (platform_drv_probe) from [<c053b160>] (driver_probe_device+0x264/0x474)
[  102.685186]  r7:00000000 r6:00000000 r5:c1778960 r4:eeaf8c10
[  102.690876] [<c053aefc>] (driver_probe_device) from [<c05397c4>] (bind_store+0xb8/0x14c)
[  102.698994]  r10:eeb3bb4c r9:ede34000 r8:0000000c r7:eeaf8c44 r6:bf004128 r5:c0f3b668
[  102.706840]  r4:eeaf8c10
[  102.709402] [<c053970c>] (bind_store) from [<c0538ca8>] (drv_attr_store+0x28/0x34)
[  102.716998]  r9:ede34000 r8:00000000 r7:ee3863c0 r6:ee3863c0 r5:c0538c80 r4:c053970c
[  102.724776] [<c0538c80>] (drv_attr_store) from [<c029c930>] (sysfs_kf_write+0x50/0x54)
[  102.732711]  r5:c0538c80 r4:0000000c
[  102.736313] [<c029c8e0>] (sysfs_kf_write) from [<c029be84>] (kernfs_fop_write+0x100/0x214)
[  102.744599]  r7:ee3863c0 r6:eeb3bb40 r5:00000000 r4:00000000
[  102.750287] [<c029bd84>] (kernfs_fop_write) from [<c0222dd8>] (__vfs_write+0x34/0x120)
[  102.758231]  r10:00000000 r9:ede34000 r8:c0108bc4 r7:0000000c r6:ede35f80 r5:c029bd84
[  102.766077]  r4:ee223780
[  102.768638] [<c0222da4>] (__vfs_write) from [<c0224678>] (vfs_write+0xa8/0x170)
[  102.775974]  r9:ede34000 r8:c0108bc4 r7:ede35f80 r6:01861cb0 r5:ee223780 r4:0000000c
[  102.783743] [<c02245d0>] (vfs_write) from [<c0225498>] (SyS_write+0x4c/0xa8)
[  102.790818]  r9:ede34000 r8:c0108bc4 r7:0000000c r6:01861cb0 r5:ee223780 r4:ee223780
[  102.798595] [<c022544c>] (SyS_write) from [<c0108a20>] (ret_fast_syscall+0x0/0x1c)
[  102.806188]  r7:00000004 r6:b6e83d58 r5:01861cb0 r4:0000000c

Fixes: 90fccb529d24 ("usb: gadget: Gadget directory cleanup - group UDC drivers")
Cc: stable <stable@vger.kernel.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Peter Chen <peter.chen@nxp.com>
Tested-by: Xiaolong Ye <xiaolong.ye@intel.com>
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/udc/dummy_hcd.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c
index c60abe3a68f9..8cabc5944d5f 100644
--- a/drivers/usb/gadget/udc/dummy_hcd.c
+++ b/drivers/usb/gadget/udc/dummy_hcd.c
@@ -1031,6 +1031,8 @@ static int dummy_udc_probe(struct platform_device *pdev)
 	int		rc;
 
 	dum = *((void **)dev_get_platdata(&pdev->dev));
+	/* Clear usb_gadget region for new registration to udc-core */
+	memzero_explicit(&dum->gadget, sizeof(struct usb_gadget));
 	dum->gadget.name = gadget_name;
 	dum->gadget.ops = &dummy_ops;
 	dum->gadget.max_speed = USB_SPEED_SUPER;
-- 
cgit v1.2.3


From 077dbaee9df53c597df532a08d721d03f4570f3d Mon Sep 17 00:00:00 2001
From: Franck Demathieu <fdemathieu@gmail.com>
Date: Thu, 23 Feb 2017 10:48:55 +0100
Subject: irqchip/crossbar: Fix incorrect type of local variables

The max and entry variables are unsigned according to the dt-bindings.
Fix following 3 sparse issues (-Wtypesign):

  drivers/irqchip/irq-crossbar.c:222:52: warning: incorrect type in argument 3 (different signedness)
  drivers/irqchip/irq-crossbar.c:222:52:    expected unsigned int [usertype] *out_value
  drivers/irqchip/irq-crossbar.c:222:52:    got int *<noident>

  drivers/irqchip/irq-crossbar.c:245:56: warning: incorrect type in argument 4 (different signedness)
  drivers/irqchip/irq-crossbar.c:245:56:    expected unsigned int [usertype] *out_value
  drivers/irqchip/irq-crossbar.c:245:56:    got int *<noident>

  drivers/irqchip/irq-crossbar.c:263:56: warning: incorrect type in argument 4 (different signedness)
  drivers/irqchip/irq-crossbar.c:263:56:    expected unsigned int [usertype] *out_value
  drivers/irqchip/irq-crossbar.c:263:56:    got int *<noident>

Signed-off-by: Franck Demathieu <fdemathieu@gmail.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 drivers/irqchip/irq-crossbar.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c
index 1eef56a89b1f..05bbf171df37 100644
--- a/drivers/irqchip/irq-crossbar.c
+++ b/drivers/irqchip/irq-crossbar.c
@@ -198,7 +198,8 @@ static const struct irq_domain_ops crossbar_domain_ops = {
 
 static int __init crossbar_of_init(struct device_node *node)
 {
-	int i, size, max = 0, reserved = 0, entry;
+	int i, size, reserved = 0;
+	u32 max = 0, entry;
 	const __be32 *irqsr;
 	int ret = -ENOMEM;
 
-- 
cgit v1.2.3


From b3e228473e6cec7cf83b4025b4570c8066ab2dd8 Mon Sep 17 00:00:00 2001
From: Mian Yousaf Kaukab <yousaf.kaukab@suse.com>
Date: Thu, 2 Mar 2017 16:11:47 +0100
Subject: irqdomain: Add empty irq_domain_check_msi_remap

Fix following build error for s390:
drivers/vfio/vfio_iommu_type1.c: In function 'vfio_iommu_type1_attach_group':
drivers/vfio/vfio_iommu_type1.c:1290:25: error: implicit declaration of function 'irq_domain_check_msi_remap'

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Mian Yousaf Kaukab <yousaf.kaukab@suse.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irqdomain.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 188eced6813e..9f3616085423 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -524,6 +524,10 @@ static inline struct irq_domain *irq_find_matching_fwnode(
 {
 	return NULL;
 }
+static inline bool irq_domain_check_msi_remap(void)
+{
+	return false;
+}
 #endif /* !CONFIG_IRQ_DOMAIN */
 
 #endif /* _LINUX_IRQDOMAIN_H */
-- 
cgit v1.2.3


From 38355b2a44776c25b0f2ad466e8c51bb805b3032 Mon Sep 17 00:00:00 2001
From: John Keeping <john@metanate.com>
Date: Tue, 28 Feb 2017 10:55:30 +0000
Subject: usb: gadget: configs: plug memory leak

When binding a gadget to a device, "name" is stored in gi->udc_name, but
this does not happen when unregistering and the string is leaked.

Signed-off-by: John Keeping <john@metanate.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/configfs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index 78c44979dde3..cbff3b02840d 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -269,6 +269,7 @@ static ssize_t gadget_dev_desc_UDC_store(struct config_item *item,
 		ret = unregister_gadget(gi);
 		if (ret)
 			goto err;
+		kfree(name);
 	} else {
 		if (gi->composite.gadget_driver.udc_name) {
 			ret = -EBUSY;
-- 
cgit v1.2.3


From 73561128eb72ca04c3be6e240f162a861bf68a86 Mon Sep 17 00:00:00 2001
From: Franck Demathieu <fdemathieu@gmail.com>
Date: Mon, 27 Feb 2017 11:52:46 +0100
Subject: usb: dwc3: Fix incorrect type for utmi mode

The utmi mode is unsigned according the dt-bindings.
Fix sparse issue (-Wtypesign):

  drivers/usb/dwc3/dwc3-omap.c:391:50: warning: incorrect type in argument 3 (different signedness)
  drivers/usb/dwc3/dwc3-omap.c:391:50:    expected unsigned int [usertype] *out_value
  drivers/usb/dwc3/dwc3-omap.c:391:50:    got int *<noident>

Signed-off-by: Franck Demathieu <fdemathieu@gmail.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/dwc3/dwc3-omap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c
index 2092e46b1380..4a595777969e 100644
--- a/drivers/usb/dwc3/dwc3-omap.c
+++ b/drivers/usb/dwc3/dwc3-omap.c
@@ -392,7 +392,7 @@ static void dwc3_omap_set_utmi_mode(struct dwc3_omap *omap)
 {
 	u32			reg;
 	struct device_node	*node = omap->dev->of_node;
-	int			utmi_mode = 0;
+	u32			utmi_mode = 0;
 
 	reg = dwc3_omap_read_utmi_ctrl(omap);
 
-- 
cgit v1.2.3


From 4242820277b5378c9e4064e79306f326d731472f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 22 Feb 2017 11:33:27 +0100
Subject: usb: gadget: udc: atmel: fix debug output

The debug output now contains the wrong variable, as seen from the compiler
warning:

drivers/usb/gadget/udc/atmel_usba_udc.c: In function 'usba_ep_enable':
drivers/usb/gadget/udc/atmel_usba_udc.c:632:550: error: 'ept_cfg' may be used uninitialized in this function [-Werror=maybe-uninitialized]
  DBG(DBG_ERR, "%s: EPT_CFG = 0x%lx (maxpacket = %lu)\n",

This changes the debug output the same way as the other code.

Fixes: 741d2558bf0a ("usb: gadget: udc: atmel: Update endpoint allocation scheme")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/udc/atmel_usba_udc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c
index 11bbce28bc23..2035906b8ced 100644
--- a/drivers/usb/gadget/udc/atmel_usba_udc.c
+++ b/drivers/usb/gadget/udc/atmel_usba_udc.c
@@ -610,7 +610,7 @@ usba_ep_enable(struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc)
 {
 	struct usba_ep *ep = to_usba_ep(_ep);
 	struct usba_udc *udc = ep->udc;
-	unsigned long flags, ept_cfg, maxpacket;
+	unsigned long flags, maxpacket;
 	unsigned int nr_trans;
 
 	DBG(DBG_GADGET, "%s: ep_enable: desc=%p\n", ep->ep.name, desc);
@@ -630,7 +630,7 @@ usba_ep_enable(struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc)
 	ep->is_in = 0;
 
 	DBG(DBG_ERR, "%s: EPT_CFG = 0x%lx (maxpacket = %lu)\n",
-			ep->ep.name, ept_cfg, maxpacket);
+			ep->ep.name, ep->ept_cfg, maxpacket);
 
 	if (usb_endpoint_dir_in(desc)) {
 		ep->is_in = 1;
-- 
cgit v1.2.3


From b6e7aeeaf235901c42ec35de4633c7c69501d303 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Tue, 21 Feb 2017 22:33:11 +0100
Subject: USB: gadgetfs: Fix a potential memory leak in 'dev_config()'

'kbuf' is allocated just a few lines above using 'memdup_user()'.
If the 'if (dev->buf)' test fails, this memory is never released.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/legacy/inode.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index a2615d64d07c..0513dfa008e6 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -1782,8 +1782,10 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
 
 	spin_lock_irq (&dev->lock);
 	value = -EINVAL;
-	if (dev->buf)
+	if (dev->buf) {
+		kfree(kbuf);
 		goto fail;
+	}
 	dev->buf = kbuf;
 
 	/* full or low speed config */
-- 
cgit v1.2.3


From 3ba534df815f233535b5a3dd3de41055666bbd21 Mon Sep 17 00:00:00 2001
From: Janusz Dziedzic <januszx.dziedzic@linux.intel.com>
Date: Thu, 16 Feb 2017 14:54:12 +0100
Subject: Revert "usb: gadget: f_fs: Fix ExtCompat descriptor validation"

This reverts commit ac670a3a650b899fc020b81f63e810d06015b865.

This introduce bug we already fixed in
commit 53642399aa71 ("usb: gadget: f_fs: Fix wrong check on reserved1 wof OS_DESC_EXT_COMPAT")

Next FFS (adb) SS enumeration fail with Windows OS.

Signed-off-by: Janusz Dziedzic <januszx.dziedzic@linux.intel.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/function/f_fs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 7e976f00cb02..a0085571824d 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -2264,7 +2264,7 @@ static int __ffs_data_do_os_desc(enum ffs_os_desc_type type,
 
 		if (len < sizeof(*d) ||
 		    d->bFirstInterfaceNumber >= ffs->interfaces_count ||
-		    d->Reserved1)
+		    !d->Reserved1)
 			return -EINVAL;
 		for (i = 0; i < ARRAY_SIZE(d->Reserved2); ++i)
 			if (d->Reserved2[i])
-- 
cgit v1.2.3


From 1551e35ea4189c1f7199fe278395fc94196715f2 Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Wed, 15 Feb 2017 14:16:26 +0200
Subject: usb: dwc3: gadget: Fix system suspend/resume on TI platforms

On TI platforms (dra7, am437x), the DWC3_DSTS_DEVCTRLHLT bit is not set
after the device controller is stopped via DWC3_DCTL_RUN_STOP.

If we don't disconnect and stop the gadget, it stops working after a
system resume with the trace below.

There is no point in preventing gadget disconnect and gadget stop during
system suspend/resume as we're going to suspend in any case, whether
DEVCTRLHLT timed out or not.

[  141.727480] ------------[ cut here ]------------
[  141.732349] WARNING: CPU: 1 PID: 2135 at drivers/usb/dwc3/gadget.c:2384 dwc3_stop_active_transfer.constprop.4+0xc4/0xe4 [dwc3]
[  141.744299] Modules linked in: usb_f_ss_lb g_zero libcomposite xhci_plat_hcd xhci_hcd usbcore dwc3 evdev udc_core m25p80 usb_common spi_nor snd_soc_davinci_mcasp snd_soc_simple_card snd_soc_edma snd_soc_tlv3e
[  141.792163] CPU: 1 PID: 2135 Comm: irq/456-dwc3 Not tainted 4.10.0-rc8 #1138
[  141.799547] Hardware name: Generic DRA74X (Flattened Device Tree)
[  141.805940] [<c01101b4>] (unwind_backtrace) from [<c010c31c>] (show_stack+0x10/0x14)
[  141.814066] [<c010c31c>] (show_stack) from [<c04a0918>] (dump_stack+0xac/0xe0)
[  141.821648] [<c04a0918>] (dump_stack) from [<c013708c>] (__warn+0xd8/0x104)
[  141.828955] [<c013708c>] (__warn) from [<c0137164>] (warn_slowpath_null+0x20/0x28)
[  141.836902] [<c0137164>] (warn_slowpath_null) from [<bf27784c>] (dwc3_stop_active_transfer.constprop.4+0xc4/0xe4 [dwc3])
[  141.848329] [<bf27784c>] (dwc3_stop_active_transfer.constprop.4 [dwc3]) from [<bf27ab14>] (__dwc3_gadget_ep_disable+0x64/0x528 [dwc3])
[  141.861034] [<bf27ab14>] (__dwc3_gadget_ep_disable [dwc3]) from [<bf27c27c>] (dwc3_gadget_ep_disable+0x3c/0xc8 [dwc3])
[  141.872280] [<bf27c27c>] (dwc3_gadget_ep_disable [dwc3]) from [<bf23b428>] (usb_ep_disable+0x11c/0x18c [udc_core])
[  141.883160] [<bf23b428>] (usb_ep_disable [udc_core]) from [<bf342774>] (disable_ep+0x18/0x54 [usb_f_ss_lb])
[  141.893408] [<bf342774>] (disable_ep [usb_f_ss_lb]) from [<bf3437b0>] (disable_endpoints+0x18/0x50 [usb_f_ss_lb])
[  141.904168] [<bf3437b0>] (disable_endpoints [usb_f_ss_lb]) from [<bf343814>] (disable_source_sink+0x2c/0x34 [usb_f_ss_lb])
[  141.915771] [<bf343814>] (disable_source_sink [usb_f_ss_lb]) from [<bf329a9c>] (reset_config+0x48/0x7c [libcomposite])
[  141.927012] [<bf329a9c>] (reset_config [libcomposite]) from [<bf329afc>] (composite_disconnect+0x2c/0x54 [libcomposite])
[  141.938444] [<bf329afc>] (composite_disconnect [libcomposite]) from [<bf23d7dc>] (usb_gadget_udc_reset+0x10/0x34 [udc_core])
[  141.950237] [<bf23d7dc>] (usb_gadget_udc_reset [udc_core]) from [<bf276d70>] (dwc3_gadget_reset_interrupt+0x64/0x698 [dwc3])
[  141.962022] [<bf276d70>] (dwc3_gadget_reset_interrupt [dwc3]) from [<bf27952c>] (dwc3_thread_interrupt+0x618/0x1a3c [dwc3])
[  141.973723] [<bf27952c>] (dwc3_thread_interrupt [dwc3]) from [<c01a7ce8>] (irq_thread_fn+0x1c/0x54)
[  141.983215] [<c01a7ce8>] (irq_thread_fn) from [<c01a7fbc>] (irq_thread+0x120/0x1f0)
[  141.991247] [<c01a7fbc>] (irq_thread) from [<c015ba14>] (kthread+0xf8/0x138)
[  141.998641] [<c015ba14>] (kthread) from [<c01078f0>] (ret_from_fork+0x14/0x24)
[  142.006213] ---[ end trace b4ecfe9f175b9a9c ]---

Signed-off-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/dwc3/gadget.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index f875b3f4b0ec..3db5eeae2bfb 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -3291,15 +3291,10 @@ void dwc3_gadget_exit(struct dwc3 *dwc)
 
 int dwc3_gadget_suspend(struct dwc3 *dwc)
 {
-	int ret;
-
 	if (!dwc->gadget_driver)
 		return 0;
 
-	ret = dwc3_gadget_run_stop(dwc, false, false);
-	if (ret < 0)
-		return ret;
-
+	dwc3_gadget_run_stop(dwc, false, false);
 	dwc3_disconnect_gadget(dwc);
 	__dwc3_gadget_stop(dwc);
 
-- 
cgit v1.2.3


From 0913750f9fb6f26bcd00c8f9dd9a8d1b8d031246 Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Wed, 15 Feb 2017 13:38:22 +0200
Subject: usb: dwc3-omap: Fix missing break in dwc3_omap_set_mailbox()

We need to break from all cases if we want to treat
each one of them separately.

Reported-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Fixes: d2728fb3e01f ("usb: dwc3: omap: Pass VBUS and ID events transparently")
Cc: <stable@vger.kernel.org> #v4.8+
Signed-off-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/dwc3/dwc3-omap.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c
index 4a595777969e..f8d0747810e7 100644
--- a/drivers/usb/dwc3/dwc3-omap.c
+++ b/drivers/usb/dwc3/dwc3-omap.c
@@ -250,6 +250,7 @@ static void dwc3_omap_set_mailbox(struct dwc3_omap *omap,
 		val = dwc3_omap_read_utmi_ctrl(omap);
 		val |= USBOTGSS_UTMI_OTG_CTRL_IDDIG;
 		dwc3_omap_write_utmi_ctrl(omap, val);
+		break;
 
 	case OMAP_DWC3_VBUS_OFF:
 		val = dwc3_omap_read_utmi_ctrl(omap);
-- 
cgit v1.2.3


From df7545719a14fa7b481896fb8689e23d0a00f682 Mon Sep 17 00:00:00 2001
From: Petr Cvek <petr.cvek@tul.cz>
Date: Fri, 24 Feb 2017 02:54:56 +0100
Subject: usb: gadget: pxa27x: Test for a valid argument pointer

A call usb_put_phy(udc->transceiver) must be tested for a valid pointer.
Use an already existing test for usb_unregister_notifier call.

Acked-by: Robert Jarzmik <robert.jarzmik@free.fr>
Reported-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Petr Cvek <petr.cvek@tul.cz>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/udc/pxa27x_udc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/udc/pxa27x_udc.c b/drivers/usb/gadget/udc/pxa27x_udc.c
index e1335ad5bce9..832c4fdbe985 100644
--- a/drivers/usb/gadget/udc/pxa27x_udc.c
+++ b/drivers/usb/gadget/udc/pxa27x_udc.c
@@ -2534,9 +2534,10 @@ static int pxa_udc_remove(struct platform_device *_dev)
 	usb_del_gadget_udc(&udc->gadget);
 	pxa_cleanup_debugfs(udc);
 
-	if (!IS_ERR_OR_NULL(udc->transceiver))
+	if (!IS_ERR_OR_NULL(udc->transceiver)) {
 		usb_unregister_notifier(udc->transceiver, &pxa27x_udc_phy);
-	usb_put_phy(udc->transceiver);
+		usb_put_phy(udc->transceiver);
+	}
 
 	udc->transceiver = NULL;
 	the_controller = NULL;
-- 
cgit v1.2.3


From ef5e2fa9f65befa12f1113c734602d2c1964d2a5 Mon Sep 17 00:00:00 2001
From: Raz Manor <Raz.Manor@valens.com>
Date: Thu, 9 Feb 2017 09:41:08 +0200
Subject: usb: gadget: udc: net2280: Fix tmp reusage in net2280 driver

In the function scan_dma_completions() there is a reusage of tmp
variable. That coused a wrong value being used in some case when
reading a short packet terminated transaction from an endpoint,
in 2 concecutive reads.

This was my logic for the patch:

The req->td->dmadesc equals to 0 iff:
-- There was a transaction ending with a short packet, and
-- The read() to read it was shorter than the transaction length, and
-- The read() to complete it is longer than the residue.
I believe this is true from the printouts of various cases,
but I can't be positive it is correct.

Entering this if, there should be no more data in the endpoint
(a short packet terminated the transaction).
If there is, the transaction wasn't really done and we should exit and
wait for it to finish entirely. That is the inner if.
That inner if should never happen, but it is there to be on the safe
side. That is why it is marked with the comment /* paranoia */.
The size of the data available in the endpoint is ep->dma->dmacount
and it is read to tmp.
This entire clause is based on my own educated guesses.

If we passed that inner if without breaking in the original code,
than tmp & DMA_BYTE_MASK_COUNT== 0.
That means we will always pass dma bytes count of 0 to dma_done(),
meaning all the requested bytes were read.

dma_done() reports back to the upper layer that the request (read())
was done and how many bytes were read.
In the original code that would always be the request size,
regardless of the actual size of the data.
That did not make sense to me at all.

However, the original value of tmp is req->td->dmacount,
which is the dmacount value when the request's dma transaction was
finished. And that is a much more reasonable value to report back to
the caller.

To recreate the problem:
Read from a bulk out endpoint in a loop, 1024 * n bytes in each
iteration.
Connect the PLX to a host you can control.
Send to that endpoint 1024 * n + x bytes,
such that 0 < x < 1024 * n and (x % 1024) != 0
You would expect the first read() to return 1024 * n
and the second read() to return x.
But you will get the first read to return 1024 * n
and the second one to return 1024 * n.
That is true for every positive integer n.

Cc: Felipe Balbi <balbi@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: linux-usb@vger.kernel.org
Signed-off-by: Raz Manor <Raz.Manor@valens.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/udc/net2280.c | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c
index 85504419ab31..3828c2ec8623 100644
--- a/drivers/usb/gadget/udc/net2280.c
+++ b/drivers/usb/gadget/udc/net2280.c
@@ -1146,15 +1146,15 @@ static int scan_dma_completions(struct net2280_ep *ep)
 	 */
 	while (!list_empty(&ep->queue)) {
 		struct net2280_request	*req;
-		u32			tmp;
+		u32 req_dma_count;
 
 		req = list_entry(ep->queue.next,
 				struct net2280_request, queue);
 		if (!req->valid)
 			break;
 		rmb();
-		tmp = le32_to_cpup(&req->td->dmacount);
-		if ((tmp & BIT(VALID_BIT)) != 0)
+		req_dma_count = le32_to_cpup(&req->td->dmacount);
+		if ((req_dma_count & BIT(VALID_BIT)) != 0)
 			break;
 
 		/* SHORT_PACKET_TRANSFERRED_INTERRUPT handles "usb-short"
@@ -1163,40 +1163,41 @@ static int scan_dma_completions(struct net2280_ep *ep)
 		 */
 		if (unlikely(req->td->dmadesc == 0)) {
 			/* paranoia */
-			tmp = readl(&ep->dma->dmacount);
-			if (tmp & DMA_BYTE_COUNT_MASK)
+			u32 const ep_dmacount = readl(&ep->dma->dmacount);
+
+			if (ep_dmacount & DMA_BYTE_COUNT_MASK)
 				break;
 			/* single transfer mode */
-			dma_done(ep, req, tmp, 0);
+			dma_done(ep, req, req_dma_count, 0);
 			num_completed++;
 			break;
 		} else if (!ep->is_in &&
 			   (req->req.length % ep->ep.maxpacket) &&
 			   !(ep->dev->quirks & PLX_PCIE)) {
 
-			tmp = readl(&ep->regs->ep_stat);
+			u32 const ep_stat = readl(&ep->regs->ep_stat);
 			/* AVOID TROUBLE HERE by not issuing short reads from
 			 * your gadget driver.  That helps avoids errata 0121,
 			 * 0122, and 0124; not all cases trigger the warning.
 			 */
-			if ((tmp & BIT(NAK_OUT_PACKETS)) == 0) {
+			if ((ep_stat & BIT(NAK_OUT_PACKETS)) == 0) {
 				ep_warn(ep->dev, "%s lost packet sync!\n",
 						ep->ep.name);
 				req->req.status = -EOVERFLOW;
 			} else {
-				tmp = readl(&ep->regs->ep_avail);
-				if (tmp) {
+				u32 const ep_avail = readl(&ep->regs->ep_avail);
+				if (ep_avail) {
 					/* fifo gets flushed later */
 					ep->out_overflow = 1;
 					ep_dbg(ep->dev,
 						"%s dma, discard %d len %d\n",
-						ep->ep.name, tmp,
+						ep->ep.name, ep_avail,
 						req->req.length);
 					req->req.status = -EOVERFLOW;
 				}
 			}
 		}
-		dma_done(ep, req, tmp, 0);
+		dma_done(ep, req, req_dma_count, 0);
 		num_completed++;
 	}
 
-- 
cgit v1.2.3


From 587d7e72aedca91cee80c0a56811649c3efab765 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Thu, 2 Mar 2017 12:41:48 -0800
Subject: kvm: nVMX: VMCLEAR should not cause the vCPU to shut down
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

VMCLEAR should silently ignore a failure to clear the launch state of
the VMCS referenced by the operand.

Signed-off-by: Jim Mattson <jmattson@google.com>
[Changed "kvm_write_guest(vcpu->kvm" to "kvm_vcpu_write_guest(vcpu".]
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 22 ++++------------------
 1 file changed, 4 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 283aa8601833..3b626d6dc3ac 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7258,9 +7258,8 @@ static int handle_vmoff(struct kvm_vcpu *vcpu)
 static int handle_vmclear(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	u32 zero = 0;
 	gpa_t vmptr;
-	struct vmcs12 *vmcs12;
-	struct page *page;
 
 	if (!nested_vmx_check_permission(vcpu))
 		return 1;
@@ -7271,22 +7270,9 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
 	if (vmptr == vmx->nested.current_vmptr)
 		nested_release_vmcs12(vmx);
 
-	page = nested_get_page(vcpu, vmptr);
-	if (page == NULL) {
-		/*
-		 * For accurate processor emulation, VMCLEAR beyond available
-		 * physical memory should do nothing at all. However, it is
-		 * possible that a nested vmx bug, not a guest hypervisor bug,
-		 * resulted in this case, so let's shut down before doing any
-		 * more damage:
-		 */
-		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
-		return 1;
-	}
-	vmcs12 = kmap(page);
-	vmcs12->launch_state = 0;
-	kunmap(page);
-	nested_release_page(page);
+	kvm_vcpu_write_guest(vcpu,
+			vmptr + offsetof(struct vmcs12, launch_state),
+			&zero, sizeof(zero));
 
 	nested_free_vmcs02(vmx, vmptr);
 
-- 
cgit v1.2.3


From c771c14baa3319c85512e575671bf0bb18824c11 Mon Sep 17 00:00:00 2001
From: Eryu Guan <eguan@redhat.com>
Date: Thu, 2 Mar 2017 15:02:06 -0800
Subject: iomap: invalidate page caches should be after iomap_dio_complete() in
 direct write

After XFS switching to iomap based DIO (commit acdda3aae146 ("xfs:
use iomap_dio_rw")), I started to notice dio29/dio30 tests failures
from LTP run on ppc64 hosts, and they can be reproduced on x86_64
hosts with 512B/1k block size XFS too.

dio29	diotest3 -b 65536 -n 100 -i 1000 -o 1024000
dio30	diotest6 -b 65536 -n 100 -i 1000 -o 1024000

The failure message is like:
bufcmp: offset 0: Expected: 0x62, got 0x0
diotest03    1  TPASS  :  Read with Direct IO, Write without
diotest03    2  TFAIL  :  diotest3.c:142: comparsion failed; child=98 offset=1425408
diotest03    3  TFAIL  :  diotest3.c:194: Write Direct-child 98 failed

Direct write wrote 0x62 but buffer read got zero. This is because,
when doing direct write to a hole or preallocated file, we
invalidate the page caches before converting the extent from
unwritten state to normal state, which is done by
iomap_dio_complete(), thus leave a window for other buffer reader to
cache the unwritten state extent.

Consider this case, with sub-page blocksize XFS, two processes are
direct writing to different blocksize-aligned regions (say 512B) of
the same preallocated file, and reading the region back via buffered
I/O to compare contents.

process A, region [0,512]		process B, region [512,1024]
xfs_file_write_iter
 xfs_file_aio_dio_write
  iomap_dio_rw
   iomap_apply
   invalidate_inode_pages2_range
   					xfs_file_write_iter
				 	xfs_file_aio_dio_write
					  iomap_dio_rw
					   iomap_apply
					   invalidate_inode_pages2_range
					   iomap_dio_complete
					xfs_file_read_iter
					 xfs_file_buffered_aio_read
					  generic_file_read_iter
					   do_generic_file_read
					    <readahead fills pagecache with 0>
   iomap_dio_complete
xfs_file_read_iter
 <read gets 0 from pagecache>

Process A first invalidates page caches, at this point the
underlying extent is still in unwritten state (iomap_dio_complete
not called yet), and process B finishs direct write and populates
page caches via readahead, which caches zeros in page for region A,
then process A reads zeros from page cache, instead of the actual
data.

Fix it by invalidating page caches after converting unwritten extent
to make sure we read content from disk after extent state changed,
as what we did before switching to iomap based dio.

Also introduce a new 'start' variable to save the original write
offset (iomap_dio_complete() updates iocb->ki_pos), and a 'err'
variable for invalidating caches result, cause we can't reuse 'ret'
anymore.

Signed-off-by: Eryu Guan <eguan@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/fs/iomap.c b/fs/iomap.c
index 3ca1a8e44135..141c3cd55a8b 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -846,7 +846,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	struct address_space *mapping = iocb->ki_filp->f_mapping;
 	struct inode *inode = file_inode(iocb->ki_filp);
 	size_t count = iov_iter_count(iter);
-	loff_t pos = iocb->ki_pos, end = iocb->ki_pos + count - 1, ret = 0;
+	loff_t pos = iocb->ki_pos, start = pos;
+	loff_t end = iocb->ki_pos + count - 1, ret = 0;
 	unsigned int flags = IOMAP_DIRECT;
 	struct blk_plug plug;
 	struct iomap_dio *dio;
@@ -887,12 +888,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	}
 
 	if (mapping->nrpages) {
-		ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
+		ret = filemap_write_and_wait_range(mapping, start, end);
 		if (ret)
 			goto out_free_dio;
 
 		ret = invalidate_inode_pages2_range(mapping,
-				iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
+				start >> PAGE_SHIFT, end >> PAGE_SHIFT);
 		WARN_ON_ONCE(ret);
 		ret = 0;
 	}
@@ -941,6 +942,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		__set_current_state(TASK_RUNNING);
 	}
 
+	ret = iomap_dio_complete(dio);
+
 	/*
 	 * Try again to invalidate clean pages which might have been cached by
 	 * non-direct readahead, or faulted in by get_user_pages() if the source
@@ -949,12 +952,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	 * this invalidation fails, tough, the write still worked...
 	 */
 	if (iov_iter_rw(iter) == WRITE && mapping->nrpages) {
-		ret = invalidate_inode_pages2_range(mapping,
-				iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
-		WARN_ON_ONCE(ret);
+		int err = invalidate_inode_pages2_range(mapping,
+				start >> PAGE_SHIFT, end >> PAGE_SHIFT);
+		WARN_ON_ONCE(err);
 	}
 
-	return iomap_dio_complete(dio);
+	return ret;
 
 out_free_dio:
 	kfree(dio);
-- 
cgit v1.2.3


From 312eb712e15868236dd03c67971ab2c1d79b4ce6 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Fri, 17 Feb 2017 18:44:11 +0100
Subject: cgroup: Fix indenting in PID controller documentation

Follow the common documentation style in the file and indent the
interface file description by a tab instead of just a space.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 Documentation/cgroup-v2.txt | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt
index 3b8449f8ac7e..49d7c997fa1e 100644
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/cgroup-v2.txt
@@ -1142,16 +1142,17 @@ used by the kernel.
 
   pids.max
 
- A read-write single value file which exists on non-root cgroups.  The
- default is "max".
+	A read-write single value file which exists on non-root
+	cgroups.  The default is "max".
 
- Hard limit of number of processes.
+	Hard limit of number of processes.
 
   pids.current
 
- A read-only single value file which exists on all cgroups.
+	A read-only single value file which exists on all cgroups.
 
- The number of processes currently in the cgroup and its descendants.
+	The number of processes currently in the cgroup and its
+	descendants.
 
 Organisational operations are not blocked by cgroup policies, so it is
 possible to have pids.current > pids.max.  This can be done by either
-- 
cgit v1.2.3


From 1d18c2747f937f1b5ec65ce6bf4ccb9ca1aea9e8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 1 Mar 2017 15:39:07 -0500
Subject: cgroup/pids: remove spurious suspicious RCU usage warning

pids_can_fork() is special in that the css association is guaranteed
to be stable throughout the function and thus doesn't need RCU
protection around task_css access.  When determining the css to charge
the pid, task_css_check() is used to override the RCU sanity check.

While adding a warning message on fork rejection from pids limit,
135b8b37bd91 ("cgroup: Add pids controller event when fork fails
because of pid limit") incorrectly added a task_css access which is
neither RCU protected or explicitly annotated.  This triggers the
following suspicious RCU usage warning when RCU debugging is enabled.

  cgroup: fork rejected by pids controller in

  ===============================
  [ ERR: suspicious RCU usage.  ]
  4.10.0-work+ #1 Not tainted
  -------------------------------
  ./include/linux/cgroup.h:435 suspicious rcu_dereference_check() usage!

  other info that might help us debug this:

  rcu_scheduler_active = 2, debug_locks = 0
  1 lock held by bash/1748:
   #0:  (&cgroup_threadgroup_rwsem){+++++.}, at: [<ffffffff81052c96>] _do_fork+0xe6/0x6e0

  stack backtrace:
  CPU: 3 PID: 1748 Comm: bash Not tainted 4.10.0-work+ #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-1.fc25 04/01/2014
  Call Trace:
   dump_stack+0x68/0x93
   lockdep_rcu_suspicious+0xd7/0x110
   pids_can_fork+0x1c7/0x1d0
   cgroup_can_fork+0x67/0xc0
   copy_process.part.58+0x1709/0x1e90
   _do_fork+0xe6/0x6e0
   SyS_clone+0x19/0x20
   do_syscall_64+0x5c/0x140
   entry_SYSCALL64_slow_path+0x25/0x25
  RIP: 0033:0x7f7853fab93a
  RSP: 002b:00007ffc12d05c90 EFLAGS: 00000246 ORIG_RAX: 0000000000000038
  RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f7853fab93a
  RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000001200011
  RBP: 00007ffc12d05cc0 R08: 0000000000000000 R09: 00007f78548db700
  R10: 00007f78548db9d0 R11: 0000000000000246 R12: 00000000000006d4
  R13: 0000000000000001 R14: 0000000000000000 R15: 000055e3ebe2c04d
  /asdf

There's no reason to dereference task_css again here when the
associated css is already available.  Fix it by replacing the
task_cgroup() call with css->cgroup.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Mike Galbraith <efault@gmx.de>
Fixes: 135b8b37bd91 ("cgroup: Add pids controller event when fork fails because of pid limit")
Cc: Kenny Yu <kennyyu@fb.com>
Cc: stable@vger.kernel.org # v4.8+
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/cgroup/pids.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c
index e756dae49300..2237201d66d5 100644
--- a/kernel/cgroup/pids.c
+++ b/kernel/cgroup/pids.c
@@ -229,7 +229,7 @@ static int pids_can_fork(struct task_struct *task)
 		/* Only log the first time events_limit is incremented. */
 		if (atomic64_inc_return(&pids->events_limit) == 1) {
 			pr_info("cgroup: fork rejected by pids controller in ");
-			pr_cont_cgroup_path(task_cgroup(current, pids_cgrp_id));
+			pr_cont_cgroup_path(css->cgroup);
 			pr_cont("\n");
 		}
 		cgroup_file_notify(&pids->events_file);
-- 
cgit v1.2.3


From b6a6759daf55dade2b65089957832759d502acfb Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sat, 25 Feb 2017 01:56:48 -0800
Subject: cgroups: censor kernel pointer in debug files

As found in grsecurity, this avoids exposing a kernel pointer through
the cgroup debug entries.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/cgroup/cgroup-v1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 56eba9caa632..1dc22f6b49f5 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -1329,7 +1329,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
 		struct task_struct *task;
 		int count = 0;
 
-		seq_printf(seq, "css_set %p\n", cset);
+		seq_printf(seq, "css_set %pK\n", cset);
 
 		list_for_each_entry(task, &cset->tasks, cg_list) {
 			if (count++ > MAX_TASKS_SHOWN_PER_CSS)
-- 
cgit v1.2.3


From d85fc67dd11e9a32966140677d4d6429ca540b25 Mon Sep 17 00:00:00 2001
From: Gwendal Grignou <gwendal@chromium.org>
Date: Fri, 3 Mar 2017 09:00:09 -0800
Subject: libata: transport: Remove circular dependency at free time

Without this patch, failed probe would not free resources like irq.

ata port tdev object currently hold a reference to the ata port
object.  Therefore the ata port object release function will not get
called until the ata_tport_release is called. But that would never
happen, releasing the last reference of ata port dev is done by
scsi_host_release, which is called by ata_host_release when the ata
port object is released.

The ata device objects actually do not need to explicitly hold a
reference to their real counterpart, given the transport objects are
the children of these objects and device_add() is call for each child.
We know the parent will not be deleted until we call the child's
device_del().

Reported-by: Matthew Whitehead <tedheadster@gmail.com>
Tested-by: Matthew Whitehead <tedheadster@gmail.com>
Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/libata-transport.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c
index 46698232e6bf..19e6e539a061 100644
--- a/drivers/ata/libata-transport.c
+++ b/drivers/ata/libata-transport.c
@@ -224,7 +224,6 @@ static DECLARE_TRANSPORT_CLASS(ata_port_class,
 
 static void ata_tport_release(struct device *dev)
 {
-	put_device(dev->parent);
 }
 
 /**
@@ -284,7 +283,7 @@ int ata_tport_add(struct device *parent,
 	device_initialize(dev);
 	dev->type = &ata_port_type;
 
-	dev->parent = get_device(parent);
+	dev->parent = parent;
 	dev->release = ata_tport_release;
 	dev_set_name(dev, "ata%d", ap->print_id);
 	transport_setup_device(dev);
@@ -348,7 +347,6 @@ static DECLARE_TRANSPORT_CLASS(ata_link_class,
 
 static void ata_tlink_release(struct device *dev)
 {
-	put_device(dev->parent);
 }
 
 /**
@@ -410,7 +408,7 @@ int ata_tlink_add(struct ata_link *link)
 	int error;
 
 	device_initialize(dev);
-	dev->parent = get_device(&ap->tdev);
+	dev->parent = &ap->tdev;
 	dev->release = ata_tlink_release;
 	if (ata_is_host_link(link))
 		dev_set_name(dev, "link%d", ap->print_id);
@@ -589,7 +587,6 @@ static DECLARE_TRANSPORT_CLASS(ata_dev_class,
 
 static void ata_tdev_release(struct device *dev)
 {
-	put_device(dev->parent);
 }
 
 /**
@@ -662,7 +659,7 @@ static int ata_tdev_add(struct ata_device *ata_dev)
 	int error;
 
 	device_initialize(dev);
-	dev->parent = get_device(&link->tdev);
+	dev->parent = &link->tdev;
 	dev->release = ata_tdev_release;
 	if (ata_is_host_link(link))
 		dev_set_name(dev, "dev%d.%d", ap->print_id,ata_dev->devno);
-- 
cgit v1.2.3


From 0580b762a4d6b70817476b90042813f8573283fa Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 6 Mar 2017 15:26:54 -0500
Subject: libata: drop WARN from protocol error in ata_sff_qc_issue()

ata_sff_qc_issue() expects upper layers to never issue commands on a
command protocol that it doesn't implement.  While the assumption
holds fine with the usual IO path, nothing filters based on the
command protocol in the passthrough path (which was added later),
allowing the warning to be tripped with a passthrough command with the
right (well, wrong) protocol.

Failing with AC_ERR_SYSTEM is the right thing to do anyway.  Remove
the unnecessary WARN.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Link: http://lkml.kernel.org/r/CACT4Y+bXkvevNZU8uP6X0QVqsj6wNoUA_1exfTSOzc+SmUtMOA@mail.gmail.com
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/libata-sff.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 2bd92dca3e62..274d6d7193d7 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -1482,7 +1482,6 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc)
 		break;
 
 	default:
-		WARN_ON_ONCE(1);
 		return AC_ERR_SYSTEM;
 	}
 
-- 
cgit v1.2.3


From 637fdbae60d6cb9f6e963c1079d7e0445c86ff7d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 6 Mar 2017 15:33:42 -0500
Subject: workqueue: trigger WARN if queue_delayed_work() is called with NULL
 @wq

If queue_delayed_work() gets called with NULL @wq, the kernel will
oops asynchronuosly on timer expiration which isn't too helpful in
tracking down the offender.  This actually happened with smc.

__queue_delayed_work() already does several input sanity checks
synchronously.  Add NULL @wq check.

Reported-by: Dave Jones <davej@codemonkey.org.uk>
Link: http://lkml.kernel.org/r/20170227171439.jshx3qplflyrgcv7@codemonkey.org.uk
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 072cbc9b175d..c0168b7da1ea 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1507,6 +1507,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
 	struct timer_list *timer = &dwork->timer;
 	struct work_struct *work = &dwork->work;
 
+	WARN_ON_ONCE(!wq);
 	WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
 		     timer->data != (unsigned long)dwork);
 	WARN_ON_ONCE(timer_pending(timer));
-- 
cgit v1.2.3


From 320661b08dd6f1746d5c7ab4eb435ec64b97cd45 Mon Sep 17 00:00:00 2001
From: Tahsin Erdogan <tahsin@google.com>
Date: Sat, 25 Feb 2017 13:00:19 -0800
Subject: percpu: acquire pcpu_lock when updating pcpu_nr_empty_pop_pages

Update to pcpu_nr_empty_pop_pages in pcpu_alloc() is currently done
without holding pcpu_lock. This can lead to bad updates to the variable.
Add missing lock calls.

Fixes: b539b87fed37 ("percpu: implmeent pcpu_nr_empty_pop_pages and chunk->nr_populated")
Signed-off-by: Tahsin Erdogan <tahsin@google.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: stable@vger.kernel.org # v3.18+
---
 mm/percpu.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/mm/percpu.c b/mm/percpu.c
index 5696039b5c07..60a6488e9e6d 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1011,8 +1011,11 @@ area_found:
 		mutex_unlock(&pcpu_alloc_mutex);
 	}
 
-	if (chunk != pcpu_reserved_chunk)
+	if (chunk != pcpu_reserved_chunk) {
+		spin_lock_irqsave(&pcpu_lock, flags);
 		pcpu_nr_empty_pop_pages -= occ_pages;
+		spin_unlock_irqrestore(&pcpu_lock, flags);
+	}
 
 	if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW)
 		pcpu_schedule_balance_work();
-- 
cgit v1.2.3


From 8a1df543de8ad879d3c80bdda4c67ac4f82e7ee0 Mon Sep 17 00:00:00 2001
From: Tahsin Erdogan <tahsin@google.com>
Date: Sat, 25 Feb 2017 12:59:26 -0800
Subject: percpu: remove unused chunk_alloc parameter from pcpu_get_pages()

pcpu_get_pages() doesn't use chunk_alloc parameter, remove it.

Fixes: fbbb7f4e149f ("percpu: remove the usage of separate populated bitmap in percpu-vm")
Signed-off-by: Tahsin Erdogan <tahsin@google.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 mm/percpu-vm.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index 538998a137d2..9ac639499bd1 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -21,7 +21,6 @@ static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
 
 /**
  * pcpu_get_pages - get temp pages array
- * @chunk: chunk of interest
  *
  * Returns pointer to array of pointers to struct page which can be indexed
  * with pcpu_page_idx().  Note that there is only one array and accesses
@@ -30,7 +29,7 @@ static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
  * RETURNS:
  * Pointer to temp pages array on success.
  */
-static struct page **pcpu_get_pages(struct pcpu_chunk *chunk_alloc)
+static struct page **pcpu_get_pages(void)
 {
 	static struct page **pages;
 	size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]);
@@ -275,7 +274,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
 {
 	struct page **pages;
 
-	pages = pcpu_get_pages(chunk);
+	pages = pcpu_get_pages();
 	if (!pages)
 		return -ENOMEM;
 
@@ -313,7 +312,7 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
 	 * successful population attempt so the temp pages array must
 	 * be available now.
 	 */
-	pages = pcpu_get_pages(chunk);
+	pages = pcpu_get_pages();
 	BUG_ON(!pages);
 
 	/* unmap and free */
-- 
cgit v1.2.3


From 040757f738e13caaa9c5078bca79aa97e11dde88 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 5 Mar 2017 15:03:22 -0600
Subject: ucount: Remove the atomicity from ucount->count

Always increment/decrement ucount->count under the ucounts_lock.  The
increments are there already and moving the decrements there means the
locking logic of the code is simpler.  This simplification in the
locking logic fixes a race between put_ucounts and get_ucounts that
could result in a use-after-free because the count could go zero then
be found by get_ucounts and then be freed by put_ucounts.

A bug presumably this one was found by a combination of syzkaller and
KASAN.  JongWhan Kim reported the syzkaller failure and Dmitry Vyukov
spotted the race in the code.

Cc: stable@vger.kernel.org
Fixes: f6b2db1a3e8d ("userns: Make the count of user namespaces per user")
Reported-by: JongHwan Kim <zzoru007@gmail.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Andrei Vagin <avagin@gmail.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/user_namespace.h |  2 +-
 kernel/ucount.c                | 18 +++++++++++-------
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index be765234c0a2..32354b4b4b2b 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -72,7 +72,7 @@ struct ucounts {
 	struct hlist_node node;
 	struct user_namespace *ns;
 	kuid_t uid;
-	atomic_t count;
+	int count;
 	atomic_t ucount[UCOUNT_COUNTS];
 };
 
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 62630a40ab3a..b4eeee03934f 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -144,7 +144,7 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid)
 
 		new->ns = ns;
 		new->uid = uid;
-		atomic_set(&new->count, 0);
+		new->count = 0;
 
 		spin_lock_irq(&ucounts_lock);
 		ucounts = find_ucounts(ns, uid, hashent);
@@ -155,8 +155,10 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid)
 			ucounts = new;
 		}
 	}
-	if (!atomic_add_unless(&ucounts->count, 1, INT_MAX))
+	if (ucounts->count == INT_MAX)
 		ucounts = NULL;
+	else
+		ucounts->count += 1;
 	spin_unlock_irq(&ucounts_lock);
 	return ucounts;
 }
@@ -165,13 +167,15 @@ static void put_ucounts(struct ucounts *ucounts)
 {
 	unsigned long flags;
 
-	if (atomic_dec_and_test(&ucounts->count)) {
-		spin_lock_irqsave(&ucounts_lock, flags);
+	spin_lock_irqsave(&ucounts_lock, flags);
+	ucounts->count -= 1;
+	if (!ucounts->count)
 		hlist_del_init(&ucounts->node);
-		spin_unlock_irqrestore(&ucounts_lock, flags);
+	else
+		ucounts = NULL;
+	spin_unlock_irqrestore(&ucounts_lock, flags);
 
-		kfree(ucounts);
-	}
+	kfree(ucounts);
 }
 
 static inline bool atomic_inc_below(atomic_t *v, int u)
-- 
cgit v1.2.3


From bd571195c9535c0b074fc7cd1b541b93817ed647 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 2 Mar 2017 15:58:03 +0100
Subject: scsi: qedi: fix build error without DEBUG_FS

Without CONFIG_DEBUG_FS, we run into a link error:

drivers/scsi/qedi/qedi_iscsi.o: In function `qedi_ep_poll':
qedi_iscsi.c:(.text.qedi_ep_poll+0x134): undefined reference to `do_not_recover'
drivers/scsi/qedi/qedi_iscsi.o: In function `qedi_ep_disconnect':
qedi_iscsi.c:(.text.qedi_ep_disconnect+0x36c): undefined reference to `do_not_recover'
drivers/scsi/qedi/qedi_iscsi.o: In function `qedi_ep_connect':
qedi_iscsi.c:(.text.qedi_ep_connect+0x350): undefined reference to `do_not_recover'
drivers/scsi/qedi/qedi_fw.o: In function `qedi_tmf_work':
qedi_fw.c:(.text.qedi_tmf_work+0x3b4): undefined reference to `do_not_recover'

This defines the symbol as a constant in this case, as there is no way to
set it to anything other than zero without DEBUG_FS. In addition, I'm renaming
it to qedi_do_not_recover in order to put it into a driver specific namespace,
as "do_not_recover" is a really bad name for a kernel-wide global identifier
when it is used only in one driver.

Fixes: ace7f46ba5fd ("scsi: qedi: Add QLogic FastLinQ offload iSCSI driver framework.")
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Manish Rangankar <Manish.Rangankar@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qedi/qedi_debugfs.c | 16 ++++++++--------
 drivers/scsi/qedi/qedi_fw.c      |  4 ++--
 drivers/scsi/qedi/qedi_gbl.h     |  8 +++++++-
 drivers/scsi/qedi/qedi_iscsi.c   |  8 ++++----
 4 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/drivers/scsi/qedi/qedi_debugfs.c b/drivers/scsi/qedi/qedi_debugfs.c
index 955936274241..59417199bf36 100644
--- a/drivers/scsi/qedi/qedi_debugfs.c
+++ b/drivers/scsi/qedi/qedi_debugfs.c
@@ -14,7 +14,7 @@
 #include <linux/debugfs.h>
 #include <linux/module.h>
 
-int do_not_recover;
+int qedi_do_not_recover;
 static struct dentry *qedi_dbg_root;
 
 void
@@ -74,22 +74,22 @@ qedi_dbg_exit(void)
 static ssize_t
 qedi_dbg_do_not_recover_enable(struct qedi_dbg_ctx *qedi_dbg)
 {
-	if (!do_not_recover)
-		do_not_recover = 1;
+	if (!qedi_do_not_recover)
+		qedi_do_not_recover = 1;
 
 	QEDI_INFO(qedi_dbg, QEDI_LOG_DEBUGFS, "do_not_recover=%d\n",
-		  do_not_recover);
+		  qedi_do_not_recover);
 	return 0;
 }
 
 static ssize_t
 qedi_dbg_do_not_recover_disable(struct qedi_dbg_ctx *qedi_dbg)
 {
-	if (do_not_recover)
-		do_not_recover = 0;
+	if (qedi_do_not_recover)
+		qedi_do_not_recover = 0;
 
 	QEDI_INFO(qedi_dbg, QEDI_LOG_DEBUGFS, "do_not_recover=%d\n",
-		  do_not_recover);
+		  qedi_do_not_recover);
 	return 0;
 }
 
@@ -141,7 +141,7 @@ qedi_dbg_do_not_recover_cmd_read(struct file *filp, char __user *buffer,
 	if (*ppos)
 		return 0;
 
-	cnt = sprintf(buffer, "do_not_recover=%d\n", do_not_recover);
+	cnt = sprintf(buffer, "do_not_recover=%d\n", qedi_do_not_recover);
 	cnt = min_t(int, count, cnt - *ppos);
 	*ppos += cnt;
 	return cnt;
diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c
index c9f0ef4e11b3..2bce3efc66a4 100644
--- a/drivers/scsi/qedi/qedi_fw.c
+++ b/drivers/scsi/qedi/qedi_fw.c
@@ -1461,9 +1461,9 @@ static void qedi_tmf_work(struct work_struct *work)
 		  get_itt(tmf_hdr->rtt), get_itt(ctask->itt), cmd->task_id,
 		  qedi_conn->iscsi_conn_id);
 
-	if (do_not_recover) {
+	if (qedi_do_not_recover) {
 		QEDI_ERR(&qedi->dbg_ctx, "DONT SEND CLEANUP/ABORT %d\n",
-			 do_not_recover);
+			 qedi_do_not_recover);
 		goto abort_ret;
 	}
 
diff --git a/drivers/scsi/qedi/qedi_gbl.h b/drivers/scsi/qedi/qedi_gbl.h
index 8e488de88ece..63d793f46064 100644
--- a/drivers/scsi/qedi/qedi_gbl.h
+++ b/drivers/scsi/qedi/qedi_gbl.h
@@ -12,8 +12,14 @@
 
 #include "qedi_iscsi.h"
 
+#ifdef CONFIG_DEBUG_FS
+extern int qedi_do_not_recover;
+#else
+#define qedi_do_not_recover (0)
+#endif
+
 extern uint qedi_io_tracing;
-extern int do_not_recover;
+
 extern struct scsi_host_template qedi_host_template;
 extern struct iscsi_transport qedi_iscsi_transport;
 extern const struct qed_iscsi_ops *qedi_ops;
diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c
index b9f79d36142d..4cc474364c50 100644
--- a/drivers/scsi/qedi/qedi_iscsi.c
+++ b/drivers/scsi/qedi/qedi_iscsi.c
@@ -833,7 +833,7 @@ qedi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr,
 		return ERR_PTR(ret);
 	}
 
-	if (do_not_recover) {
+	if (qedi_do_not_recover) {
 		ret = -ENOMEM;
 		return ERR_PTR(ret);
 	}
@@ -957,7 +957,7 @@ static int qedi_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
 	struct qedi_endpoint *qedi_ep;
 	int ret = 0;
 
-	if (do_not_recover)
+	if (qedi_do_not_recover)
 		return 1;
 
 	qedi_ep = ep->dd_data;
@@ -1025,7 +1025,7 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep)
 		}
 
 		if (test_bit(QEDI_IN_RECOVERY, &qedi->flags)) {
-			if (do_not_recover) {
+			if (qedi_do_not_recover) {
 				QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
 					  "Do not recover cid=0x%x\n",
 					  qedi_ep->iscsi_cid);
@@ -1039,7 +1039,7 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep)
 		}
 	}
 
-	if (do_not_recover)
+	if (qedi_do_not_recover)
 		goto ep_exit_recover;
 
 	switch (qedi_ep->state) {
-- 
cgit v1.2.3


From 934767c56b0d9dbb95a40e9e6e4d9dcdc3a165ad Mon Sep 17 00:00:00 2001
From: Raghava Aditya Renukunta <RaghavaAditya.Renukunta@microsemi.com>
Date: Thu, 2 Mar 2017 09:21:33 -0800
Subject: scsi: aacraid: Fix typo in blink status

The return status of the adapter check on KERNEL_PANIC is supposed to be
the upper 16 bits of the OMR status register.

Fixes: c421530bf848604e (scsi: aacraid: Reorder Adpater status check)
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Raghava Aditya Renukunta <RaghavaAditya.Renukunta@microsemi.com>
Reviewed-by: Dave Carroll <david.carroll@microsemi.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/aacraid/src.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c
index 2e5338dec621..7b0410e0f569 100644
--- a/drivers/scsi/aacraid/src.c
+++ b/drivers/scsi/aacraid/src.c
@@ -468,7 +468,7 @@ err_out:
 	return -1;
 
 err_blink:
-	return (status > 16) & 0xFF;
+	return (status >> 16) & 0xFF;
 }
 
 static inline u32 aac_get_vector(struct aac_dev *dev)
-- 
cgit v1.2.3


From 23456565acf6d452e0368f7380aecd584c019c67 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 2 Mar 2017 17:14:47 -0800
Subject: scsi: qla2xxx: Fix ql_dump_buffer

Recent printk changes for KERN_CONT cause this logging to be defectively
emitted on multiple lines.  Fix it.

Also reduces object size a trivial amount.

$ size drivers/scsi/qla2xxx/qla_dbg.o*
   text	   data	    bss	    dec	    hex	filename
  39125	      0	      0	  39125	   98d5	drivers/scsi/qla2xxx/qla_dbg.o.new
  39164	      0	      0	  39164	   98fc	drivers/scsi/qla2xxx/qla_dbg.o.old

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_dbg.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c
index 21d9fb7fc887..51b4179469d1 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.c
+++ b/drivers/scsi/qla2xxx/qla_dbg.c
@@ -2707,13 +2707,9 @@ ql_dump_buffer(uint32_t level, scsi_qla_host_t *vha, int32_t id,
 	    "%-+5d  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F\n", size);
 	ql_dbg(level, vha, id,
 	    "----- -----------------------------------------------\n");
-	for (cnt = 0; cnt < size; cnt++, buf++) {
-		if (cnt % 16 == 0)
-			ql_dbg(level, vha, id, "%04x:", cnt & ~0xFU);
-		printk(" %02x", *buf);
-		if (cnt % 16 == 15)
-			printk("\n");
+	for (cnt = 0; cnt < size; cnt += 16) {
+		ql_dbg(level, vha, id, "%04x: ", cnt);
+		print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1,
+			       buf + cnt, min(16U, size - cnt), false);
 	}
-	if (cnt % 16 != 0)
-		printk("\n");
 }
-- 
cgit v1.2.3


From c527de41aea24c2cdb6638818008d810013b4d39 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 3 Mar 2017 07:57:16 -0700
Subject: scsi: vmw_pvscsi: handle the return value from pci_alloc_irq_vectors
 correctly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It returns the number of vectors allocated when successful, so check for
a negative error only.

Fixes: 2e48e349 ("scsi: vmw_pvscsi: switch to pci_alloc_irq_vectors")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Loïc Yhuel <loic.yhuel@gmail.com>
Tested-by: Loïc Yhuel <loic.yhuel@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/vmw_pvscsi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c
index ef474a748744..c374e3b5c678 100644
--- a/drivers/scsi/vmw_pvscsi.c
+++ b/drivers/scsi/vmw_pvscsi.c
@@ -1487,7 +1487,7 @@ static int pvscsi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		irq_flag &= ~PCI_IRQ_MSI;
 
 	error = pci_alloc_irq_vectors(adapter->dev, 1, 1, irq_flag);
-	if (error)
+	if (error < 0)
 		goto out_reset_adapter;
 
 	adapter->use_req_threshold = pvscsi_setup_req_threshold(adapter, true);
-- 
cgit v1.2.3


From db6b2060bcae1ce1f9bde73a423e710b625ae1ef Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sat, 4 Mar 2017 00:07:04 -0800
Subject: scsi: qedf: Fix defective logging format and argument mismatches

Add __printf compiler verification of format and arguments.  Fix
fallout.

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Chad Dupuis <chad.dupuis@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qedf/qedf_dbg.h | 13 ++++++++-----
 drivers/scsi/qedf/qedf_fip.c |  2 +-
 drivers/scsi/qedf/qedf_io.c  |  4 ++--
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/qedf/qedf_dbg.h b/drivers/scsi/qedf/qedf_dbg.h
index 23bd70628a2f..7d173f48a81e 100644
--- a/drivers/scsi/qedf/qedf_dbg.h
+++ b/drivers/scsi/qedf/qedf_dbg.h
@@ -81,14 +81,17 @@ struct qedf_dbg_ctx {
 #define QEDF_INFO(pdev, level, fmt, ...)	\
 		qedf_dbg_info(pdev, __func__, __LINE__, level, fmt,	\
 			      ## __VA_ARGS__)
-
-extern void qedf_dbg_err(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
+__printf(4, 5)
+void qedf_dbg_err(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
 			  const char *fmt, ...);
-extern void qedf_dbg_warn(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
+__printf(4, 5)
+void qedf_dbg_warn(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
 			   const char *, ...);
-extern void qedf_dbg_notice(struct qedf_dbg_ctx *qedf, const char *func,
+__printf(4, 5)
+void qedf_dbg_notice(struct qedf_dbg_ctx *qedf, const char *func,
 			    u32 line, const char *, ...);
-extern void qedf_dbg_info(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
+__printf(5, 6)
+void qedf_dbg_info(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
 			  u32 info, const char *fmt, ...);
 
 /* GRC Dump related defines */
diff --git a/drivers/scsi/qedf/qedf_fip.c b/drivers/scsi/qedf/qedf_fip.c
index 868d423380d1..ed58b9104f58 100644
--- a/drivers/scsi/qedf/qedf_fip.c
+++ b/drivers/scsi/qedf/qedf_fip.c
@@ -203,7 +203,7 @@ void qedf_fip_recv(struct qedf_ctx *qedf, struct sk_buff *skb)
 			case FIP_DT_MAC:
 				mp = (struct fip_mac_desc *)desc;
 				QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2,
-				    "fd_mac=%pM.\n", __func__, mp->fd_mac);
+				    "fd_mac=%pM\n", mp->fd_mac);
 				ether_addr_copy(cvl_mac, mp->fd_mac);
 				break;
 			case FIP_DT_NAME:
diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c
index ee0dcf9d3aba..46debe5034af 100644
--- a/drivers/scsi/qedf/qedf_io.c
+++ b/drivers/scsi/qedf/qedf_io.c
@@ -1342,7 +1342,7 @@ void qedf_scsi_completion(struct qedf_ctx *qedf, struct fcoe_cqe *cqe,
 		} else {
 			refcount = kref_read(&io_req->refcount);
 			QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO,
-			    "%d:0:%d:%d xid=0x%0x op=0x%02x "
+			    "%d:0:%d:%lld xid=0x%0x op=0x%02x "
 			    "lba=%02x%02x%02x%02x cdb_status=%d "
 			    "fcp_resid=0x%x refcount=%d.\n",
 			    qedf->lport->host->host_no, sc_cmd->device->id,
@@ -1426,7 +1426,7 @@ void qedf_scsi_done(struct qedf_ctx *qedf, struct qedf_ioreq *io_req,
 
 	sc_cmd->result = result << 16;
 	refcount = kref_read(&io_req->refcount);
-	QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "%d:0:%d:%d: Completing "
+	QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "%d:0:%d:%lld: Completing "
 	    "sc_cmd=%p result=0x%08x op=0x%02x lba=0x%02x%02x%02x%02x, "
 	    "allowed=%d retries=%d refcount=%d.\n",
 	    qedf->lport->host->host_no, sc_cmd->device->id,
-- 
cgit v1.2.3


From fd2b18b4a7bf01453324733a18297ae9a197a4ae Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 6 Mar 2017 10:32:27 -0800
Subject: scsi: qedf: Use vsprintf extension %pad

Using %llx for a dma_addr_t can lead to format/argument mismatches.  Use
%pad and the address of the dma_addr_t instead.

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Chad Dupuis <chad.dupuis@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qedf/qedf_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index d9d7a86b5f8b..8e2a160490e6 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -2456,8 +2456,8 @@ static int qedf_alloc_bdq(struct qedf_ctx *qedf)
 	}
 
 	QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC,
-	    "BDQ PBL addr=0x%p dma=0x%llx.\n", qedf->bdq_pbl,
-	    qedf->bdq_pbl_dma);
+		  "BDQ PBL addr=0x%p dma=%pad\n",
+		  qedf->bdq_pbl, &qedf->bdq_pbl_dma);
 
 	/*
 	 * Populate BDQ PBL with physical and virtual address of individual
-- 
cgit v1.2.3


From 33cc559a81397bc813c392617d40ddfdfa3cffbd Mon Sep 17 00:00:00 2001
From: Tomas Jasek <tomsik68@gmail.com>
Date: Fri, 3 Mar 2017 13:45:48 +0100
Subject: scsi: lpfc: replace init_timer by setup_timer

This patch shortens every init_timer in lpfc module followed by function
and data assignment using setup_timer.  This is purely cleanup patch, it
does not add new functionality nor remove any existing functionality.

An init_timer call in this form:

    init_timer(&vport->fc_disctmo);
    vport->fc_disctmo.function = lpfc_disc_timeout;
    vport->fc_disctmo.data = vport;

is shortened to:

    setup_timer(&vport->fc_disctmo, lpfc_disc_timeout, vport);

It increases readability and reduces chances of mistakes done by
developers.

Signed-off-by: Tomas Jasek <tomsik68@gmail.com>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Cc: James Smart <james.smart@broadcom.com>
Cc: Dick Kennedy <dick.kennedy@broadcom.com>
Cc: "James E.J. Bottomley" <jejb@linux.vnet.ibm.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: <linux-scsi@vger.kernel.org>
Acked-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_hbadisc.c |  5 ++---
 drivers/scsi/lpfc/lpfc_init.c    | 47 +++++++++++++++-------------------------
 2 files changed, 19 insertions(+), 33 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 194a14d5f8a9..2612dac75186 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -4344,9 +4344,8 @@ lpfc_initialize_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 {
 	INIT_LIST_HEAD(&ndlp->els_retry_evt.evt_listp);
 	INIT_LIST_HEAD(&ndlp->dev_loss_evt.evt_listp);
-	init_timer(&ndlp->nlp_delayfunc);
-	ndlp->nlp_delayfunc.function = lpfc_els_retry_delay;
-	ndlp->nlp_delayfunc.data = (unsigned long)ndlp;
+	setup_timer(&ndlp->nlp_delayfunc, lpfc_els_retry_delay,
+			(unsigned long)ndlp);
 	ndlp->nlp_DID = did;
 	ndlp->vport = vport;
 	ndlp->phba = vport->phba;
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 0ee429d773f3..f395f2e4aa97 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -3734,17 +3734,14 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
 	INIT_LIST_HEAD(&vport->rcv_buffer_list);
 	spin_lock_init(&vport->work_port_lock);
 
-	init_timer(&vport->fc_disctmo);
-	vport->fc_disctmo.function = lpfc_disc_timeout;
-	vport->fc_disctmo.data = (unsigned long)vport;
+	setup_timer(&vport->fc_disctmo, lpfc_disc_timeout,
+			(unsigned long)vport);
 
-	init_timer(&vport->els_tmofunc);
-	vport->els_tmofunc.function = lpfc_els_timeout;
-	vport->els_tmofunc.data = (unsigned long)vport;
+	setup_timer(&vport->els_tmofunc, lpfc_els_timeout,
+			(unsigned long)vport);
 
-	init_timer(&vport->delayed_disc_tmo);
-	vport->delayed_disc_tmo.function = lpfc_delayed_disc_tmo;
-	vport->delayed_disc_tmo.data = (unsigned long)vport;
+	setup_timer(&vport->delayed_disc_tmo, lpfc_delayed_disc_tmo,
+			(unsigned long)vport);
 
 	error = scsi_add_host_with_dma(shost, dev, &phba->pcidev->dev);
 	if (error)
@@ -5406,21 +5403,15 @@ lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba)
 	INIT_LIST_HEAD(&phba->luns);
 
 	/* MBOX heartbeat timer */
-	init_timer(&psli->mbox_tmo);
-	psli->mbox_tmo.function = lpfc_mbox_timeout;
-	psli->mbox_tmo.data = (unsigned long) phba;
+	setup_timer(&psli->mbox_tmo, lpfc_mbox_timeout, (unsigned long)phba);
 	/* Fabric block timer */
-	init_timer(&phba->fabric_block_timer);
-	phba->fabric_block_timer.function = lpfc_fabric_block_timeout;
-	phba->fabric_block_timer.data = (unsigned long) phba;
+	setup_timer(&phba->fabric_block_timer, lpfc_fabric_block_timeout,
+			(unsigned long)phba);
 	/* EA polling mode timer */
-	init_timer(&phba->eratt_poll);
-	phba->eratt_poll.function = lpfc_poll_eratt;
-	phba->eratt_poll.data = (unsigned long) phba;
+	setup_timer(&phba->eratt_poll, lpfc_poll_eratt,
+			(unsigned long)phba);
 	/* Heartbeat timer */
-	init_timer(&phba->hb_tmofunc);
-	phba->hb_tmofunc.function = lpfc_hb_timeout;
-	phba->hb_tmofunc.data = (unsigned long)phba;
+	setup_timer(&phba->hb_tmofunc, lpfc_hb_timeout, (unsigned long)phba);
 
 	return 0;
 }
@@ -5446,9 +5437,8 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba)
 	 */
 
 	/* FCP polling mode timer */
-	init_timer(&phba->fcp_poll_timer);
-	phba->fcp_poll_timer.function = lpfc_poll_timeout;
-	phba->fcp_poll_timer.data = (unsigned long) phba;
+	setup_timer(&phba->fcp_poll_timer, lpfc_poll_timeout,
+			(unsigned long)phba);
 
 	/* Host attention work mask setup */
 	phba->work_ha_mask = (HA_ERATT | HA_MBATT | HA_LATT);
@@ -5617,14 +5607,11 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 	 * Initialize timers used by driver
 	 */
 
-	init_timer(&phba->rrq_tmr);
-	phba->rrq_tmr.function = lpfc_rrq_timeout;
-	phba->rrq_tmr.data = (unsigned long)phba;
+	setup_timer(&phba->rrq_tmr, lpfc_rrq_timeout, (unsigned long)phba);
 
 	/* FCF rediscover timer */
-	init_timer(&phba->fcf.redisc_wait);
-	phba->fcf.redisc_wait.function = lpfc_sli4_fcf_redisc_wait_tmo;
-	phba->fcf.redisc_wait.data = (unsigned long)phba;
+	setup_timer(&phba->fcf.redisc_wait, lpfc_sli4_fcf_redisc_wait_tmo,
+			(unsigned long)phba);
 
 	/*
 	 * Control structure for handling external multi-buffer mailbox
-- 
cgit v1.2.3


From 70e5afd57d6d97d69bf5fd0187c2bb5a79990504 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Sat, 4 Mar 2017 09:30:21 -0800
Subject: scsi: lpfc: remove redundant assignment of sgel

From: Colin Ian King <colin.king@canonical.com>

In the NVMET_FCOP_RSP case, sgel is assigned but never used and
hence is redundant and can be removed.

Detected by CoverityScan, CID#1411658 ("Unused value")

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_nvmet.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index c421e1738ee9..e59a0a89d357 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -1445,7 +1445,6 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
 
 	case NVMET_FCOP_RSP:
 		/* Words 0 - 2 */
-		sgel = &rsp->sg[0];
 		physaddr = rsp->rspdma;
 		wqe->fcp_trsp.bde.tus.f.bdeFlags = BUFF_TYPE_BDE_64;
 		wqe->fcp_trsp.bde.tus.f.bdeSize = rsp->rsplen;
-- 
cgit v1.2.3


From 7aabe84b8a1bb7c3f75fb6a23dc96f8999bdcc8f Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Sat, 4 Mar 2017 09:30:22 -0800
Subject: scsi: lpfc: sanity check hrq is null before dereferencing it

From: Colin Ian King <colin.king@canonical.com>

The sanity check for hrq should be moved to before the deference
of hrq to ensure we don't perform a null pointer deference.

Detected by CoverityScan, CID#1411650 ("Dereference before null check")

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_sli.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index d8d8693040ac..562a5286bc47 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -15185,17 +15185,17 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp,
 		drq = drqp[idx];
 		cq  = cqp[idx];
 
-		if (hrq->entry_count != drq->entry_count) {
-			status = -EINVAL;
-			goto out;
-		}
-
 		/* sanity check on queue memory */
 		if (!hrq || !drq || !cq) {
 			status = -ENODEV;
 			goto out;
 		}
 
+		if (hrq->entry_count != drq->entry_count) {
+			status = -EINVAL;
+			goto out;
+		}
+
 		if (idx == 0) {
 			bf_set(lpfc_mbx_rq_create_num_pages,
 			       &rq_create->u.request,
-- 
cgit v1.2.3


From 332ba3b5d6d27a60d445704ed7c88c7e9f958a30 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Sat, 4 Mar 2017 09:30:23 -0800
Subject: scsi: lpfc: don't dereference dma_buf->iocbq before null check

From: Colin Ian King <colin.king@canonical.com>

dma_buf->iocbq is being dereferenced immediately before it is
being null checked, so we have a potential null pointer dereference
bug.  Fix this by only dereferencing it only once we have passed
a null check on the pointer.

Detected by CoverityScan, CID#1411652 ("Dereference before null check")

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_mem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c
index c61d8d692ede..5986c7957199 100644
--- a/drivers/scsi/lpfc/lpfc_mem.c
+++ b/drivers/scsi/lpfc/lpfc_mem.c
@@ -646,7 +646,6 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba)
 	}
 
 	dma_buf->iocbq = lpfc_sli_get_iocbq(phba);
-	dma_buf->iocbq->iocb_flag = LPFC_IO_NVMET;
 	if (!dma_buf->iocbq) {
 		kfree(dma_buf->context);
 		pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt,
@@ -658,6 +657,7 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba)
 				"2621 Ran out of nvmet iocb/WQEs\n");
 		return NULL;
 	}
+	dma_buf->iocbq->iocb_flag = LPFC_IO_NVMET;
 	nvmewqe = dma_buf->iocbq;
 	wqe = (union lpfc_wqe128 *)&nvmewqe->wqe;
 	/* Initialize WQE */
-- 
cgit v1.2.3


From d11f54b7d1d40463024be3b40ab7be2c8a84fa43 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Sat, 4 Mar 2017 09:30:24 -0800
Subject: scsi: lpfc: fix missing spin_unlock on sql_list_lock

From: Colin Ian King <colin.king@canonical.com>

In the case where sglq is null, the current code just returns without
unlocking the spinlock sql_list_lock. Fix this by breaking out of the
while loop and the exit path will then unlock and return NULL as was
the original intention.

Detected by CoverityScan, CID#1411635 ("Missing unlock")

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_sli.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 562a5286bc47..7389c130ffcc 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -952,7 +952,7 @@ __lpfc_sli_get_els_sglq(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq)
 	start_sglq = sglq;
 	while (!found) {
 		if (!sglq)
-			return NULL;
+			break;
 		if (ndlp && ndlp->active_rrqs_xri_bitmap &&
 		    test_bit(sglq->sli4_lxritag,
 		    ndlp->active_rrqs_xri_bitmap)) {
-- 
cgit v1.2.3


From 5d181531bc6169e19a02a27d202cf0e982db9d0e Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Sat, 4 Mar 2017 09:30:25 -0800
Subject: scsi: lpfc: Fix crash during Hardware error recovery on SLI3 adapters

if REG_VPI fails, the driver was incorrectly issuing INIT_VFI
(a SLI4 command) on a SLI3 adapter.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_els.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 2d26440e6f2f..d260a1391baf 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -8371,11 +8371,17 @@ lpfc_cmpl_reg_new_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 			spin_lock_irq(shost->host_lock);
 			vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
 			spin_unlock_irq(shost->host_lock);
-			if (vport->port_type == LPFC_PHYSICAL_PORT
-				&& !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG))
-				lpfc_issue_init_vfi(vport);
-			else
+			if (mb->mbxStatus == MBX_NOT_FINISHED)
+				break;
+			if ((vport->port_type == LPFC_PHYSICAL_PORT) &&
+			    !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG)) {
+				if (phba->sli_rev == LPFC_SLI_REV4)
+					lpfc_issue_init_vfi(vport);
+				else
+					lpfc_initial_flogi(vport);
+			} else {
 				lpfc_initial_fdisc(vport);
+			}
 			break;
 		}
 	} else {
-- 
cgit v1.2.3


From 8b3616392d32d2b04ce649caf6684da1b7050d8c Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Sat, 4 Mar 2017 09:30:26 -0800
Subject: scsi: lpfc: Fix RCTL value on NVME LS request and response

NVME LS requests and responses had wrong R_CTL values.
Use the FC4 ELS Request and Response defines (defines badly
named, they are FC4 LS's) instead of the base ELS values.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_nvme.c  | 2 +-
 drivers/scsi/lpfc/lpfc_nvmet.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index 609a908ea9db..6346d4dee9ff 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -316,7 +316,7 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp,
 	bf_set(wqe_dfctl, &wqe->gen_req.wge_ctl, 0);
 	bf_set(wqe_si, &wqe->gen_req.wge_ctl, 1);
 	bf_set(wqe_la, &wqe->gen_req.wge_ctl, 1);
-	bf_set(wqe_rctl, &wqe->gen_req.wge_ctl, FC_RCTL_DD_UNSOL_CTL);
+	bf_set(wqe_rctl, &wqe->gen_req.wge_ctl, FC_RCTL_ELS4_REQ);
 	bf_set(wqe_type, &wqe->gen_req.wge_ctl, FC_TYPE_NVME);
 
 	/* Word 6 */
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index e59a0a89d357..6c2221aac394 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -1114,7 +1114,7 @@ lpfc_nvmet_prep_ls_wqe(struct lpfc_hba *phba,
 	bf_set(wqe_dfctl, &wqe->xmit_sequence.wge_ctl, 0);
 	bf_set(wqe_ls, &wqe->xmit_sequence.wge_ctl, 1);
 	bf_set(wqe_la, &wqe->xmit_sequence.wge_ctl, 0);
-	bf_set(wqe_rctl, &wqe->xmit_sequence.wge_ctl, FC_RCTL_DD_SOL_CTL);
+	bf_set(wqe_rctl, &wqe->xmit_sequence.wge_ctl, FC_RCTL_ELS4_REP);
 	bf_set(wqe_type, &wqe->xmit_sequence.wge_ctl, FC_TYPE_NVME);
 
 	/* Word 6 */
-- 
cgit v1.2.3


From b06a622ffe8dddcc09dad23dc768f7d1540fb4d6 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Sat, 4 Mar 2017 09:30:27 -0800
Subject: scsi: lpfc: Fix NVME CMD IU byte swapped word 1 problem

Word 1 in NVME CMD IU appears byte swapped from value placed in WQE
Should be Big Endian value in WQE word 16

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_nvme.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index 6346d4dee9ff..bf3ccc5d59ac 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -620,15 +620,15 @@ lpfc_nvme_adj_fcp_sgls(struct lpfc_vport *vport,
 	 * Embed the payload in the last half of the WQE
 	 * WQE words 16-30 get the NVME CMD IU payload
 	 *
-	 * WQE Word 16 is already setup with flags
-	 * WQE words 17-19 get payload Words 2-4
+	 * WQE words 16-19 get payload Words 1-4
 	 * WQE words 20-21 get payload Words 6-7
 	 * WQE words 22-29 get payload Words 16-23
 	 */
-	wptr = &wqe->words[17];  /* WQE ptr */
+	wptr = &wqe->words[16];  /* WQE ptr */
 	dptr = (uint32_t *)nCmd->cmdaddr;  /* payload ptr */
-	dptr += 2;		/* Skip Words 0-1 in payload */
+	dptr++;			/* Skip Word 0 in payload */
 
+	*wptr++ = *dptr++;	/* Word 1 */
 	*wptr++ = *dptr++;	/* Word 2 */
 	*wptr++ = *dptr++;	/* Word 3 */
 	*wptr++ = *dptr++;	/* Word 4 */
@@ -978,9 +978,6 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
 			bf_set(wqe_cmd_type, &wqe->generic.wqe_com,
 			       NVME_WRITE_CMD);
 
-			/* Word 16 */
-			wqe->words[16] = LPFC_NVME_EMBED_WRITE;
-
 			phba->fc4NvmeOutputRequests++;
 		} else {
 			/* Word 7 */
@@ -1002,9 +999,6 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
 			bf_set(wqe_cmd_type, &wqe->generic.wqe_com,
 			       NVME_READ_CMD);
 
-			/* Word 16 */
-			wqe->words[16] = LPFC_NVME_EMBED_READ;
-
 			phba->fc4NvmeInputRequests++;
 		}
 	} else {
@@ -1026,9 +1020,6 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
 		/* Word 11 */
 		bf_set(wqe_cmd_type, &wqe->generic.wqe_com, NVME_READ_CMD);
 
-		/* Word 16 */
-		wqe->words[16] = LPFC_NVME_EMBED_CMD;
-
 		phba->fc4NvmeControlRequests++;
 	}
 	/*
-- 
cgit v1.2.3


From a5068b46958870633ea340692f301507ef78d00b Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Sat, 4 Mar 2017 09:30:28 -0800
Subject: scsi: lpfc: Fix IO submission if WQ is full

For both initiator and target: if WQ is full, return -EBUSY.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_nvme.c  | 2 +-
 drivers/scsi/lpfc/lpfc_nvmet.c | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index bf3ccc5d59ac..e1bf31eca845 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -1310,7 +1310,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
 				 "sid: x%x did: x%x oxid: x%x\n",
 				 ret, vport->fc_myDID, ndlp->nlp_DID,
 				 lpfc_ncmd->cur_iocbq.sli4_xritag);
-		ret = -EINVAL;
+		ret = -EBUSY;
 		goto out_free_nvme_buf;
 	}
 
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index 6c2221aac394..735e2ba70198 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -571,6 +571,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
 		lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
 				"6102 Bad state IO x%x aborted\n",
 				ctxp->oxid);
+		rc = -ENXIO;
 		goto aerr;
 	}
 
@@ -580,6 +581,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
 		lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
 				"6152 FCP Drop IO x%x: Prep\n",
 				ctxp->oxid);
+		rc = -ENXIO;
 		goto aerr;
 	}
 
@@ -618,8 +620,9 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
 	ctxp->wqeq->hba_wqidx = 0;
 	nvmewqeq->context2 = NULL;
 	nvmewqeq->context3 = NULL;
+	rc = -EBUSY;
 aerr:
-	return -ENXIO;
+	return rc;
 }
 
 static void
-- 
cgit v1.2.3


From 3ebd9b4701ef77358030a0ef9cb6586db2149712 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Sat, 4 Mar 2017 09:30:29 -0800
Subject: scsi: lpfc: Fix nvme allocation bug on failed
 nvme_fc_register_localport

nvme bufs get allocated even when the registration fails.
Move allocation into the rsgistration success path.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_nvme.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index e1bf31eca845..26cde7c040ba 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -2164,10 +2164,10 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
 		lport->vport = vport;
 		INIT_LIST_HEAD(&lport->rport_list);
 		vport->nvmei_support = 1;
+		len  = lpfc_new_nvme_buf(vport, phba->sli4_hba.nvme_xri_max);
+		vport->phba->total_nvme_bufs += len;
 	}
 
-	len  = lpfc_new_nvme_buf(vport, phba->sli4_hba.nvme_xri_max);
-	vport->phba->total_nvme_bufs += len;
 	return ret;
 }
 
-- 
cgit v1.2.3


From 318083ad9230ff13cdac34ae4c4135e0c4e2d9ad Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Sat, 4 Mar 2017 09:30:30 -0800
Subject: scsi: lpfc: add NVME exchange aborts

previous code did little more than log a message.

This patch adds abort path support, modeled after the SCSI code paths.
Currently addresses only the initiator path. Target path under
development, but stubbed out.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc.h         |  1 +
 drivers/scsi/lpfc/lpfc_hbadisc.c |  2 ++
 drivers/scsi/lpfc/lpfc_init.c    |  7 +++++
 drivers/scsi/lpfc/lpfc_nvme.c    | 64 ++++++++++++++++++++++++++++++++++++----
 drivers/scsi/lpfc/lpfc_nvme.h    |  1 +
 drivers/scsi/lpfc/lpfc_nvmet.c   | 21 +++++++++++--
 drivers/scsi/lpfc/lpfc_sli.c     | 51 ++++++++++++++++++++++++++++++--
 drivers/scsi/lpfc/lpfc_sli4.h    |  6 ++++
 8 files changed, 143 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 0bba2e30b4f0..de6cd57dc7f6 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -692,6 +692,7 @@ struct lpfc_hba {
 					 * capability
 					 */
 #define HBA_NVME_IOQ_FLUSH      0x80000 /* NVME IO queues flushed. */
+#define NVME_XRI_ABORT_EVENT	0x100000
 
 	uint32_t fcp_ring_in_use; /* When polling test if intr-hndlr active*/
 	struct lpfc_dmabuf slim2p;
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 2612dac75186..bd8635d303d2 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -641,6 +641,8 @@ lpfc_work_done(struct lpfc_hba *phba)
 			lpfc_handle_rrq_active(phba);
 		if (phba->hba_flag & FCP_XRI_ABORT_EVENT)
 			lpfc_sli4_fcp_xri_abort_event_proc(phba);
+		if (phba->hba_flag & NVME_XRI_ABORT_EVENT)
+			lpfc_sli4_nvme_xri_abort_event_proc(phba);
 		if (phba->hba_flag & ELS_XRI_ABORT_EVENT)
 			lpfc_sli4_els_xri_abort_event_proc(phba);
 		if (phba->hba_flag & ASYNC_EVENT)
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index f395f2e4aa97..a0cba631869e 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -5723,6 +5723,8 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 		/* Initialize the Abort nvme buffer list used by driver */
 		spin_lock_init(&phba->sli4_hba.abts_nvme_buf_list_lock);
 		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvme_buf_list);
+		/* Fast-path XRI aborted CQ Event work queue list */
+		INIT_LIST_HEAD(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue);
 	}
 
 	/* This abort list used by worker thread */
@@ -8960,6 +8962,11 @@ lpfc_sli4_cq_event_release_all(struct lpfc_hba *phba)
 	/* Pending ELS XRI abort events */
 	list_splice_init(&phba->sli4_hba.sp_els_xri_aborted_work_queue,
 			 &cqelist);
+	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+		/* Pending NVME XRI abort events */
+		list_splice_init(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue,
+				 &cqelist);
+	}
 	/* Pending asynnc events */
 	list_splice_init(&phba->sli4_hba.sp_asynce_work_queue,
 			 &cqelist);
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index 26cde7c040ba..b9012fee1632 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -1277,6 +1277,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
 	pnvme_fcreq->private = (void *)lpfc_ncmd;
 	lpfc_ncmd->nvmeCmd = pnvme_fcreq;
 	lpfc_ncmd->nrport = rport;
+	lpfc_ncmd->ndlp = ndlp;
 	lpfc_ncmd->start_time = jiffies;
 
 	lpfc_nvme_prep_io_cmd(vport, lpfc_ncmd, ndlp);
@@ -1812,10 +1813,10 @@ lpfc_post_nvme_sgl_list(struct lpfc_hba *phba,
 						pdma_phys_sgl1, cur_xritag);
 				if (status) {
 					/* failure, put on abort nvme list */
-					lpfc_ncmd->exch_busy = 1;
+					lpfc_ncmd->flags |= LPFC_SBUF_XBUSY;
 				} else {
 					/* success, put on NVME buffer list */
-					lpfc_ncmd->exch_busy = 0;
+					lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY;
 					lpfc_ncmd->status = IOSTAT_SUCCESS;
 					num_posted++;
 				}
@@ -1845,10 +1846,10 @@ lpfc_post_nvme_sgl_list(struct lpfc_hba *phba,
 					 struct lpfc_nvme_buf, list);
 			if (status) {
 				/* failure, put on abort nvme list */
-				lpfc_ncmd->exch_busy = 1;
+				lpfc_ncmd->flags |= LPFC_SBUF_XBUSY;
 			} else {
 				/* success, put on NVME buffer list */
-				lpfc_ncmd->exch_busy = 0;
+				lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY;
 				lpfc_ncmd->status = IOSTAT_SUCCESS;
 				num_posted++;
 			}
@@ -2090,7 +2091,7 @@ lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_nvme_buf *lpfc_ncmd)
 	unsigned long iflag = 0;
 
 	lpfc_ncmd->nonsg_phys = 0;
-	if (lpfc_ncmd->exch_busy) {
+	if (lpfc_ncmd->flags & LPFC_SBUF_XBUSY) {
 		spin_lock_irqsave(&phba->sli4_hba.abts_nvme_buf_list_lock,
 					iflag);
 		lpfc_ncmd->nvmeCmd = NULL;
@@ -2453,3 +2454,56 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 			 "6168: State error: lport %p, rport%p FCID x%06x\n",
 			 vport->localport, ndlp->rport, ndlp->nlp_DID);
 }
+
+/**
+ * lpfc_sli4_nvme_xri_aborted - Fast-path process of NVME xri abort
+ * @phba: pointer to lpfc hba data structure.
+ * @axri: pointer to the fcp xri abort wcqe structure.
+ *
+ * This routine is invoked by the worker thread to process a SLI4 fast-path
+ * FCP aborted xri.
+ **/
+void
+lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba,
+			   struct sli4_wcqe_xri_aborted *axri)
+{
+	uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri);
+	uint16_t rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri);
+	struct lpfc_nvme_buf *lpfc_ncmd, *next_lpfc_ncmd;
+	struct lpfc_nodelist *ndlp;
+	unsigned long iflag = 0;
+	int rrq_empty = 0;
+
+	if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
+		return;
+	spin_lock_irqsave(&phba->hbalock, iflag);
+	spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+	list_for_each_entry_safe(lpfc_ncmd, next_lpfc_ncmd,
+				 &phba->sli4_hba.lpfc_abts_nvme_buf_list,
+				 list) {
+		if (lpfc_ncmd->cur_iocbq.sli4_xritag == xri) {
+			list_del(&lpfc_ncmd->list);
+			lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY;
+			lpfc_ncmd->status = IOSTAT_SUCCESS;
+			spin_unlock(
+				&phba->sli4_hba.abts_nvme_buf_list_lock);
+
+			rrq_empty = list_empty(&phba->active_rrq_list);
+			spin_unlock_irqrestore(&phba->hbalock, iflag);
+			ndlp = lpfc_ncmd->ndlp;
+			if (ndlp) {
+				lpfc_set_rrq_active(
+					phba, ndlp,
+					lpfc_ncmd->cur_iocbq.sli4_lxritag,
+					rxid, 1);
+				lpfc_sli4_abts_err_handler(phba, ndlp, axri);
+			}
+			lpfc_release_nvme_buf(phba, lpfc_ncmd);
+			if (rrq_empty)
+				lpfc_worker_wake_up(phba);
+			return;
+		}
+	}
+	spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+	spin_unlock_irqrestore(&phba->hbalock, iflag);
+}
diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h
index b2fae5e813f8..1347deb8dd6c 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.h
+++ b/drivers/scsi/lpfc/lpfc_nvme.h
@@ -57,6 +57,7 @@ struct lpfc_nvme_buf {
 	struct list_head list;
 	struct nvmefc_fcp_req *nvmeCmd;
 	struct lpfc_nvme_rport *nrport;
+	struct lpfc_nodelist *ndlp;
 
 	uint32_t timeout;
 
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index 735e2ba70198..48ce9858bc11 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -734,6 +734,21 @@ lpfc_nvmet_update_targetport(struct lpfc_hba *phba)
 	return 0;
 }
 
+/**
+ * lpfc_sli4_nvmet_xri_aborted - Fast-path process of nvmet xri abort
+ * @phba: pointer to lpfc hba data structure.
+ * @axri: pointer to the nvmet xri abort wcqe structure.
+ *
+ * This routine is invoked by the worker thread to process a SLI4 fast-path
+ * NVMET aborted xri.
+ **/
+void
+lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
+			    struct sli4_wcqe_xri_aborted *axri)
+{
+	/* TODO: work in progress */
+}
+
 void
 lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba)
 {
@@ -958,7 +973,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 
 	atomic_inc(&tgtp->rcv_fcp_cmd_drop);
 	lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
-			"6159 FCP Drop IO x%x: nvmet_fc_rcv_fcp_req x%x\n",
+			"6159 FCP Drop IO x%x: err x%x\n",
 			ctxp->oxid, rc);
 dropit:
 	lpfc_nvmeio_data(phba, "NVMET FCP DROP: xri x%x sz %d from %06x\n",
@@ -1683,8 +1698,8 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba,
 	struct lpfc_nodelist *ndlp;
 
 	lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
-			"6067 %s: Entrypoint: sid %x xri %x\n", __func__,
-			sid, xri);
+			"6067 Abort: sid %x xri x%x/x%x\n",
+			sid, xri, ctxp->wqeq->sli4_xritag);
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
 
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 7389c130ffcc..8a606d0d2c79 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -12212,6 +12212,41 @@ void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *phba)
 	}
 }
 
+/**
+ * lpfc_sli4_nvme_xri_abort_event_proc - Process nvme xri abort event
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked by the worker thread to process all the pending
+ * SLI4 NVME abort XRI events.
+ **/
+void lpfc_sli4_nvme_xri_abort_event_proc(struct lpfc_hba *phba)
+{
+	struct lpfc_cq_event *cq_event;
+
+	/* First, declare the fcp xri abort event has been handled */
+	spin_lock_irq(&phba->hbalock);
+	phba->hba_flag &= ~NVME_XRI_ABORT_EVENT;
+	spin_unlock_irq(&phba->hbalock);
+	/* Now, handle all the fcp xri abort events */
+	while (!list_empty(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue)) {
+		/* Get the first event from the head of the event queue */
+		spin_lock_irq(&phba->hbalock);
+		list_remove_head(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue,
+				 cq_event, struct lpfc_cq_event, list);
+		spin_unlock_irq(&phba->hbalock);
+		/* Notify aborted XRI for NVME work queue */
+		if (phba->nvmet_support) {
+			lpfc_sli4_nvmet_xri_aborted(phba,
+						    &cq_event->cqe.wcqe_axri);
+		} else {
+			lpfc_sli4_nvme_xri_aborted(phba,
+						   &cq_event->cqe.wcqe_axri);
+		}
+		/* Free the event processed back to the free pool */
+		lpfc_sli4_cq_event_release(phba, cq_event);
+	}
+}
+
 /**
  * lpfc_sli4_els_xri_abort_event_proc - Process els xri abort event
  * @phba: pointer to lpfc hba data structure.
@@ -12709,10 +12744,22 @@ lpfc_sli4_sp_handle_abort_xri_wcqe(struct lpfc_hba *phba,
 		spin_unlock_irqrestore(&phba->hbalock, iflags);
 		workposted = true;
 		break;
+	case LPFC_NVME:
+		spin_lock_irqsave(&phba->hbalock, iflags);
+		list_add_tail(&cq_event->list,
+			      &phba->sli4_hba.sp_nvme_xri_aborted_work_queue);
+		/* Set the nvme xri abort event flag */
+		phba->hba_flag |= NVME_XRI_ABORT_EVENT;
+		spin_unlock_irqrestore(&phba->hbalock, iflags);
+		workposted = true;
+		break;
 	default:
 		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-				"0603 Invalid work queue CQE subtype (x%x)\n",
-				cq->subtype);
+				"0603 Invalid CQ subtype %d: "
+				"%08x %08x %08x %08x\n",
+				cq->subtype, wcqe->word0, wcqe->parameter,
+				wcqe->word2, wcqe->word3);
+		lpfc_sli4_cq_event_release(phba, cq_event);
 		workposted = false;
 		break;
 	}
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index 91153c9f6d18..710458cf11d6 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -642,6 +642,7 @@ struct lpfc_sli4_hba {
 	struct list_head sp_asynce_work_queue;
 	struct list_head sp_fcp_xri_aborted_work_queue;
 	struct list_head sp_els_xri_aborted_work_queue;
+	struct list_head sp_nvme_xri_aborted_work_queue;
 	struct list_head sp_unsol_work_queue;
 	struct lpfc_sli4_link link_state;
 	struct lpfc_sli4_lnk_info lnk_info;
@@ -794,9 +795,14 @@ void lpfc_sli4_fcf_redisc_event_proc(struct lpfc_hba *);
 int lpfc_sli4_resume_rpi(struct lpfc_nodelist *,
 			void (*)(struct lpfc_hba *, LPFC_MBOXQ_t *), void *);
 void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *);
+void lpfc_sli4_nvme_xri_abort_event_proc(struct lpfc_hba *phba);
 void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *);
 void lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *,
 			       struct sli4_wcqe_xri_aborted *);
+void lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba,
+				struct sli4_wcqe_xri_aborted *axri);
+void lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
+				 struct sli4_wcqe_xri_aborted *axri);
 void lpfc_sli4_els_xri_aborted(struct lpfc_hba *,
 			       struct sli4_wcqe_xri_aborted *);
 void lpfc_sli4_vport_delete_els_xri_aborted(struct lpfc_vport *);
-- 
cgit v1.2.3


From 96418b5e2c8867da3279d877f5d1ffabfe460c3d Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Sat, 4 Mar 2017 09:30:31 -0800
Subject: scsi: lpfc: Fix eh_deadline setting for sli3 adapters.

A previous change unilaterally removed the hba reset entry point
from the sli3 host template. This was done to allow tape devices
being used for back up from being removed. Why was this done ?
When there was non-responding device on the fabric, the error
escalation policy would escalate to the reset handler. When the
reset handler was called, it would reset the adapter, dropping
link, thus logging out and terminating all i/o's - on any target.
If there was a tape device on the same adapter that wasn't in
error, it would kill the tape i/o's, effectively killing the
tape device state.  With the reset point removed, the adapter
reset avoided the fabric logout, allowing the other devices to
continue to operate unaffected. A hack - yes. Hint: we really
need a transport I_T nexus reset callback added to the eh process
(in between the SCSI target reset and hba reset points), so a
fc logout could occur to the one bad target only and stop the error
escalation process.

This patch commonizes the approach so it can be used for sli3 and sli4
adapters, but mandates the admin, via module parameter, specifically
identify which adapters the resets are to be removed for. Additionally,
bus_reset, which sends Target Reset TMFs to all targets, is also removed
from the template as it too has the same effect as the adapter reset.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Laurence Oberman <loberman@redhat.com>
Tested-by:   Laurence Oberman <loberman@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc.h      |  1 +
 drivers/scsi/lpfc/lpfc_attr.c |  6 +++++
 drivers/scsi/lpfc/lpfc_crtn.h |  4 ++-
 drivers/scsi/lpfc/lpfc_init.c | 61 ++++++++++++++++++++++++++++++++++++++++---
 drivers/scsi/lpfc/lpfc_scsi.c |  3 +--
 5 files changed, 69 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index de6cd57dc7f6..763f32dd2d23 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -99,6 +99,7 @@ struct lpfc_sli2_slim;
 #define FC_MAX_ADPTMSG		64
 
 #define MAX_HBAEVT	32
+#define MAX_HBAS_NO_RESET 16
 
 /* Number of MSI-X vectors the driver uses */
 #define LPFC_MSIX_VECTORS	2
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 4114cf4308d0..b741dcb8ee64 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -3010,6 +3010,12 @@ MODULE_PARM_DESC(lpfc_poll, "FCP ring polling mode control:"
 static DEVICE_ATTR(lpfc_poll, S_IRUGO | S_IWUSR,
 		   lpfc_poll_show, lpfc_poll_store);
 
+int lpfc_no_hba_reset_cnt;
+unsigned long lpfc_no_hba_reset[MAX_HBAS_NO_RESET] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+module_param_array(lpfc_no_hba_reset, ulong, &lpfc_no_hba_reset_cnt, 0444);
+MODULE_PARM_DESC(lpfc_no_hba_reset, "WWPN of HBAs that should not be reset");
+
 LPFC_ATTR(sli_mode, 0, 0, 3,
 	"SLI mode selector:"
 	" 0 - auto (SLI-3 if supported),"
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index 843dd73004da..54e6ac42fbcd 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -384,7 +384,7 @@ void lpfc_free_sysfs_attr(struct lpfc_vport *);
 extern struct device_attribute *lpfc_hba_attrs[];
 extern struct device_attribute *lpfc_vport_attrs[];
 extern struct scsi_host_template lpfc_template;
-extern struct scsi_host_template lpfc_template_s3;
+extern struct scsi_host_template lpfc_template_no_hr;
 extern struct scsi_host_template lpfc_template_nvme;
 extern struct scsi_host_template lpfc_vport_template;
 extern struct fc_function_template lpfc_transport_functions;
@@ -554,3 +554,5 @@ void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba,
 				struct lpfc_wcqe_complete *abts_cmpl);
 extern int lpfc_enable_nvmet_cnt;
 extern unsigned long long lpfc_enable_nvmet[];
+extern int lpfc_no_hba_reset_cnt;
+extern unsigned long lpfc_no_hba_reset[];
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index a0cba631869e..4fa21a9fd883 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -3555,6 +3555,44 @@ out_free_mem:
 	return rc;
 }
 
+static uint64_t
+lpfc_get_wwpn(struct lpfc_hba *phba)
+{
+	uint64_t wwn;
+	int rc;
+	LPFC_MBOXQ_t *mboxq;
+	MAILBOX_t *mb;
+
+
+	mboxq = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool,
+						GFP_KERNEL);
+	if (!mboxq)
+		return (uint64_t)-1;
+
+	/* First get WWN of HBA instance */
+	lpfc_read_nv(phba, mboxq);
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+	if (rc != MBX_SUCCESS) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"6019 Mailbox failed , mbxCmd x%x "
+				"READ_NV, mbxStatus x%x\n",
+				bf_get(lpfc_mqe_command, &mboxq->u.mqe),
+				bf_get(lpfc_mqe_status, &mboxq->u.mqe));
+		mempool_free(mboxq, phba->mbox_mem_pool);
+		return (uint64_t) -1;
+	}
+	mb = &mboxq->u.mb;
+	memcpy(&wwn, (char *)mb->un.varRDnvp.portname, sizeof(uint64_t));
+	/* wwn is WWPN of HBA instance */
+	mempool_free(mboxq, phba->mbox_mem_pool);
+	if (phba->sli_rev == LPFC_SLI_REV4)
+		return be64_to_cpu(wwn);
+	else
+		return (((wwn & 0xffffffff00000000) >> 32) |
+			((wwn & 0x00000000ffffffff) << 32));
+
+}
+
 /**
  * lpfc_sli4_nvme_sgl_update - update xri-sgl sizing and mapping
  * @phba: pointer to lpfc hba data structure.
@@ -3676,17 +3714,32 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
 	struct lpfc_vport *vport;
 	struct Scsi_Host  *shost = NULL;
 	int error = 0;
+	int i;
+	uint64_t wwn;
+	bool use_no_reset_hba = false;
+
+	wwn = lpfc_get_wwpn(phba);
+
+	for (i = 0; i < lpfc_no_hba_reset_cnt; i++) {
+		if (wwn == lpfc_no_hba_reset[i]) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+					"6020 Setting use_no_reset port=%llx\n",
+					wwn);
+			use_no_reset_hba = true;
+			break;
+		}
+	}
 
 	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) {
 		if (dev != &phba->pcidev->dev) {
 			shost = scsi_host_alloc(&lpfc_vport_template,
 						sizeof(struct lpfc_vport));
 		} else {
-			if (phba->sli_rev == LPFC_SLI_REV4)
+			if (!use_no_reset_hba)
 				shost = scsi_host_alloc(&lpfc_template,
 						sizeof(struct lpfc_vport));
 			else
-				shost = scsi_host_alloc(&lpfc_template_s3,
+				shost = scsi_host_alloc(&lpfc_template_no_hr,
 						sizeof(struct lpfc_vport));
 		}
 	} else if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
@@ -5472,7 +5525,8 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba)
 
 	/* Initialize the host templates the configured values. */
 	lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt;
-	lpfc_template_s3.sg_tablesize = phba->cfg_sg_seg_cnt;
+	lpfc_template_no_hr.sg_tablesize = phba->cfg_sg_seg_cnt;
+	lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt;
 
 	/* There are going to be 2 reserved BDEs: 1 FCP cmnd + 1 FCP rsp */
 	if (phba->cfg_enable_bg) {
@@ -5693,6 +5747,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 	/* Initialize the host templates with the updated values. */
 	lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt;
 	lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt;
+	lpfc_template_no_hr.sg_tablesize = phba->cfg_sg_seg_cnt;
 
 	if (phba->cfg_sg_dma_buf_size  <= LPFC_MIN_SG_SLI4_BUF_SZ)
 		phba->cfg_sg_dma_buf_size = LPFC_MIN_SG_SLI4_BUF_SZ;
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 9d6384af9fce..bcfd6d6ab16d 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -5953,7 +5953,7 @@ struct scsi_host_template lpfc_template_nvme = {
 	.track_queue_depth	= 0,
 };
 
-struct scsi_host_template lpfc_template_s3 = {
+struct scsi_host_template lpfc_template_no_hr = {
 	.module			= THIS_MODULE,
 	.name			= LPFC_DRIVER_NAME,
 	.proc_name		= LPFC_DRIVER_NAME,
@@ -6015,7 +6015,6 @@ struct scsi_host_template lpfc_vport_template = {
 	.eh_abort_handler	= lpfc_abort_handler,
 	.eh_device_reset_handler = lpfc_device_reset_handler,
 	.eh_target_reset_handler = lpfc_target_reset_handler,
-	.eh_bus_reset_handler	= lpfc_bus_reset_handler,
 	.slave_alloc		= lpfc_slave_alloc,
 	.slave_configure	= lpfc_slave_configure,
 	.slave_destroy		= lpfc_slave_destroy,
-- 
cgit v1.2.3


From 856984b71cefab1580e9439e5382db1247d689b0 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Sat, 4 Mar 2017 09:30:32 -0800
Subject: scsi: lpfc: add transport eh_timed_out reference

Christoph's prior patch missed the template for the sli3 adapters,
which is now the "no host reset" template. Add the transport
eh_timed_out handler to the no host reset template

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_scsi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index bcfd6d6ab16d..54fd0c81ceaf 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -5959,6 +5959,7 @@ struct scsi_host_template lpfc_template_no_hr = {
 	.proc_name		= LPFC_DRIVER_NAME,
 	.info			= lpfc_info,
 	.queuecommand		= lpfc_queuecommand,
+	.eh_timed_out		= fc_eh_timed_out,
 	.eh_abort_handler	= lpfc_abort_handler,
 	.eh_device_reset_handler = lpfc_device_reset_handler,
 	.eh_target_reset_handler = lpfc_target_reset_handler,
-- 
cgit v1.2.3


From 166d721120c1cf768af11706c3e0411324bf138f Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Sat, 4 Mar 2017 09:30:33 -0800
Subject: scsi: lpfc: Rework lpfc Kconfig for NVME options

Reworked Kconfig so that lfpc only requires the scsi stack.
NVME Initiator and NVME Target support can be enabled if
the other NVMe subsystems have been enabled.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/Kconfig           | 19 ++++++++++++++++---
 drivers/scsi/lpfc/lpfc_nvme.c  | 18 +++++++++++++++---
 drivers/scsi/lpfc/lpfc_nvmet.c | 10 ++++++++++
 3 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 230043c1c90f..4bf55b5d78be 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1241,19 +1241,32 @@ config SCSI_LPFC
 	tristate "Emulex LightPulse Fibre Channel Support"
 	depends on PCI && SCSI
 	depends on SCSI_FC_ATTRS
-	depends on NVME_FC && NVME_TARGET_FC
 	select CRC_T10DIF
-	help
+	---help---
           This lpfc driver supports the Emulex LightPulse
           Family of Fibre Channel PCI host adapters.
 
 config SCSI_LPFC_DEBUG_FS
 	bool "Emulex LightPulse Fibre Channel debugfs Support"
 	depends on SCSI_LPFC && DEBUG_FS
-	help
+	---help---
 	  This makes debugging information from the lpfc driver
 	  available via the debugfs filesystem.
 
+config LPFC_NVME_INITIATOR
+	bool "Emulex LightPulse Fibre Channel NVME Initiator Support"
+	depends on SCSI_LPFC && NVME_FC
+	---help---
+	  This enables NVME Initiator support in the Emulex lpfc driver.
+
+config LPFC_NVME_TARGET
+	bool "Emulex LightPulse Fibre Channel NVME Initiator Support"
+	depends on SCSI_LPFC && NVME_TARGET_FC
+	---help---
+	  This enables NVME Target support in the Emulex lpfc driver.
+	  Target enablement must still be enabled on a per adapter
+	  basis by module parameters.
+
 config SCSI_SIM710
 	tristate "Simple 53c710 SCSI support (Compaq, NCR machines)"
 	depends on (EISA || MCA) && SCSI
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index b9012fee1632..0a4c19081409 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -2127,11 +2127,12 @@ lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_nvme_buf *lpfc_ncmd)
 int
 lpfc_nvme_create_localport(struct lpfc_vport *vport)
 {
+	int ret = 0;
 	struct lpfc_hba  *phba = vport->phba;
 	struct nvme_fc_port_info nfcp_info;
 	struct nvme_fc_local_port *localport;
 	struct lpfc_nvme_lport *lport;
-	int len, ret = 0;
+	int len;
 
 	/* Initialize this localport instance.  The vport wwn usage ensures
 	 * that NPIV is accounted for.
@@ -2148,8 +2149,12 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
 	/* localport is allocated from the stack, but the registration
 	 * call allocates heap memory as well as the private area.
 	 */
+#ifdef CONFIG_LPFC_NVME_INITIATOR
 	ret = nvme_fc_register_localport(&nfcp_info, &lpfc_nvme_template,
 					 &vport->phba->pcidev->dev, &localport);
+#else
+	ret = -ENOMEM;
+#endif
 	if (!ret) {
 		lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME | LOG_NVME_DISC,
 				 "6005 Successfully registered local "
@@ -2185,6 +2190,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
 void
 lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
 {
+#ifdef CONFIG_LPFC_NVME_INITIATOR
 	struct nvme_fc_local_port *localport;
 	struct lpfc_nvme_lport *lport;
 	struct lpfc_nvme_rport *rport = NULL, *rport_next = NULL;
@@ -2200,7 +2206,6 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
 	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,
 			 "6011 Destroying NVME localport %p\n",
 			 localport);
-
 	list_for_each_entry_safe(rport, rport_next, &lport->rport_list, list) {
 		/* The last node ref has to get released now before the rport
 		 * private memory area is released by the transport.
@@ -2214,6 +2219,7 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
 					 "6008 rport fail destroy %x\n", ret);
 		wait_for_completion_timeout(&rport->rport_unreg_done, 5);
 	}
+
 	/* lport's rport list is clear.  Unregister
 	 * lport and release resources.
 	 */
@@ -2237,6 +2243,7 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
 				 "Failed, status x%x\n",
 				 ret);
 	}
+#endif
 }
 
 void
@@ -2267,6 +2274,7 @@ lpfc_nvme_update_localport(struct lpfc_vport *vport)
 int
 lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 {
+#ifdef CONFIG_LPFC_NVME_INITIATOR
 	int ret = 0;
 	struct nvme_fc_local_port *localport;
 	struct lpfc_nvme_lport *lport;
@@ -2340,7 +2348,6 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 			rpinfo.port_role |= FC_PORT_ROLE_NVME_INITIATOR;
 		rpinfo.port_name = wwn_to_u64(ndlp->nlp_portname.u.wwn);
 		rpinfo.node_name = wwn_to_u64(ndlp->nlp_nodename.u.wwn);
-
 		ret = nvme_fc_register_remoteport(localport, &rpinfo,
 						  &remote_port);
 		if (!ret) {
@@ -2376,6 +2383,9 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 				 ndlp->nlp_type, ndlp->nlp_DID, ndlp);
 	}
 	return ret;
+#else
+	return 0;
+#endif
 }
 
 /* lpfc_nvme_unregister_port - unbind the DID and port_role from this rport.
@@ -2393,6 +2403,7 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 void
 lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 {
+#ifdef CONFIG_LPFC_NVME_INITIATOR
 	int ret;
 	struct nvme_fc_local_port *localport;
 	struct lpfc_nvme_lport *lport;
@@ -2450,6 +2461,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 	return;
 
  input_err:
+#endif
 	lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
 			 "6168: State error: lport %p, rport%p FCID x%06x\n",
 			 vport->localport, ndlp->rport, ndlp->nlp_DID);
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index 48ce9858bc11..a69ca6ea1d6c 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -671,9 +671,13 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
 	lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP |
 					   NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED;
 
+#ifdef CONFIG_LPFC_NVME_TARGET
 	error = nvmet_fc_register_targetport(&pinfo, &lpfc_tgttemplate,
 					     &phba->pcidev->dev,
 					     &phba->targetport);
+#else
+	error = -ENOMEM;
+#endif
 	if (error) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC,
 				"6025 Cannot register NVME targetport "
@@ -752,6 +756,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
 void
 lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba)
 {
+#ifdef CONFIG_LPFC_NVME_TARGET
 	struct lpfc_nvmet_tgtport *tgtp;
 
 	if (phba->nvmet_support == 0)
@@ -763,6 +768,7 @@ lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba)
 		wait_for_completion_timeout(&tgtp->tport_unreg_done, 5);
 	}
 	phba->targetport = NULL;
+#endif
 }
 
 /**
@@ -782,6 +788,7 @@ static void
 lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 			   struct hbq_dmabuf *nvmebuf)
 {
+#ifdef CONFIG_LPFC_NVME_TARGET
 	struct lpfc_nvmet_tgtport *tgtp;
 	struct fc_frame_header *fc_hdr;
 	struct lpfc_nvmet_rcv_ctx *ctxp;
@@ -862,6 +869,7 @@ dropit:
 
 	atomic_inc(&tgtp->xmt_ls_abort);
 	lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, sid, oxid);
+#endif
 }
 
 /**
@@ -883,6 +891,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 			    struct rqb_dmabuf *nvmebuf,
 			    uint64_t isr_timestamp)
 {
+#ifdef CONFIG_LPFC_NVME_TARGET
 	struct lpfc_nvmet_rcv_ctx *ctxp;
 	struct lpfc_nvmet_tgtport *tgtp;
 	struct fc_frame_header *fc_hdr;
@@ -988,6 +997,7 @@ dropit:
 		/* We assume a rcv'ed cmd ALWAYs fits into 1 buffer */
 		lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf);
 	}
+#endif
 }
 
 /**
-- 
cgit v1.2.3


From 43140ca68d1a071ddbe92f10a3256e01701ae390 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Sat, 4 Mar 2017 09:30:34 -0800
Subject: scsi: lpfc: Rename LPFC_MAX_EQ_DELAY to LPFC_MAX_EQ_DELAY_EQID_CNT

Without apriori understanding of what the define is, the name gives
a very different impression of what it is (a max delay value
for an EQ).  Rename the define so it reflects what it is: the number
of EQ IDs that can be set in one instance of the MODIFY_EQ_DELAY
mbx command.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_attr.c | 3 ++-
 drivers/scsi/lpfc/lpfc_hw4.h  | 4 ++--
 drivers/scsi/lpfc/lpfc_init.c | 7 ++-----
 drivers/scsi/lpfc/lpfc_sli.c  | 5 ++++-
 4 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index b741dcb8ee64..fbd3a563be53 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -4457,7 +4457,8 @@ lpfc_fcp_imax_store(struct device *dev, struct device_attribute *attr,
 		return -EINVAL;
 
 	phba->cfg_fcp_imax = (uint32_t)val;
-	for (i = 0; i < phba->io_channel_irqs; i++)
+
+	for (i = 0; i < phba->io_channel_irqs; i += LPFC_MAX_EQ_DELAY_EQID_CNT)
 		lpfc_modify_hba_eq_delay(phba, i);
 
 	return strlen(buf);
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
index cfdb068a3bfc..15277705cb6b 100644
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -1001,7 +1001,7 @@ struct eq_delay_info {
 	uint32_t phase;
 	uint32_t delay_multi;
 };
-#define	LPFC_MAX_EQ_DELAY	8
+#define	LPFC_MAX_EQ_DELAY_EQID_CNT	8
 
 struct sgl_page_pairs {
 	uint32_t sgl_pg0_addr_lo;
@@ -1070,7 +1070,7 @@ struct lpfc_mbx_modify_eq_delay {
 	union {
 		struct {
 			uint32_t num_eq;
-			struct eq_delay_info eq[LPFC_MAX_EQ_DELAY];
+			struct eq_delay_info eq[LPFC_MAX_EQ_DELAY_EQID_CNT];
 		} request;
 		struct {
 			uint32_t word0;
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 4fa21a9fd883..c883110178ea 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -8756,12 +8756,9 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
 		}
 	}
 
-	/*
-	 * Configure EQ delay multipier for interrupt coalescing using
-	 * MODIFY_EQ_DELAY for all EQs created, LPFC_MAX_EQ_DELAY at a time.
-	 */
-	for (qidx = 0; qidx < io_channel; qidx += LPFC_MAX_EQ_DELAY)
+	for (qidx = 0; qidx < io_channel; qidx += LPFC_MAX_EQ_DELAY_EQID_CNT)
 		lpfc_modify_hba_eq_delay(phba, qidx);
+
 	return 0;
 
 out_destroy:
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 8a606d0d2c79..618cdacf13dd 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -1,3 +1,4 @@
+
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
@@ -13874,6 +13875,8 @@ lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset)
  * @startq: The starting FCP EQ to modify
  *
  * This function sends an MODIFY_EQ_DELAY mailbox command to the HBA.
+ * The command allows up to LPFC_MAX_EQ_DELAY_EQID_CNT EQ ID's to be
+ * updated in one mailbox command.
  *
  * The @phba struct is used to send mailbox command to HBA. The @startq
  * is used to get the starting FCP EQ to change.
@@ -13926,7 +13929,7 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq)
 		eq_delay->u.request.eq[cnt].phase = 0;
 		eq_delay->u.request.eq[cnt].delay_multi = dmult;
 		cnt++;
-		if (cnt >= LPFC_MAX_EQ_DELAY)
+		if (cnt >= LPFC_MAX_EQ_DELAY_EQID_CNT)
 			break;
 	}
 	eq_delay->u.request.num_eq = cnt;
-- 
cgit v1.2.3


From da6b044a28fe603fe2c3fd908cda8150aa0abe74 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Sat, 4 Mar 2017 09:30:35 -0800
Subject: scsi: lpfc: correct double print

Correct a merge error that had debug data printed twice for the
same element

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_debugfs.c | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 9f4798e9d938..913eed822cb8 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -3653,17 +3653,6 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf,
 			idiag.ptr_private = phba->sli4_hba.nvmels_cq;
 			goto pass_check;
 		}
-		/* NVME LS complete queue */
-		if (phba->sli4_hba.nvmels_cq &&
-		    phba->sli4_hba.nvmels_cq->queue_id == queid) {
-			/* Sanity check */
-			rc = lpfc_idiag_que_param_check(
-					phba->sli4_hba.nvmels_cq, index, count);
-			if (rc)
-				goto error_out;
-			idiag.ptr_private = phba->sli4_hba.nvmels_cq;
-			goto pass_check;
-		}
 		/* FCP complete queue */
 		if (phba->sli4_hba.fcp_cq) {
 			for (qidx = 0; qidx < phba->cfg_fcp_io_channel;
@@ -3738,17 +3727,6 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf,
 			idiag.ptr_private = phba->sli4_hba.nvmels_wq;
 			goto pass_check;
 		}
-		/* NVME LS work queue */
-		if (phba->sli4_hba.nvmels_wq &&
-		    phba->sli4_hba.nvmels_wq->queue_id == queid) {
-			/* Sanity check */
-			rc = lpfc_idiag_que_param_check(
-					phba->sli4_hba.nvmels_wq, index, count);
-			if (rc)
-				goto error_out;
-			idiag.ptr_private = phba->sli4_hba.nvmels_wq;
-			goto pass_check;
-		}
 		/* FCP work queue */
 		if (phba->sli4_hba.fcp_wq) {
 			for (qidx = 0; qidx < phba->cfg_fcp_io_channel;
-- 
cgit v1.2.3


From ba3bd6e2a9a2753439a1fd1fe39e8d5162fb3aa9 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Sat, 4 Mar 2017 09:30:36 -0800
Subject: scsi: lpfc: remove dead sli3 nvme code

Remove nvme teardown calls that should not be there on sli3 devices

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_init.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index c883110178ea..661bd25a404a 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -10446,12 +10446,7 @@ lpfc_pci_remove_one_s3(struct pci_dev *pdev)
 	fc_remove_host(shost);
 	scsi_remove_host(shost);
 
-	/* Perform ndlp cleanup on the physical port.  The nvme and nvmet
-	 * localports are destroyed after to cleanup all transport memory.
-	 */
 	lpfc_cleanup(vport);
-	lpfc_nvmet_destroy_targetport(phba);
-	lpfc_nvme_destroy_localport(vport);
 
 	/*
 	 * Bring down the SLI Layer. This step disable all interrupts,
-- 
cgit v1.2.3


From cd46cdedb3238f1f878c42650ec05c6344c7083d Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Sat, 4 Mar 2017 09:30:37 -0800
Subject: scsi: lpfc: correct rdp diag portnames

NVME merge reverted diag port names to the physical port.
They should be the vport.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_els.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index d260a1391baf..d9c61d030034 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -5177,15 +5177,15 @@ lpfc_rdp_res_speed(struct fc_rdp_port_speed_desc *desc, struct lpfc_hba *phba)
 
 static uint32_t
 lpfc_rdp_res_diag_port_names(struct fc_rdp_port_name_desc *desc,
-		struct lpfc_hba *phba)
+		struct lpfc_vport *vport)
 {
 
 	desc->tag = cpu_to_be32(RDP_PORT_NAMES_DESC_TAG);
 
-	memcpy(desc->port_names.wwnn, phba->wwnn,
+	memcpy(desc->port_names.wwnn, &vport->fc_nodename,
 			sizeof(desc->port_names.wwnn));
 
-	memcpy(desc->port_names.wwpn, phba->wwpn,
+	memcpy(desc->port_names.wwpn, &vport->fc_portname,
 			sizeof(desc->port_names.wwpn));
 
 	desc->length = cpu_to_be32(sizeof(desc->port_names));
@@ -5279,7 +5279,7 @@ lpfc_els_rdp_cmpl(struct lpfc_hba *phba, struct lpfc_rdp_context *rdp_context,
 	len += lpfc_rdp_res_link_error((struct fc_rdp_link_error_status_desc *)
 				       (len + pcmd), &rdp_context->link_stat);
 	len += lpfc_rdp_res_diag_port_names((struct fc_rdp_port_name_desc *)
-					     (len + pcmd), phba);
+					     (len + pcmd), vport);
 	len += lpfc_rdp_res_attach_port_names((struct fc_rdp_port_name_desc *)
 					(len + pcmd), vport, ndlp);
 	len += lpfc_rdp_res_fec_desc((struct fc_fec_rdp_desc *)(len + pcmd),
-- 
cgit v1.2.3


From 2ade92ae6d6572858acb2bde6d3664af3ad592e2 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Sat, 4 Mar 2017 09:30:38 -0800
Subject: scsi: lpfc: code cleanups in NVME initiator base

This patch addresses the smatch issues identified by Dan Carpenter
in http://www.spinics.net/lists/linux-scsi/msg105663.html

The issues are:

drivers/scsi/lpfc/lpfc_hbadisc.c:316 lpfc_dev_loss_tmo_handler()
warn: we tested 'vport->load_flag & 2' before and it was 'false'

Action: removed item from test

drivers/scsi/lpfc/lpfc_hbadisc.c:701 lpfc_work_done()
warn: test_bit() takes a bit number

Action: changed definition so bit number

drivers/scsi/lpfc/lpfc_hbadisc.c:2206 lpfc_mbx_cmpl_fcf_scan_read_fcf_rec()
error: uninitialized symbol 'vlan_id'.
drivers/scsi/lpfc/lpfc_hbadisc.c:2582 lpfc_mbx_cmpl_fcf_rr_read_fcf_rec()
error: uninitialized symbol 'vlan_id'.
drivers/scsi/lpfc/lpfc_hbadisc.c:2683 lpfc_mbx_cmpl_read_fcf_rec() error:
uninitialized symbol 'vlan_id'.

Action: initilized value

drivers/scsi/lpfc/lpfc_hbadisc.c:4025 lpfc_register_remote_port()
error: we previously assumed 'rdata' could be null (see line 4023)

Action: refactored check block

drivers/scsi/lpfc/lpfc_hbadisc.c:4613 lpfc_sli4_dequeue_nport_iocbs()
error: double unlock 'irq:'

Action: removed inner irq reference

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc.h         |  2 +-
 drivers/scsi/lpfc/lpfc_hbadisc.c | 17 +++++++++--------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 763f32dd2d23..257bbdd0f0b8 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -105,7 +105,7 @@ struct lpfc_sli2_slim;
 #define LPFC_MSIX_VECTORS	2
 
 /* lpfc wait event data ready flag */
-#define LPFC_DATA_READY		(1<<0)
+#define LPFC_DATA_READY		0	/* bit 0 */
 
 /* queue dump line buffer size */
 #define LPFC_LBUF_SZ		128
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index bd8635d303d2..180b072beef6 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -313,8 +313,7 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
 				 ndlp->nlp_state, ndlp->nlp_rpi);
 	}
 
-	if (!(vport->load_flag & FC_UNLOADING) &&
-	    !(ndlp->nlp_flag & NLP_DELAY_TMO) &&
+	if (!(ndlp->nlp_flag & NLP_DELAY_TMO) &&
 	    !(ndlp->nlp_flag & NLP_NPR_2B_DISC) &&
 	    (ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) &&
 	    (ndlp->nlp_state != NLP_STE_REG_LOGIN_ISSUE) &&
@@ -2175,7 +2174,7 @@ lpfc_mbx_cmpl_fcf_scan_read_fcf_rec(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
 	uint32_t boot_flag, addr_mode;
 	uint16_t fcf_index, next_fcf_index;
 	struct lpfc_fcf_rec *fcf_rec = NULL;
-	uint16_t vlan_id;
+	uint16_t vlan_id = LPFC_FCOE_NULL_VID;
 	bool select_new_fcf;
 	int rc;
 
@@ -4022,9 +4021,11 @@ lpfc_register_remote_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 		rdata = rport->dd_data;
 		/* break the link before dropping the ref */
 		ndlp->rport = NULL;
-		if (rdata && rdata->pnode == ndlp)
-			lpfc_nlp_put(ndlp);
-		rdata->pnode = NULL;
+		if (rdata) {
+			if (rdata->pnode == ndlp)
+				lpfc_nlp_put(ndlp);
+			rdata->pnode = NULL;
+		}
 		/* drop reference for earlier registeration */
 		put_device(&rport->dev);
 	}
@@ -4607,9 +4608,9 @@ lpfc_sli4_dequeue_nport_iocbs(struct lpfc_hba *phba,
 		pring = qp->pring;
 		if (!pring)
 			continue;
-		spin_lock_irq(&pring->ring_lock);
+		spin_lock(&pring->ring_lock);
 		__lpfc_dequeue_nport_iocbs(phba, ndlp, pring, dequeue_list);
-		spin_unlock_irq(&pring->ring_lock);
+		spin_unlock(&pring->ring_lock);
 	}
 	spin_unlock_irq(&phba->hbalock);
 }
-- 
cgit v1.2.3


From b5ccc7d61c76006f839b41c6f15876342b46cb02 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Sat, 4 Mar 2017 09:30:39 -0800
Subject: scsi: lpfc: code cleanups in NVME initiator discovery

This patch addresses the smatch issues identified by Dan Carpenter
in http://www.spinics.net/lists/linux-scsi/msg105665.html

The issues are:

drivers/scsi/lpfc/lpfc_ct.c:943 lpfc_cmpl_ct_cmd_gft_id()
error: we previously assumed 'ndlp' could be null (see line 928)

Action: moved under if check

drivers/scsi/lpfc/lpfc_nvmet.c:1694 lpfc_nvmet_unsol_issue_abort()
error: we previously assumed 'ndlp' could be null (see line 1690)

Action: conditionalized arg in printf stmt

drivers/scsi/lpfc/lpfc_nvmet.c:1792 lpfc_nvmet_sol_fcp_issue_abort()
error: we previously assumed 'ndlp' could be null (see line 1788)

Action: conditionalized arg in printf stmt

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_ct.c    | 2 +-
 drivers/scsi/lpfc/lpfc_nvmet.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index c22bb3f887e1..d3e9af983015 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -939,8 +939,8 @@ lpfc_cmpl_ct_cmd_gft_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 					 "FC4 x%08x, Data: x%08x x%08x\n",
 					 ndlp, did, ndlp->nlp_fc4_type,
 					 FC_TYPE_FCP, FC_TYPE_NVME);
+			ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
 		}
-		ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
 		lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE);
 		lpfc_issue_els_prli(vport, ndlp, 0);
 	} else
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index a69ca6ea1d6c..b7739a554fe0 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -1720,7 +1720,7 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba,
 		atomic_inc(&tgtp->xmt_abort_rsp_error);
 		lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS,
 				"6134 Drop ABTS - wrong NDLP state x%x.\n",
-				ndlp->nlp_state);
+				(ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE);
 
 		/* No failure to an ABTS request. */
 		return 0;
@@ -1818,7 +1818,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 		atomic_inc(&tgtp->xmt_abort_rsp_error);
 		lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS,
 				"6160 Drop ABTS - wrong NDLP state x%x.\n",
-				ndlp->nlp_state);
+				(ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE);
 
 		/* No failure to an ABTS request. */
 		return 0;
-- 
cgit v1.2.3


From eeb810849a20e32aa1b60335e46ea5446ba07e1f Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Sat, 4 Mar 2017 09:30:40 -0800
Subject: scsi: lpfc: revise version number to 11.2.0.10

Revise lpfc version number to 11.2.0.10

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_version.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index 86c6c9b26b82..d4e95e28f4e3 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -20,7 +20,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "11.2.0.7"
+#define LPFC_DRIVER_VERSION "11.2.0.10"
 #define LPFC_DRIVER_NAME		"lpfc"
 
 /* Used for SLI 2/3 */
-- 
cgit v1.2.3


From 1fbdbcea80056acfc8c8506594744f5ec52208c1 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sun, 5 Mar 2017 17:05:57 -0800
Subject: avr32: Fix build error caused by include file reshuffling

Various avr32 builds fail:

  arch/avr32/oprofile/backtrace.c:58: error: dereferencing pointer to incomplete type
  arch/avr32/oprofile/backtrace.c:60: error: implicit declaration of function 'user_mode'

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Hans-Christian Noren Egtvedt <egtvedt@samfundet.no>
Cc: Haavard Skinnemoen <hskinnemoen@gmail.com>
Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: oprofile-list@lists.sf.net
Fixes: f780d89a0e82 ("sched/headers: Remove <asm/ptrace.h> from ...")
Link: http://lkml.kernel.org/r/1488762357-4500-1-git-send-email-linux@roeck-us.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/avr32/oprofile/backtrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/avr32/oprofile/backtrace.c b/arch/avr32/oprofile/backtrace.c
index 75d9ad6f99cf..29cf2f191bfd 100644
--- a/arch/avr32/oprofile/backtrace.c
+++ b/arch/avr32/oprofile/backtrace.c
@@ -14,7 +14,7 @@
  */
 
 #include <linux/oprofile.h>
-#include <linux/sched.h>
+#include <linux/ptrace.h>
 #include <linux/uaccess.h>
 
 /* The first two words of each frame on the stack look like this if we have
-- 
cgit v1.2.3


From 80aa1a54f054a1c71cc88e0cad6cfa0d20a10f23 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sun, 5 Mar 2017 10:27:14 -0800
Subject: h8300: Fix build breakage caused by header file changes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following h8300 build failures:

  arch/h8300/kernel/ptrace_h.c: In function ‘trace_trap’:
  arch/h8300/kernel/ptrace_h.c:253:3: error: implicit declaration of function ‘force_sig’

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: uclinux-h8-devel@lists.sourceforge.jp
Fixes: c3edc4010e9d ("sched/headers: Move task_struct::signal and ...")
Link: http://lkml.kernel.org/r/1488738434-3504-1-git-send-email-linux@roeck-us.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/h8300/kernel/ptrace_h.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/h8300/kernel/ptrace_h.c b/arch/h8300/kernel/ptrace_h.c
index fe3b5673baba..f5ff3b794c85 100644
--- a/arch/h8300/kernel/ptrace_h.c
+++ b/arch/h8300/kernel/ptrace_h.c
@@ -9,7 +9,7 @@
  */
 
 #include <linux/linkage.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
 #include <asm/ptrace.h>
 
 #define BREAKINST 0x5730 /* trapa #3 */
-- 
cgit v1.2.3


From bb35e4515411396219431fa235bf21bf9c2794e9 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sun, 5 Mar 2017 17:13:31 -0800
Subject: drivers/char/nwbutton: Fix build breakage caused by include file
 reshuffling

Fix:

  drivers/char/nwbutton.c: In function 'button_sequence_finished':
  drivers/char/nwbutton.c:134:3: error: implicit declaration of function 'kill_cad_pid'

The declaration has been moved from one include file to another.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: c3edc4010e9d102 ("sched/headers: Move task_struct::signal and ...")
Link: http://lkml.kernel.org/r/1488762811-9022-1-git-send-email-linux@roeck-us.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/char/nwbutton.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/nwbutton.c b/drivers/char/nwbutton.c
index a5b1eb276c0b..e6d0d271c58c 100644
--- a/drivers/char/nwbutton.c
+++ b/drivers/char/nwbutton.c
@@ -6,7 +6,7 @@
 
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
 #include <linux/interrupt.h>
 #include <linux/time.h>
 #include <linux/timer.h>
-- 
cgit v1.2.3


From 5c51f4ae84df0f9df33ac08aa5be50061a8b4242 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Thu, 2 Mar 2017 16:57:23 -0600
Subject: objtool: Fix another GCC jump table detection issue

Arnd Bergmann reported a (false positive) objtool warning:

  drivers/infiniband/sw/rxe/rxe_resp.o: warning: objtool: rxe_responder()+0xfe: sibling call from callable instruction with changed frame pointer

The issue is in find_switch_table().  It tries to find a switch
statement's jump table by walking backwards from an indirect jump
instruction, looking for a relocation to the .rodata section.  In this
case it stopped walking prematurely: the first .rodata relocation it
encountered was for a variable (resp_state_name) instead of a jump
table, so it just assumed there wasn't a jump table.

The fix is to ignore any .rodata relocation which refers to an ELF
object symbol.  This works because the jump tables are anonymous and
have no symbols associated with them.

Reported-by: Arnd Bergmann <arnd@arndb.de>
Tested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 3732710ff6f2 ("objtool: Improve rare switch jump table pattern detection")
Link: http://lkml.kernel.org/r/20170302225723.3ndbsnl4hkqbne7a@treble
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/objtool/builtin-check.c | 15 ++++++++++++---
 tools/objtool/elf.c           | 12 ++++++++++++
 tools/objtool/elf.h           |  1 +
 3 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 4cfdbb5b6967..066086dd59a8 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -805,11 +805,20 @@ static struct rela *find_switch_table(struct objtool_file *file,
 		     insn->jump_dest->offset > orig_insn->offset))
 		    break;
 
+		/* look for a relocation which references .rodata */
 		text_rela = find_rela_by_dest_range(insn->sec, insn->offset,
 						    insn->len);
-		if (text_rela && text_rela->sym == file->rodata->sym)
-			return find_rela_by_dest(file->rodata,
-						 text_rela->addend);
+		if (!text_rela || text_rela->sym != file->rodata->sym)
+			continue;
+
+		/*
+		 * Make sure the .rodata address isn't associated with a
+		 * symbol.  gcc jump tables are anonymous data.
+		 */
+		if (find_symbol_containing(file->rodata, text_rela->addend))
+			continue;
+
+		return find_rela_by_dest(file->rodata, text_rela->addend);
 	}
 
 	return NULL;
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 0d7983ac63ef..d897702ce742 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -85,6 +85,18 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset)
 	return NULL;
 }
 
+struct symbol *find_symbol_containing(struct section *sec, unsigned long offset)
+{
+	struct symbol *sym;
+
+	list_for_each_entry(sym, &sec->symbol_list, list)
+		if (sym->type != STT_SECTION &&
+		    offset >= sym->offset && offset < sym->offset + sym->len)
+			return sym;
+
+	return NULL;
+}
+
 struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset,
 				     unsigned int len)
 {
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index aa1ff6596684..731973e1a3f5 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -79,6 +79,7 @@ struct elf {
 struct elf *elf_open(const char *name);
 struct section *find_section_by_name(struct elf *elf, const char *name);
 struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
+struct symbol *find_symbol_containing(struct section *sec, unsigned long offset);
 struct rela *find_rela_by_dest(struct section *sec, unsigned long offset);
 struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset,
 				     unsigned int len);
-- 
cgit v1.2.3


From fa3aa7a54fe6d3abf128f13cd4bbd40eaa48fed2 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 7 Mar 2017 10:55:34 +0100
Subject: jiffies: Revert bogus conversion of NSEC_PER_SEC to TICK_NSEC

commit 93825f2ec736 converted NSEC_PER_SEC to TICK_NSEC because the author
confused NSEC_PER_JIFFY with NSEC_PER_SEC.

As a result, the calculation of refined jiffies got broken, triggering
lockups.

Fixes: 93825f2ec736 ("jiffies: Reuse TICK_NSEC instead of NSEC_PER_JIFFY")
Reported-and-tested-by: Meelis Roos <mroos@linux.ee>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1488880534-3777-1-git-send-email-fweisbec@gmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/jiffies.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 7906b3f0c41a..497719127bf9 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -125,7 +125,7 @@ int register_refined_jiffies(long cycles_per_second)
 	shift_hz += cycles_per_tick/2;
 	do_div(shift_hz, cycles_per_tick);
 	/* Calculate nsec_per_tick using shift_hz */
-	nsec_per_tick = (u64)TICK_NSEC << 8;
+	nsec_per_tick = (u64)NSEC_PER_SEC << 8;
 	nsec_per_tick += (u32)shift_hz/2;
 	do_div(nsec_per_tick, (u32)shift_hz);
 
-- 
cgit v1.2.3


From 68598d2ea886322f9b4b0058e5b288418622de95 Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@altlinux.org>
Date: Wed, 1 Mar 2017 02:12:50 +0300
Subject: btrfs: remove btrfs_err_str function from uapi/linux/btrfs.h

btrfs_err_str function is not called from anywhere and is replicated
in the userspace headers for btrfs-progs.

It's removal also fixes the following linux/btrfs.h userspace
compilation error:

/usr/include/linux/btrfs.h: In function 'btrfs_err_str':
/usr/include/linux/btrfs.h:740:11: error: 'NULL' undeclared (first use in this function)
    return NULL;

Suggested-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/uapi/linux/btrfs.h | 27 ---------------------------
 1 file changed, 27 deletions(-)

diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index db4c253f8011..dcfc3a5a9cb1 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -713,33 +713,6 @@ enum btrfs_err_code {
 	BTRFS_ERROR_DEV_ONLY_WRITABLE,
 	BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS
 };
-/* An error code to error string mapping for the kernel
-*  error codes
-*/
-static inline char *btrfs_err_str(enum btrfs_err_code err_code)
-{
-	switch (err_code) {
-		case BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET:
-			return "unable to go below two devices on raid1";
-		case BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET:
-			return "unable to go below four devices on raid10";
-		case BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET:
-			return "unable to go below two devices on raid5";
-		case BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET:
-			return "unable to go below three devices on raid6";
-		case BTRFS_ERROR_DEV_TGT_REPLACE:
-			return "unable to remove the dev_replace target dev";
-		case BTRFS_ERROR_DEV_MISSING_NOT_FOUND:
-			return "no missing devices found to remove";
-		case BTRFS_ERROR_DEV_ONLY_WRITABLE:
-			return "unable to remove the only writeable device";
-		case BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS:
-			return "add/delete/balance/replace/resize operation "\
-				"in progress";
-		default:
-			return NULL;
-	}
-}
 
 #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
 				   struct btrfs_ioctl_vol_args)
-- 
cgit v1.2.3


From cda82ace3d4eff6a38f00a65db435fe35a3916f0 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Wed, 21 Dec 2016 11:26:55 +0100
Subject: dt-bindings: arm: update Armada CP110 system controller binding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It turns out that in the CP110 HW block present in Marvell Armada
7K/8K SoCs, gatable clock n°18 not only controls SD/MMC, but also the
GOP block. This commit updates the Device Tree binding for this piece
of hardware accordingly.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 .../devicetree/bindings/arm/marvell/cp110-system-controller0.txt    | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/arm/marvell/cp110-system-controller0.txt b/Documentation/devicetree/bindings/arm/marvell/cp110-system-controller0.txt
index 30c546900b60..07dbb358182c 100644
--- a/Documentation/devicetree/bindings/arm/marvell/cp110-system-controller0.txt
+++ b/Documentation/devicetree/bindings/arm/marvell/cp110-system-controller0.txt
@@ -45,7 +45,7 @@ The following clocks are available:
    - 1 15	SATA
    - 1 16	SATA USB
    - 1 17	Main
-   - 1 18	SD/MMC
+   - 1 18	SD/MMC/GOP
    - 1 21	Slow IO (SPI, NOR, BootROM, I2C, UART)
    - 1 22	USB3H0
    - 1 23	USB3H1
@@ -65,7 +65,7 @@ Required properties:
 	"cpm-audio", "cpm-communit", "cpm-nand", "cpm-ppv2", "cpm-sdio",
 	"cpm-mg-domain", "cpm-mg-core", "cpm-xor1", "cpm-xor0", "cpm-gop-dp", "none",
 	"cpm-pcie_x10", "cpm-pcie_x11", "cpm-pcie_x4", "cpm-pcie-xor", "cpm-sata",
-	"cpm-sata-usb", "cpm-main", "cpm-sd-mmc", "none", "none", "cpm-slow-io",
+	"cpm-sata-usb", "cpm-main", "cpm-sd-mmc-gop", "none", "none", "cpm-slow-io",
 	"cpm-usb3h0", "cpm-usb3h1", "cpm-usb3dev", "cpm-eip150", "cpm-eip197";
 
 Example:
@@ -78,6 +78,6 @@ Example:
 		gate-clock-output-names = "cpm-audio", "cpm-communit", "cpm-nand", "cpm-ppv2", "cpm-sdio",
 			"cpm-mg-domain", "cpm-mg-core", "cpm-xor1", "cpm-xor0", "cpm-gop-dp", "none",
 			"cpm-pcie_x10", "cpm-pcie_x11", "cpm-pcie_x4", "cpm-pcie-xor", "cpm-sata",
-			"cpm-sata-usb", "cpm-main", "cpm-sd-mmc", "none", "none", "cpm-slow-io",
+			"cpm-sata-usb", "cpm-main", "cpm-sd-mmc-gop", "none", "none", "cpm-slow-io",
 			"cpm-usb3h0", "cpm-usb3h1", "cpm-usb3dev", "cpm-eip150", "cpm-eip197";
 	};
-- 
cgit v1.2.3


From 253160a8ad06bcc1c1db16a58b1f06d5128f6c5e Mon Sep 17 00:00:00 2001
From: Leonard Crestez <leonard.crestez@nxp.com>
Date: Mon, 20 Feb 2017 15:20:56 +0200
Subject: clk: core: Copy connection id

Some drivers use sprintf to build clk connection id names but the clk
core will save those strings and occasionally print them back. Duplicate
the con_id strings instead of fixing all the users.

Signed-off-by: Leonard Crestez <leonard.crestez@nxp.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/clk.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 0fb39fe217d1..67201f67a14a 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -2502,7 +2502,7 @@ struct clk *__clk_create_clk(struct clk_hw *hw, const char *dev_id,
 
 	clk->core = hw->core;
 	clk->dev_id = dev_id;
-	clk->con_id = con_id;
+	clk->con_id = kstrdup_const(con_id, GFP_KERNEL);
 	clk->max_rate = ULONG_MAX;
 
 	clk_prepare_lock();
@@ -2518,6 +2518,7 @@ void __clk_free_clk(struct clk *clk)
 	hlist_del(&clk->clks_node);
 	clk_prepare_unlock();
 
+	kfree_const(clk->con_id);
 	kfree(clk);
 }
 
-- 
cgit v1.2.3


From 9afd30dbc82a9dbea4101aba57beb2a2a7e1b8d5 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Tue, 28 Feb 2017 18:53:53 +0100
Subject: libceph: fix crush_decode() for older maps

Older (shorter) CRUSH maps too need to be finalized.

Fixes: 66a0e2d579db ("crush: remove mutable part of CRUSH map")
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/osdmap.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 6824c0ec8373..cc22dd282a3e 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -390,9 +390,8 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 	dout("crush decode tunable chooseleaf_stable = %d\n",
 	     c->chooseleaf_stable);
 
-	crush_finalize(c);
-
 done:
+	crush_finalize(c);
 	dout("crush_decode success\n");
 	return c;
 
-- 
cgit v1.2.3


From b581a5854eee4b7851dedb0f8c2ceb54fb902c06 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 1 Mar 2017 17:33:27 +0100
Subject: libceph: don't set weight to IN when OSD is destroyed

Since ceph.git commit 4e28f9e63644 ("osd/OSDMap: clear osd_info,
osd_xinfo on osd deletion"), weight is set to IN when OSD is deleted.
This changes the result of applying an incremental for clients, not
just OSDs.  Because CRUSH computations are obviously affected,
pre-4e28f9e63644 servers disagree with post-4e28f9e63644 clients on
object placement, resulting in misdirected requests.

Mirrors ceph.git commit a6009d1039a55e2c77f431662b3d6cc5a8e8e63f.

Fixes: 930c53286977 ("libceph: apply new_state before new_up_client on incrementals")
Link: http://tracker.ceph.com/issues/19122
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 net/ceph/osdmap.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index cc22dd282a3e..ffe9e904d4d1 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1379,7 +1379,6 @@ static int decode_new_up_state_weight(void **p, void *end,
 		if ((map->osd_state[osd] & CEPH_OSD_EXISTS) &&
 		    (xorstate & CEPH_OSD_EXISTS)) {
 			pr_info("osd%d does not exist\n", osd);
-			map->osd_weight[osd] = CEPH_OSD_IN;
 			ret = set_primary_affinity(map, osd,
 						   CEPH_OSD_DEFAULT_PRIMARY_AFFINITY);
 			if (ret)
-- 
cgit v1.2.3


From 8767b293a4ab6632f9288f34bcf2ab9ba20dca3a Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 2 Mar 2017 19:56:57 +0100
Subject: rbd: supported_features bus attribute

... so that userspace can generate meaningful error messages and spell
out unsupported features that need to be disabled.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 drivers/block/rbd.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 4d6807723798..517838b65964 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -120,10 +120,11 @@ static int atomic_dec_return_safe(atomic_t *v)
 
 /* Feature bits */
 
-#define RBD_FEATURE_LAYERING	(1<<0)
-#define RBD_FEATURE_STRIPINGV2	(1<<1)
-#define RBD_FEATURE_EXCLUSIVE_LOCK (1<<2)
-#define RBD_FEATURE_DATA_POOL (1<<7)
+#define RBD_FEATURE_LAYERING		(1ULL<<0)
+#define RBD_FEATURE_STRIPINGV2		(1ULL<<1)
+#define RBD_FEATURE_EXCLUSIVE_LOCK	(1ULL<<2)
+#define RBD_FEATURE_DATA_POOL		(1ULL<<7)
+
 #define RBD_FEATURES_ALL	(RBD_FEATURE_LAYERING |		\
 				 RBD_FEATURE_STRIPINGV2 |	\
 				 RBD_FEATURE_EXCLUSIVE_LOCK |	\
@@ -499,16 +500,23 @@ static bool rbd_is_lock_owner(struct rbd_device *rbd_dev)
 	return is_lock_owner;
 }
 
+static ssize_t rbd_supported_features_show(struct bus_type *bus, char *buf)
+{
+	return sprintf(buf, "0x%llx\n", RBD_FEATURES_SUPPORTED);
+}
+
 static BUS_ATTR(add, S_IWUSR, NULL, rbd_add);
 static BUS_ATTR(remove, S_IWUSR, NULL, rbd_remove);
 static BUS_ATTR(add_single_major, S_IWUSR, NULL, rbd_add_single_major);
 static BUS_ATTR(remove_single_major, S_IWUSR, NULL, rbd_remove_single_major);
+static BUS_ATTR(supported_features, S_IRUGO, rbd_supported_features_show, NULL);
 
 static struct attribute *rbd_bus_attrs[] = {
 	&bus_attr_add.attr,
 	&bus_attr_remove.attr,
 	&bus_attr_add_single_major.attr,
 	&bus_attr_remove_single_major.attr,
+	&bus_attr_supported_features.attr,
 	NULL,
 };
 
-- 
cgit v1.2.3


From 7cc5e38f2f0b0b58a22a4c18a56348dd99a71270 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Sun, 12 Feb 2017 17:11:07 +0100
Subject: libceph: osd_request_timeout option

osd_request_timeout specifies how many seconds to wait for a response
from OSDs before returning -ETIMEDOUT from an OSD request.  0 (default)
means no limit.

osd_request_timeout is osdkeepalive-precise -- in-flight requests are
swept through every osdkeepalive seconds.  With ack vs commit behaviour
gone, abort_request() is really simple.

This is based on a patch from Artur Molchanov <artur.molchanov@synesis.ru>.

Tested-by: Artur Molchanov <artur.molchanov@synesis.ru>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 include/linux/ceph/libceph.h    |  2 ++
 include/linux/ceph/osd_client.h |  1 +
 net/ceph/ceph_common.c          | 15 +++++++++++++++
 net/ceph/osd_client.c           | 36 +++++++++++++++++++++++++++++++++++-
 4 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 1816c5e26581..88cd5dc8e238 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -48,6 +48,7 @@ struct ceph_options {
 	unsigned long mount_timeout;		/* jiffies */
 	unsigned long osd_idle_ttl;		/* jiffies */
 	unsigned long osd_keepalive_timeout;	/* jiffies */
+	unsigned long osd_request_timeout;	/* jiffies */
 
 	/*
 	 * any type that can't be simply compared or doesn't need need
@@ -68,6 +69,7 @@ struct ceph_options {
 #define CEPH_MOUNT_TIMEOUT_DEFAULT	msecs_to_jiffies(60 * 1000)
 #define CEPH_OSD_KEEPALIVE_DEFAULT	msecs_to_jiffies(5 * 1000)
 #define CEPH_OSD_IDLE_TTL_DEFAULT	msecs_to_jiffies(60 * 1000)
+#define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT 0  /* no timeout */
 
 #define CEPH_MONC_HUNT_INTERVAL		msecs_to_jiffies(3 * 1000)
 #define CEPH_MONC_PING_INTERVAL		msecs_to_jiffies(10 * 1000)
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 2ea0c282f3dc..c125b5d9e13c 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -189,6 +189,7 @@ struct ceph_osd_request {
 
 	/* internal */
 	unsigned long r_stamp;                /* jiffies, send or check time */
+	unsigned long r_start_stamp;          /* jiffies */
 	int r_attempts;
 	struct ceph_eversion r_replay_version; /* aka reassert_version */
 	u32 r_last_force_resend;
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 464e88599b9d..108533859a53 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -230,6 +230,7 @@ enum {
 	Opt_osdkeepalivetimeout,
 	Opt_mount_timeout,
 	Opt_osd_idle_ttl,
+	Opt_osd_request_timeout,
 	Opt_last_int,
 	/* int args above */
 	Opt_fsid,
@@ -256,6 +257,7 @@ static match_table_t opt_tokens = {
 	{Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
 	{Opt_mount_timeout, "mount_timeout=%d"},
 	{Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
+	{Opt_osd_request_timeout, "osd_request_timeout=%d"},
 	/* int args above */
 	{Opt_fsid, "fsid=%s"},
 	{Opt_name, "name=%s"},
@@ -361,6 +363,7 @@ ceph_parse_options(char *options, const char *dev_name,
 	opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
 	opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
 	opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
+	opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT;
 
 	/* get mon ip(s) */
 	/* ip1[:port1][,ip2[:port2]...] */
@@ -473,6 +476,15 @@ ceph_parse_options(char *options, const char *dev_name,
 			}
 			opt->mount_timeout = msecs_to_jiffies(intval * 1000);
 			break;
+		case Opt_osd_request_timeout:
+			/* 0 is "wait forever" (i.e. infinite timeout) */
+			if (intval < 0 || intval > INT_MAX / 1000) {
+				pr_err("osd_request_timeout out of range\n");
+				err = -EINVAL;
+				goto out;
+			}
+			opt->osd_request_timeout = msecs_to_jiffies(intval * 1000);
+			break;
 
 		case Opt_share:
 			opt->flags &= ~CEPH_OPT_NOSHARE;
@@ -557,6 +569,9 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
 	if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
 		seq_printf(m, "osdkeepalivetimeout=%d,",
 		    jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000);
+	if (opt->osd_request_timeout != CEPH_OSD_REQUEST_TIMEOUT_DEFAULT)
+		seq_printf(m, "osd_request_timeout=%d,",
+			   jiffies_to_msecs(opt->osd_request_timeout) / 1000);
 
 	/* drop redundant comma */
 	if (m->count != pos)
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index b65bbf9f45eb..e15ea9e4c495 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1709,6 +1709,8 @@ static void account_request(struct ceph_osd_request *req)
 
 	req->r_flags |= CEPH_OSD_FLAG_ONDISK;
 	atomic_inc(&req->r_osdc->num_requests);
+
+	req->r_start_stamp = jiffies;
 }
 
 static void submit_request(struct ceph_osd_request *req, bool wrlocked)
@@ -1789,6 +1791,14 @@ static void cancel_request(struct ceph_osd_request *req)
 	ceph_osdc_put_request(req);
 }
 
+static void abort_request(struct ceph_osd_request *req, int err)
+{
+	dout("%s req %p tid %llu err %d\n", __func__, req, req->r_tid, err);
+
+	cancel_map_check(req);
+	complete_request(req, err);
+}
+
 static void check_pool_dne(struct ceph_osd_request *req)
 {
 	struct ceph_osd_client *osdc = req->r_osdc;
@@ -2487,6 +2497,7 @@ static void handle_timeout(struct work_struct *work)
 		container_of(work, struct ceph_osd_client, timeout_work.work);
 	struct ceph_options *opts = osdc->client->options;
 	unsigned long cutoff = jiffies - opts->osd_keepalive_timeout;
+	unsigned long expiry_cutoff = jiffies - opts->osd_request_timeout;
 	LIST_HEAD(slow_osds);
 	struct rb_node *n, *p;
 
@@ -2502,15 +2513,23 @@ static void handle_timeout(struct work_struct *work)
 		struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
 		bool found = false;
 
-		for (p = rb_first(&osd->o_requests); p; p = rb_next(p)) {
+		for (p = rb_first(&osd->o_requests); p; ) {
 			struct ceph_osd_request *req =
 			    rb_entry(p, struct ceph_osd_request, r_node);
 
+			p = rb_next(p); /* abort_request() */
+
 			if (time_before(req->r_stamp, cutoff)) {
 				dout(" req %p tid %llu on osd%d is laggy\n",
 				     req, req->r_tid, osd->o_osd);
 				found = true;
 			}
+			if (opts->osd_request_timeout &&
+			    time_before(req->r_start_stamp, expiry_cutoff)) {
+				pr_err_ratelimited("tid %llu on osd%d timeout\n",
+				       req->r_tid, osd->o_osd);
+				abort_request(req, -ETIMEDOUT);
+			}
 		}
 		for (p = rb_first(&osd->o_linger_requests); p; p = rb_next(p)) {
 			struct ceph_osd_linger_request *lreq =
@@ -2530,6 +2549,21 @@ static void handle_timeout(struct work_struct *work)
 			list_move_tail(&osd->o_keepalive_item, &slow_osds);
 	}
 
+	if (opts->osd_request_timeout) {
+		for (p = rb_first(&osdc->homeless_osd.o_requests); p; ) {
+			struct ceph_osd_request *req =
+			    rb_entry(p, struct ceph_osd_request, r_node);
+
+			p = rb_next(p); /* abort_request() */
+
+			if (time_before(req->r_start_stamp, expiry_cutoff)) {
+				pr_err_ratelimited("tid %llu on osd%d timeout\n",
+				       req->r_tid, osdc->homeless_osd.o_osd);
+				abort_request(req, -ETIMEDOUT);
+			}
+		}
+	}
+
 	if (atomic_read(&osdc->num_homeless) || !list_empty(&slow_osds))
 		maybe_request_map(osdc);
 
-- 
cgit v1.2.3


From 9b1b23f03abdd25ffde8bbfe5824b89bc0448c28 Mon Sep 17 00:00:00 2001
From: Heiko Stuebner <heiko@sntech.de>
Date: Wed, 1 Mar 2017 22:00:41 +0100
Subject: clk: rockchip: add "," to mux_pll_src_apll_dpll_gpll_usb480m_p on
 rk3036

The mux_pll_src_apll_dpll_gpll_usb480m_p parent list was missing a ","
between the 3rd and 4th parent names, making them fall together and thus
lookups fail. Fix that.

Fixes: 5190c08b2989 ("clk: rockchip: add clock controller for rk3036")
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/rockchip/clk-rk3036.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/rockchip/clk-rk3036.c b/drivers/clk/rockchip/clk-rk3036.c
index 924f560dcf80..dcde70f4c105 100644
--- a/drivers/clk/rockchip/clk-rk3036.c
+++ b/drivers/clk/rockchip/clk-rk3036.c
@@ -127,7 +127,7 @@ PNAME(mux_ddrphy_p)		= { "dpll_ddr", "gpll_ddr" };
 PNAME(mux_pll_src_3plls_p)	= { "apll", "dpll", "gpll" };
 PNAME(mux_timer_p)		= { "xin24m", "pclk_peri_src" };
 
-PNAME(mux_pll_src_apll_dpll_gpll_usb480m_p)	= { "apll", "dpll", "gpll" "usb480m" };
+PNAME(mux_pll_src_apll_dpll_gpll_usb480m_p)	= { "apll", "dpll", "gpll", "usb480m" };
 
 PNAME(mux_mmc_src_p)	= { "apll", "dpll", "gpll", "xin24m" };
 PNAME(mux_i2s_pre_p)	= { "i2s_src", "i2s_frac", "ext_i2s", "xin12m" };
-- 
cgit v1.2.3


From f8ba2d68e54fbca340ad0fce97397291ba9637bc Mon Sep 17 00:00:00 2001
From: Heiko Stuebner <heiko@sntech.de>
Date: Wed, 1 Mar 2017 22:00:42 +0100
Subject: clk: rockchip: Make uartpll a child of the gpll on rk3036

The shared uart-pll is on boot a child of the apll that can get changed
by cpu frequency scaling. So move it away to the more stable gpll to
make sure the uart doesn't break on cpu frequency changes.

This turned up during the 4.11 merge-window when commit
6a171b299379 ("serial: 8250_dw: Allow hardware flow control to be used")
added general termios enablement making the uart on rk3036 change
frequency and thus making it susceptible for the frequency scaling issue.

Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/rockchip/clk-rk3036.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/clk/rockchip/clk-rk3036.c b/drivers/clk/rockchip/clk-rk3036.c
index dcde70f4c105..00d4150e33c3 100644
--- a/drivers/clk/rockchip/clk-rk3036.c
+++ b/drivers/clk/rockchip/clk-rk3036.c
@@ -450,6 +450,13 @@ static void __init rk3036_clk_init(struct device_node *np)
 		return;
 	}
 
+	/*
+	 * Make uart_pll_clk a child of the gpll, as all other sources are
+	 * not that usable / stable.
+	 */
+	writel_relaxed(HIWORD_UPDATE(0x2, 0x3, 10),
+		       reg_base + RK2928_CLKSEL_CON(13));
+
 	ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS);
 	if (IS_ERR(ctx)) {
 		pr_err("%s: rockchip clk init failed\n", __func__);
-- 
cgit v1.2.3


From 90922a2d03d84de36bf8a9979d62580102f31a92 Mon Sep 17 00:00:00 2001
From: Shanker Donthineni <shankerd@codeaurora.org>
Date: Tue, 7 Mar 2017 08:20:38 -0600
Subject: irqchip/gicv3-its: Add workaround for QDF2400 ITS erratum 0065

On Qualcomm Datacenter Technologies QDF2400 SoCs, the ITS hardware
implementation uses 16Bytes for Interrupt Translation Entry (ITE),
but reports an incorrect value of 8Bytes in GITS_TYPER.ITTE_size.

It might cause kernel memory corruption depending on the number
of MSI(x) that are configured and the amount of memory that has
been allocated for ITEs in its_create_device().

This patch fixes the potential memory corruption by setting the
correct ITE size to 16Bytes.

Cc: stable@vger.kernel.org
Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 Documentation/arm64/silicon-errata.txt |  1 +
 arch/arm64/Kconfig                     | 10 ++++++++++
 drivers/irqchip/irq-gic-v3-its.c       | 16 ++++++++++++++++
 3 files changed, 27 insertions(+)

diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index a71b8095dbd8..2f66683500b8 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -68,3 +68,4 @@ stable kernels.
 |                |                 |                 |                             |
 | Qualcomm Tech. | Falkor v1       | E1003           | QCOM_FALKOR_ERRATUM_1003    |
 | Qualcomm Tech. | Falkor v1       | E1009           | QCOM_FALKOR_ERRATUM_1009    |
+| Qualcomm Tech. | QDF2400 ITS     | E0065           | QCOM_QDF2400_ERRATUM_0065   |
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a39029b5414e..8c7c244247b6 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -508,6 +508,16 @@ config QCOM_FALKOR_ERRATUM_1009
 
 	  If unsure, say Y.
 
+config QCOM_QDF2400_ERRATUM_0065
+	bool "QDF2400 E0065: Incorrect GITS_TYPER.ITT_Entry_size"
+	default y
+	help
+	  On Qualcomm Datacenter Technologies QDF2400 SoC, ITS hardware reports
+	  ITE size incorrectly. The GITS_TYPER.ITT_Entry_size field should have
+	  been indicated as 16Bytes (0xf), not 8Bytes (0x7).
+
+	  If unsure, say Y.
+
 endmenu
 
 
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 23201004fd7a..f77f840d2b5f 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -1601,6 +1601,14 @@ static void __maybe_unused its_enable_quirk_cavium_23144(void *data)
 	its->flags |= ITS_FLAGS_WORKAROUND_CAVIUM_23144;
 }
 
+static void __maybe_unused its_enable_quirk_qdf2400_e0065(void *data)
+{
+	struct its_node *its = data;
+
+	/* On QDF2400, the size of the ITE is 16Bytes */
+	its->ite_size = 16;
+}
+
 static const struct gic_quirk its_quirks[] = {
 #ifdef CONFIG_CAVIUM_ERRATUM_22375
 	{
@@ -1617,6 +1625,14 @@ static const struct gic_quirk its_quirks[] = {
 		.mask	= 0xffff0fff,
 		.init	= its_enable_quirk_cavium_23144,
 	},
+#endif
+#ifdef CONFIG_QCOM_QDF2400_ERRATUM_0065
+	{
+		.desc	= "ITS: QDF2400 erratum 0065",
+		.iidr	= 0x00001070, /* QDF2400 ITS rev 1.x */
+		.mask	= 0xffffffff,
+		.init	= its_enable_quirk_qdf2400_e0065,
+	},
 #endif
 	{
 	}
-- 
cgit v1.2.3


From 4b9de5da7e120c7f02395da729f0ec77ce7a6044 Mon Sep 17 00:00:00 2001
From: Franck Demathieu <fdemathieu@gmail.com>
Date: Mon, 6 Mar 2017 14:41:06 +0100
Subject: irqchip/crossbar: Fix incorrect type of register size

The 'size' variable is unsigned according to the dt-bindings.
As this variable is used as integer in other places, create a new variable
that allows to fix the following sparse issue (-Wtypesign):

  drivers/irqchip/irq-crossbar.c:279:52: warning: incorrect type in argument 3 (different signedness)
  drivers/irqchip/irq-crossbar.c:279:52:    expected unsigned int [usertype] *out_value
  drivers/irqchip/irq-crossbar.c:279:52:    got int *<noident>

Signed-off-by: Franck Demathieu <fdemathieu@gmail.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 drivers/irqchip/irq-crossbar.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c
index 05bbf171df37..1070b7b959f2 100644
--- a/drivers/irqchip/irq-crossbar.c
+++ b/drivers/irqchip/irq-crossbar.c
@@ -199,7 +199,7 @@ static const struct irq_domain_ops crossbar_domain_ops = {
 static int __init crossbar_of_init(struct device_node *node)
 {
 	int i, size, reserved = 0;
-	u32 max = 0, entry;
+	u32 max = 0, entry, reg_size;
 	const __be32 *irqsr;
 	int ret = -ENOMEM;
 
@@ -276,9 +276,9 @@ static int __init crossbar_of_init(struct device_node *node)
 	if (!cb->register_offsets)
 		goto err_irq_map;
 
-	of_property_read_u32(node, "ti,reg-size", &size);
+	of_property_read_u32(node, "ti,reg-size", &reg_size);
 
-	switch (size) {
+	switch (reg_size) {
 	case 1:
 		cb->write = crossbar_writeb;
 		break;
@@ -304,7 +304,7 @@ static int __init crossbar_of_init(struct device_node *node)
 			continue;
 
 		cb->register_offsets[i] = reserved;
-		reserved += size;
+		reserved += reg_size;
 	}
 
 	of_property_read_u32(node, "ti,irqs-safe-map", &cb->safe_map);
-- 
cgit v1.2.3


From 2f707d97982286b307ef2a9b034e19aabc1abb56 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Mon, 6 Mar 2017 04:03:28 -0800
Subject: KVM: nVMX: reset nested_run_pending if the vCPU is going to be reset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reported by syzkaller:

    WARNING: CPU: 1 PID: 27742 at arch/x86/kvm/vmx.c:11029
    nested_vmx_vmexit+0x5c35/0x74d0 arch/x86/kvm/vmx.c:11029
    CPU: 1 PID: 27742 Comm: a.out Not tainted 4.10.0+ #229
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
    Call Trace:
     __dump_stack lib/dump_stack.c:15 [inline]
     dump_stack+0x2ee/0x3ef lib/dump_stack.c:51
     panic+0x1fb/0x412 kernel/panic.c:179
     __warn+0x1c4/0x1e0 kernel/panic.c:540
     warn_slowpath_null+0x2c/0x40 kernel/panic.c:583
     nested_vmx_vmexit+0x5c35/0x74d0 arch/x86/kvm/vmx.c:11029
     vmx_leave_nested arch/x86/kvm/vmx.c:11136 [inline]
     vmx_set_msr+0x1565/0x1910 arch/x86/kvm/vmx.c:3324
     kvm_set_msr+0xd4/0x170 arch/x86/kvm/x86.c:1099
     do_set_msr+0x11e/0x190 arch/x86/kvm/x86.c:1128
     __msr_io arch/x86/kvm/x86.c:2577 [inline]
     msr_io+0x24b/0x450 arch/x86/kvm/x86.c:2614
     kvm_arch_vcpu_ioctl+0x35b/0x46a0 arch/x86/kvm/x86.c:3497
     kvm_vcpu_ioctl+0x232/0x1120 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2721
     vfs_ioctl fs/ioctl.c:43 [inline]
     do_vfs_ioctl+0x1bf/0x1790 fs/ioctl.c:683
     SYSC_ioctl fs/ioctl.c:698 [inline]
     SyS_ioctl+0x8f/0xc0 fs/ioctl.c:689
     entry_SYSCALL_64_fastpath+0x1f/0xc2

The syzkaller folks reported a nested_run_pending warning during userspace
clear VMX capability which is exposed to L1 before.

The warning gets thrown while doing

(*(uint32_t*)0x20aecfe8 = (uint32_t)0x1);
(*(uint32_t*)0x20aecfec = (uint32_t)0x0);
(*(uint32_t*)0x20aecff0 = (uint32_t)0x3a);
(*(uint32_t*)0x20aecff4 = (uint32_t)0x0);
(*(uint64_t*)0x20aecff8 = (uint64_t)0x0);
r[29] = syscall(__NR_ioctl, r[4], 0x4008ae89ul,
		0x20aecfe8ul, 0, 0, 0, 0, 0, 0);

i.e. KVM_SET_MSR ioctl with

struct kvm_msrs {
	.nmsrs = 1,
		.pad = 0,
		.entries = {
			{.index = MSR_IA32_FEATURE_CONTROL,
			 .reserved = 0,
			 .data = 0}
		}
}

The VMLANCH/VMRESUME emulation should be stopped since the CPU is going to
reset here. This patch resets the nested_run_pending since the CPU is going
to be reset hence there should be nothing pending.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Suggested-by: Radim Krčmář <rkrcmar@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: David Hildenbrand <david@redhat.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3b626d6dc3ac..ab338581b3ec 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -11107,8 +11107,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
  */
 static void vmx_leave_nested(struct kvm_vcpu *vcpu)
 {
-	if (is_guest_mode(vcpu))
+	if (is_guest_mode(vcpu)) {
+		to_vmx(vcpu)->nested.nested_run_pending = 0;
 		nested_vmx_vmexit(vcpu, -1, 0, 0);
+	}
 	free_nested(to_vmx(vcpu));
 }
 
-- 
cgit v1.2.3


From 370a0ec1819990f8e2a93df7cc9c0146980ed45f Mon Sep 17 00:00:00 2001
From: Jintack Lim <jintack@cs.columbia.edu>
Date: Mon, 6 Mar 2017 05:42:37 -0800
Subject: KVM: arm/arm64: Let vcpu thread modify its own active state

Currently, if a vcpu thread tries to change the active state of an
interrupt which is already on the same vcpu's AP list, it will loop
forever. Since the VGIC mmio handler is called after a vcpu has
already synced back the LR state to the struct vgic_irq, we can just
let it proceed safely.

Cc: stable@vger.kernel.org
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Jintack Lim <jintack@cs.columbia.edu>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 virt/kvm/arm/vgic/vgic-mmio.c | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index 3654b4c835ef..2a5db1352722 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -180,21 +180,37 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
 static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
 				    bool new_active_state)
 {
+	struct kvm_vcpu *requester_vcpu;
 	spin_lock(&irq->irq_lock);
+
+	/*
+	 * The vcpu parameter here can mean multiple things depending on how
+	 * this function is called; when handling a trap from the kernel it
+	 * depends on the GIC version, and these functions are also called as
+	 * part of save/restore from userspace.
+	 *
+	 * Therefore, we have to figure out the requester in a reliable way.
+	 *
+	 * When accessing VGIC state from user space, the requester_vcpu is
+	 * NULL, which is fine, because we guarantee that no VCPUs are running
+	 * when accessing VGIC state from user space so irq->vcpu->cpu is
+	 * always -1.
+	 */
+	requester_vcpu = kvm_arm_get_running_vcpu();
+
 	/*
 	 * If this virtual IRQ was written into a list register, we
 	 * have to make sure the CPU that runs the VCPU thread has
-	 * synced back LR state to the struct vgic_irq.  We can only
-	 * know this for sure, when either this irq is not assigned to
-	 * anyone's AP list anymore, or the VCPU thread is not
-	 * running on any CPUs.
+	 * synced back the LR state to the struct vgic_irq.
 	 *
-	 * In the opposite case, we know the VCPU thread may be on its
-	 * way back from the guest and still has to sync back this
-	 * IRQ, so we release and re-acquire the spin_lock to let the
-	 * other thread sync back the IRQ.
+	 * As long as the conditions below are true, we know the VCPU thread
+	 * may be on its way back from the guest (we kicked the VCPU thread in
+	 * vgic_change_active_prepare)  and still has to sync back this IRQ,
+	 * so we release and re-acquire the spin_lock to let the other thread
+	 * sync back the IRQ.
 	 */
 	while (irq->vcpu && /* IRQ may have state in an LR somewhere */
+	       irq->vcpu != requester_vcpu && /* Current thread is not the VCPU thread */
 	       irq->vcpu->cpu != -1) /* VCPU thread is running */
 		cond_resched_lock(&irq->irq_lock);
 
-- 
cgit v1.2.3


From f050fe7a9164945dd1c28be05bf00e8cfb082ccf Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 20 Feb 2017 12:30:11 +0000
Subject: arm: KVM: Survive unknown traps from guests

Currently we BUG() if we see a HSR.EC value we don't recognise. As
configurable disables/enables are added to the architecture (controlled
by RES1/RES0 bits respectively), with associated synchronous exceptions,
it may be possible for a guest to trigger exceptions with classes that
we don't recognise.

While we can't service these exceptions in a manner useful to the guest,
we can avoid bringing down the host. Per ARM DDI 0406C.c, all currently
unallocated HSR EC encodings are reserved, and per ARM DDI
0487A.k_iss10775, page G6-4395, EC values within the range 0x00 - 0x2c
are reserved for future use with synchronous exceptions, and EC values
within the range 0x2d - 0x3f may be used for either synchronous or
asynchronous exceptions.

The patch makes KVM handle any unknown EC by injecting an UNDEFINED
exception into the guest, with a corresponding (ratelimited) warning in
the host dmesg. We could later improve on this with with a new (opt-in)
exit to the host userspace.

Cc: Dave Martin <dave.martin@arm.com>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/include/asm/kvm_arm.h |  1 +
 arch/arm/kvm/handle_exit.c     | 19 ++++++++++++-------
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index e22089fb44dc..a3f0b3d50089 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -209,6 +209,7 @@
 #define HSR_EC_IABT_HYP	(0x21)
 #define HSR_EC_DABT	(0x24)
 #define HSR_EC_DABT_HYP	(0x25)
+#define HSR_EC_MAX	(0x3f)
 
 #define HSR_WFI_IS_WFE		(_AC(1, UL) << 0)
 
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 4e40d1955e35..96af65a30d78 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -79,7 +79,19 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	return 1;
 }
 
+static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
+	kvm_pr_unimpl("Unknown exception class: hsr: %#08x\n",
+		      hsr);
+
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
 static exit_handle_fn arm_exit_handlers[] = {
+	[0 ... HSR_EC_MAX]	= kvm_handle_unknown_ec,
 	[HSR_EC_WFI]		= kvm_handle_wfx,
 	[HSR_EC_CP15_32]	= kvm_handle_cp15_32,
 	[HSR_EC_CP15_64]	= kvm_handle_cp15_64,
@@ -98,13 +110,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
 {
 	u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
 
-	if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
-	    !arm_exit_handlers[hsr_ec]) {
-		kvm_err("Unknown exception class: hsr: %#08x\n",
-			(unsigned int)kvm_vcpu_get_hsr(vcpu));
-		BUG();
-	}
-
 	return arm_exit_handlers[hsr_ec];
 }
 
-- 
cgit v1.2.3


From ba4dd156eabdca93501d92a980ba27fa5f4bbd27 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 20 Feb 2017 12:30:12 +0000
Subject: arm64: KVM: Survive unknown traps from guests

Currently we BUG() if we see an ESR_EL2.EC value we don't recognise. As
configurable disables/enables are added to the architecture (controlled
by RES1/RES0 bits respectively), with associated synchronous exceptions,
it may be possible for a guest to trigger exceptions with classes that
we don't recognise.

While we can't service these exceptions in a manner useful to the guest,
we can avoid bringing down the host. Per ARM DDI 0487A.k_iss10775, page
D7-1937, EC values within the range 0x00 - 0x2c are reserved for future
use with synchronous exceptions, and EC values within the range 0x2d -
0x3f may be used for either synchronous or asynchronous exceptions.

The patch makes KVM handle any unknown EC by injecting an UNDEFINED
exception into the guest, with a corresponding (ratelimited) warning in
the host dmesg. We could later improve on this with with a new (opt-in)
exit to the host userspace.

Cc: Dave Martin <dave.martin@arm.com>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/kvm/handle_exit.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 1bfe30dfbfe7..fa1b18e364fc 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -135,7 +135,19 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	return ret;
 }
 
+static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
+	kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n",
+		      hsr, esr_get_class_string(hsr));
+
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
 static exit_handle_fn arm_exit_handlers[] = {
+	[0 ... ESR_ELx_EC_MAX]	= kvm_handle_unknown_ec,
 	[ESR_ELx_EC_WFx]	= kvm_handle_wfx,
 	[ESR_ELx_EC_CP15_32]	= kvm_handle_cp15_32,
 	[ESR_ELx_EC_CP15_64]	= kvm_handle_cp15_64,
@@ -162,13 +174,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
 	u32 hsr = kvm_vcpu_get_hsr(vcpu);
 	u8 hsr_ec = ESR_ELx_EC(hsr);
 
-	if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
-	    !arm_exit_handlers[hsr_ec]) {
-		kvm_err("Unknown exception class: hsr: %#08x -- %s\n",
-			hsr, esr_get_class_string(hsr));
-		BUG();
-	}
-
 	return arm_exit_handlers[hsr_ec];
 }
 
-- 
cgit v1.2.3


From a5e1e6ca94a8cec51571fd62e3eaec269717969c Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Thu, 16 Feb 2017 10:41:20 +0000
Subject: KVM: arm/arm64: VGIC: Fix command handling while ITS being disabled

The ITS spec says that ITS commands are only processed when the ITS
is enabled (section 8.19.4, Enabled, bit[0]). Our emulation was not taking
this into account.
Fix this by checking the enabled state before handling CWRITER writes.

On the other hand that means that CWRITER could advance while the ITS
is disabled, and enabling it would need those commands to be processed.
Fix this case as well by refactoring actual command processing and
calling this from both the GITS_CWRITER and GITS_CTLR handlers.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 virt/kvm/arm/vgic/vgic-its.c | 109 ++++++++++++++++++++++++++-----------------
 1 file changed, 65 insertions(+), 44 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 571b64a01c50..8d1da1af4b09 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -360,29 +360,6 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
 	return ret;
 }
 
-static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
-					     struct vgic_its *its,
-					     gpa_t addr, unsigned int len)
-{
-	u32 reg = 0;
-
-	mutex_lock(&its->cmd_lock);
-	if (its->creadr == its->cwriter)
-		reg |= GITS_CTLR_QUIESCENT;
-	if (its->enabled)
-		reg |= GITS_CTLR_ENABLE;
-	mutex_unlock(&its->cmd_lock);
-
-	return reg;
-}
-
-static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
-				     gpa_t addr, unsigned int len,
-				     unsigned long val)
-{
-	its->enabled = !!(val & GITS_CTLR_ENABLE);
-}
-
 static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm,
 					      struct vgic_its *its,
 					      gpa_t addr, unsigned int len)
@@ -1161,33 +1138,16 @@ static void vgic_mmio_write_its_cbaser(struct kvm *kvm, struct vgic_its *its,
 #define ITS_CMD_SIZE			32
 #define ITS_CMD_OFFSET(reg)		((reg) & GENMASK(19, 5))
 
-/*
- * By writing to CWRITER the guest announces new commands to be processed.
- * To avoid any races in the first place, we take the its_cmd lock, which
- * protects our ring buffer variables, so that there is only one user
- * per ITS handling commands at a given time.
- */
-static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
-					gpa_t addr, unsigned int len,
-					unsigned long val)
+/* Must be called with the cmd_lock held. */
+static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its)
 {
 	gpa_t cbaser;
 	u64 cmd_buf[4];
-	u32 reg;
 
-	if (!its)
-		return;
-
-	mutex_lock(&its->cmd_lock);
-
-	reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
-	reg = ITS_CMD_OFFSET(reg);
-	if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
-		mutex_unlock(&its->cmd_lock);
+	/* Commands are only processed when the ITS is enabled. */
+	if (!its->enabled)
 		return;
-	}
 
-	its->cwriter = reg;
 	cbaser = CBASER_ADDRESS(its->cbaser);
 
 	while (its->cwriter != its->creadr) {
@@ -1207,6 +1167,34 @@ static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
 		if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser))
 			its->creadr = 0;
 	}
+}
+
+/*
+ * By writing to CWRITER the guest announces new commands to be processed.
+ * To avoid any races in the first place, we take the its_cmd lock, which
+ * protects our ring buffer variables, so that there is only one user
+ * per ITS handling commands at a given time.
+ */
+static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
+					gpa_t addr, unsigned int len,
+					unsigned long val)
+{
+	u64 reg;
+
+	if (!its)
+		return;
+
+	mutex_lock(&its->cmd_lock);
+
+	reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
+	reg = ITS_CMD_OFFSET(reg);
+	if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
+		mutex_unlock(&its->cmd_lock);
+		return;
+	}
+	its->cwriter = reg;
+
+	vgic_its_process_commands(kvm, its);
 
 	mutex_unlock(&its->cmd_lock);
 }
@@ -1287,6 +1275,39 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm,
 	*regptr = reg;
 }
 
+static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
+					     struct vgic_its *its,
+					     gpa_t addr, unsigned int len)
+{
+	u32 reg = 0;
+
+	mutex_lock(&its->cmd_lock);
+	if (its->creadr == its->cwriter)
+		reg |= GITS_CTLR_QUIESCENT;
+	if (its->enabled)
+		reg |= GITS_CTLR_ENABLE;
+	mutex_unlock(&its->cmd_lock);
+
+	return reg;
+}
+
+static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
+				     gpa_t addr, unsigned int len,
+				     unsigned long val)
+{
+	mutex_lock(&its->cmd_lock);
+
+	its->enabled = !!(val & GITS_CTLR_ENABLE);
+
+	/*
+	 * Try to process any pending commands. This function bails out early
+	 * if the ITS is disabled or no commands have been queued.
+	 */
+	vgic_its_process_commands(kvm, its);
+
+	mutex_unlock(&its->cmd_lock);
+}
+
 #define REGISTER_ITS_DESC(off, rd, wr, length, acc)		\
 {								\
 	.reg_offset = off,					\
-- 
cgit v1.2.3


From 8c71fff434e5ecf5ff27bd61db1bc9ac4c2b2a1b Mon Sep 17 00:00:00 2001
From: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Date: Fri, 3 Mar 2017 06:31:48 -0300
Subject: [media] v4l: vsp1: Adapt vsp1_du_setup_lif() interface to use a
 structure

The interface to configure the LIF in the VSP1 requires adapting the
function prototype for any changes. This makes extending the interface
difficult.

Change the function prototype to pass a structure which can be easily
extended.

This changes the means of disabling the pipeline, by now passing a NULL
configuration rather than passing either a 0 width or height.

[Fixed kerneldoc, made vsp1_du_setup_lif() cfg argument const]

Signed-off-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Acked-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 drivers/gpu/drm/rcar-du/rcar_du_vsp.c  |  8 ++++++--
 drivers/media/platform/vsp1/vsp1_drm.c | 33 ++++++++++++++++-----------------
 include/media/vsp1.h                   | 13 +++++++++++--
 3 files changed, 33 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
index 83ebd162f3ef..22da2d671297 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
@@ -32,6 +32,10 @@ void rcar_du_vsp_enable(struct rcar_du_crtc *crtc)
 {
 	const struct drm_display_mode *mode = &crtc->crtc.state->adjusted_mode;
 	struct rcar_du_device *rcdu = crtc->group->dev;
+	struct vsp1_du_lif_config cfg = {
+		.width = mode->hdisplay,
+		.height = mode->vdisplay,
+	};
 	struct rcar_du_plane_state state = {
 		.state = {
 			.crtc = &crtc->crtc,
@@ -66,12 +70,12 @@ void rcar_du_vsp_enable(struct rcar_du_crtc *crtc)
 	 */
 	crtc->group->need_restart = true;
 
-	vsp1_du_setup_lif(crtc->vsp->vsp, mode->hdisplay, mode->vdisplay);
+	vsp1_du_setup_lif(crtc->vsp->vsp, &cfg);
 }
 
 void rcar_du_vsp_disable(struct rcar_du_crtc *crtc)
 {
-	vsp1_du_setup_lif(crtc->vsp->vsp, 0, 0);
+	vsp1_du_setup_lif(crtc->vsp->vsp, NULL);
 }
 
 void rcar_du_vsp_atomic_begin(struct rcar_du_crtc *crtc)
diff --git a/drivers/media/platform/vsp1/vsp1_drm.c b/drivers/media/platform/vsp1/vsp1_drm.c
index b4b583f7137a..b4c0f10fc3b0 100644
--- a/drivers/media/platform/vsp1/vsp1_drm.c
+++ b/drivers/media/platform/vsp1/vsp1_drm.c
@@ -54,12 +54,11 @@ EXPORT_SYMBOL_GPL(vsp1_du_init);
 /**
  * vsp1_du_setup_lif - Setup the output part of the VSP pipeline
  * @dev: the VSP device
- * @width: output frame width in pixels
- * @height: output frame height in pixels
+ * @cfg: the LIF configuration
  *
- * Configure the output part of VSP DRM pipeline for the given frame @width and
- * @height. This sets up formats on the BRU source pad, the WPF0 sink and source
- * pads, and the LIF sink pad.
+ * Configure the output part of VSP DRM pipeline for the given frame @cfg.width
+ * and @cfg.height. This sets up formats on the BRU source pad, the WPF0 sink
+ * and source pads, and the LIF sink pad.
  *
  * As the media bus code on the BRU source pad is conditioned by the
  * configuration of the BRU sink 0 pad, we also set up the formats on all BRU
@@ -69,8 +68,7 @@ EXPORT_SYMBOL_GPL(vsp1_du_init);
  *
  * Return 0 on success or a negative error code on failure.
  */
-int vsp1_du_setup_lif(struct device *dev, unsigned int width,
-		      unsigned int height)
+int vsp1_du_setup_lif(struct device *dev, const struct vsp1_du_lif_config *cfg)
 {
 	struct vsp1_device *vsp1 = dev_get_drvdata(dev);
 	struct vsp1_pipeline *pipe = &vsp1->drm->pipe;
@@ -79,11 +77,8 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int width,
 	unsigned int i;
 	int ret;
 
-	dev_dbg(vsp1->dev, "%s: configuring LIF with format %ux%u\n",
-		__func__, width, height);
-
-	if (width == 0 || height == 0) {
-		/* Zero width or height means the CRTC is being disabled, stop
+	if (!cfg) {
+		/* NULL configuration means the CRTC is being disabled, stop
 		 * the pipeline and turn the light off.
 		 */
 		ret = vsp1_pipeline_stop(pipe);
@@ -108,6 +103,9 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int width,
 		return 0;
 	}
 
+	dev_dbg(vsp1->dev, "%s: configuring LIF with format %ux%u\n",
+		__func__, cfg->width, cfg->height);
+
 	/* Configure the format at the BRU sinks and propagate it through the
 	 * pipeline.
 	 */
@@ -117,8 +115,8 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int width,
 	for (i = 0; i < bru->entity.source_pad; ++i) {
 		format.pad = i;
 
-		format.format.width = width;
-		format.format.height = height;
+		format.format.width = cfg->width;
+		format.format.height = cfg->height;
 		format.format.code = MEDIA_BUS_FMT_ARGB8888_1X32;
 		format.format.field = V4L2_FIELD_NONE;
 
@@ -133,8 +131,8 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int width,
 	}
 
 	format.pad = bru->entity.source_pad;
-	format.format.width = width;
-	format.format.height = height;
+	format.format.width = cfg->width;
+	format.format.height = cfg->height;
 	format.format.code = MEDIA_BUS_FMT_ARGB8888_1X32;
 	format.format.field = V4L2_FIELD_NONE;
 
@@ -180,7 +178,8 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int width,
 	/* Verify that the format at the output of the pipeline matches the
 	 * requested frame size and media bus code.
 	 */
-	if (format.format.width != width || format.format.height != height ||
+	if (format.format.width != cfg->width ||
+	    format.format.height != cfg->height ||
 	    format.format.code != MEDIA_BUS_FMT_ARGB8888_1X32) {
 		dev_dbg(vsp1->dev, "%s: format mismatch\n", __func__);
 		return -EPIPE;
diff --git a/include/media/vsp1.h b/include/media/vsp1.h
index 458b400373d4..38aac554dbba 100644
--- a/include/media/vsp1.h
+++ b/include/media/vsp1.h
@@ -20,8 +20,17 @@ struct device;
 
 int vsp1_du_init(struct device *dev);
 
-int vsp1_du_setup_lif(struct device *dev, unsigned int width,
-		      unsigned int height);
+/**
+ * struct vsp1_du_lif_config - VSP LIF configuration
+ * @width: output frame width
+ * @height: output frame height
+ */
+struct vsp1_du_lif_config {
+	unsigned int width;
+	unsigned int height;
+};
+
+int vsp1_du_setup_lif(struct device *dev, const struct vsp1_du_lif_config *cfg);
 
 struct vsp1_du_atomic_config {
 	u32 pixelformat;
-- 
cgit v1.2.3


From 45838660e34d90db8d4f7cbc8fd66e8aff79f4fe Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Tue, 7 Mar 2017 09:31:29 -0800
Subject: Input: i8042 - add noloop quirk for Dell Embedded Box PC 3000

The aux port does not get detected without noloop quirk, so external PS/2
mouse cannot work as result.

The PS/2 mouse can work with this quirk.

BugLink: https://bugs.launchpad.net/bugs/1591053
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Reviewed-by: Marcos Paulo de Souza <marcos.souza.org@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/serio/i8042-x86ia64io.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index 05afd16ea9c9..e856b073d533 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -119,6 +119,13 @@ static const struct dmi_system_id __initconst i8042_dmi_noloop_table[] = {
 			DMI_MATCH(DMI_PRODUCT_VERSION, "DL760"),
 		},
 	},
+	{
+		/* Dell Embedded Box PC 3000 */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Embedded Box PC 3000"),
+		},
+	},
 	{
 		/* OQO Model 01 */
 		.matches = {
-- 
cgit v1.2.3


From 82f2341c94d270421f383641b7cd670e474db56b Mon Sep 17 00:00:00 2001
From: Alexander Popov <alex.popov@linux.com>
Date: Tue, 28 Feb 2017 19:54:40 +0300
Subject: tty: n_hdlc: get rid of racy n_hdlc.tbuf

Currently N_HDLC line discipline uses a self-made singly linked list for
data buffers and has n_hdlc.tbuf pointer for buffer retransmitting after
an error.

The commit be10eb7589337e5defbe214dae038a53dd21add8
("tty: n_hdlc add buffer flushing") introduced racy access to n_hdlc.tbuf.
After tx error concurrent flush_tx_queue() and n_hdlc_send_frames() can put
one data buffer to tx_free_buf_list twice. That causes double free in
n_hdlc_release().

Let's use standard kernel linked list and get rid of n_hdlc.tbuf:
in case of tx error put current data buffer after the head of tx_buf_list.

Signed-off-by: Alexander Popov <alex.popov@linux.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/n_hdlc.c | 132 +++++++++++++++++++++++++++------------------------
 1 file changed, 69 insertions(+), 63 deletions(-)

diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c
index 1bacbc3b19a0..e94aea8c0d05 100644
--- a/drivers/tty/n_hdlc.c
+++ b/drivers/tty/n_hdlc.c
@@ -114,7 +114,7 @@
 #define DEFAULT_TX_BUF_COUNT 3
 
 struct n_hdlc_buf {
-	struct n_hdlc_buf *link;
+	struct list_head  list_item;
 	int		  count;
 	char		  buf[1];
 };
@@ -122,8 +122,7 @@ struct n_hdlc_buf {
 #define	N_HDLC_BUF_SIZE	(sizeof(struct n_hdlc_buf) + maxframe)
 
 struct n_hdlc_buf_list {
-	struct n_hdlc_buf *head;
-	struct n_hdlc_buf *tail;
+	struct list_head  list;
 	int		  count;
 	spinlock_t	  spinlock;
 };
@@ -136,7 +135,6 @@ struct n_hdlc_buf_list {
  * @backup_tty - TTY to use if tty gets closed
  * @tbusy - reentrancy flag for tx wakeup code
  * @woke_up - FIXME: describe this field
- * @tbuf - currently transmitting tx buffer
  * @tx_buf_list - list of pending transmit frame buffers
  * @rx_buf_list - list of received frame buffers
  * @tx_free_buf_list - list unused transmit frame buffers
@@ -149,7 +147,6 @@ struct n_hdlc {
 	struct tty_struct	*backup_tty;
 	int			tbusy;
 	int			woke_up;
-	struct n_hdlc_buf	*tbuf;
 	struct n_hdlc_buf_list	tx_buf_list;
 	struct n_hdlc_buf_list	rx_buf_list;
 	struct n_hdlc_buf_list	tx_free_buf_list;
@@ -159,6 +156,8 @@ struct n_hdlc {
 /*
  * HDLC buffer list manipulation functions
  */
+static void n_hdlc_buf_return(struct n_hdlc_buf_list *buf_list,
+						struct n_hdlc_buf *buf);
 static void n_hdlc_buf_put(struct n_hdlc_buf_list *list,
 			   struct n_hdlc_buf *buf);
 static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *list);
@@ -208,16 +207,9 @@ static void flush_tx_queue(struct tty_struct *tty)
 {
 	struct n_hdlc *n_hdlc = tty2n_hdlc(tty);
 	struct n_hdlc_buf *buf;
-	unsigned long flags;
 
 	while ((buf = n_hdlc_buf_get(&n_hdlc->tx_buf_list)))
 		n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, buf);
- 	spin_lock_irqsave(&n_hdlc->tx_buf_list.spinlock, flags);
-	if (n_hdlc->tbuf) {
-		n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, n_hdlc->tbuf);
-		n_hdlc->tbuf = NULL;
-	}
-	spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock, flags);
 }
 
 static struct tty_ldisc_ops n_hdlc_ldisc = {
@@ -283,7 +275,6 @@ static void n_hdlc_release(struct n_hdlc *n_hdlc)
 		} else
 			break;
 	}
-	kfree(n_hdlc->tbuf);
 	kfree(n_hdlc);
 	
 }	/* end of n_hdlc_release() */
@@ -402,13 +393,7 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty)
 	n_hdlc->woke_up = 0;
 	spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock, flags);
 
-	/* get current transmit buffer or get new transmit */
-	/* buffer from list of pending transmit buffers */
-		
-	tbuf = n_hdlc->tbuf;
-	if (!tbuf)
-		tbuf = n_hdlc_buf_get(&n_hdlc->tx_buf_list);
-		
+	tbuf = n_hdlc_buf_get(&n_hdlc->tx_buf_list);
 	while (tbuf) {
 		if (debuglevel >= DEBUG_LEVEL_INFO)	
 			printk("%s(%d)sending frame %p, count=%d\n",
@@ -420,7 +405,7 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty)
 
 		/* rollback was possible and has been done */
 		if (actual == -ERESTARTSYS) {
-			n_hdlc->tbuf = tbuf;
+			n_hdlc_buf_return(&n_hdlc->tx_buf_list, tbuf);
 			break;
 		}
 		/* if transmit error, throw frame away by */
@@ -435,10 +420,7 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty)
 					
 			/* free current transmit buffer */
 			n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, tbuf);
-			
-			/* this tx buffer is done */
-			n_hdlc->tbuf = NULL;
-			
+
 			/* wait up sleeping writers */
 			wake_up_interruptible(&tty->write_wait);
 	
@@ -448,10 +430,12 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty)
 			if (debuglevel >= DEBUG_LEVEL_INFO)	
 				printk("%s(%d)frame %p pending\n",
 					__FILE__,__LINE__,tbuf);
-					
-			/* buffer not accepted by driver */
-			/* set this buffer as pending buffer */
-			n_hdlc->tbuf = tbuf;
+
+			/*
+			 * the buffer was not accepted by driver,
+			 * return it back into tx queue
+			 */
+			n_hdlc_buf_return(&n_hdlc->tx_buf_list, tbuf);
 			break;
 		}
 	}
@@ -749,7 +733,8 @@ static int n_hdlc_tty_ioctl(struct tty_struct *tty, struct file *file,
 	int error = 0;
 	int count;
 	unsigned long flags;
-	
+	struct n_hdlc_buf *buf = NULL;
+
 	if (debuglevel >= DEBUG_LEVEL_INFO)	
 		printk("%s(%d)n_hdlc_tty_ioctl() called %d\n",
 			__FILE__,__LINE__,cmd);
@@ -763,8 +748,10 @@ static int n_hdlc_tty_ioctl(struct tty_struct *tty, struct file *file,
 		/* report count of read data available */
 		/* in next available frame (if any) */
 		spin_lock_irqsave(&n_hdlc->rx_buf_list.spinlock,flags);
-		if (n_hdlc->rx_buf_list.head)
-			count = n_hdlc->rx_buf_list.head->count;
+		buf = list_first_entry_or_null(&n_hdlc->rx_buf_list.list,
+						struct n_hdlc_buf, list_item);
+		if (buf)
+			count = buf->count;
 		else
 			count = 0;
 		spin_unlock_irqrestore(&n_hdlc->rx_buf_list.spinlock,flags);
@@ -776,8 +763,10 @@ static int n_hdlc_tty_ioctl(struct tty_struct *tty, struct file *file,
 		count = tty_chars_in_buffer(tty);
 		/* add size of next output frame in queue */
 		spin_lock_irqsave(&n_hdlc->tx_buf_list.spinlock,flags);
-		if (n_hdlc->tx_buf_list.head)
-			count += n_hdlc->tx_buf_list.head->count;
+		buf = list_first_entry_or_null(&n_hdlc->tx_buf_list.list,
+						struct n_hdlc_buf, list_item);
+		if (buf)
+			count += buf->count;
 		spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock,flags);
 		error = put_user(count, (int __user *)arg);
 		break;
@@ -825,14 +814,14 @@ static unsigned int n_hdlc_tty_poll(struct tty_struct *tty, struct file *filp,
 		poll_wait(filp, &tty->write_wait, wait);
 
 		/* set bits for operations that won't block */
-		if (n_hdlc->rx_buf_list.head)
+		if (!list_empty(&n_hdlc->rx_buf_list.list))
 			mask |= POLLIN | POLLRDNORM;	/* readable */
 		if (test_bit(TTY_OTHER_CLOSED, &tty->flags))
 			mask |= POLLHUP;
 		if (tty_hung_up_p(filp))
 			mask |= POLLHUP;
 		if (!tty_is_writelocked(tty) &&
-				n_hdlc->tx_free_buf_list.head)
+				!list_empty(&n_hdlc->tx_free_buf_list.list))
 			mask |= POLLOUT | POLLWRNORM;	/* writable */
 	}
 	return mask;
@@ -856,7 +845,12 @@ static struct n_hdlc *n_hdlc_alloc(void)
 	spin_lock_init(&n_hdlc->tx_free_buf_list.spinlock);
 	spin_lock_init(&n_hdlc->rx_buf_list.spinlock);
 	spin_lock_init(&n_hdlc->tx_buf_list.spinlock);
-	
+
+	INIT_LIST_HEAD(&n_hdlc->rx_free_buf_list.list);
+	INIT_LIST_HEAD(&n_hdlc->tx_free_buf_list.list);
+	INIT_LIST_HEAD(&n_hdlc->rx_buf_list.list);
+	INIT_LIST_HEAD(&n_hdlc->tx_buf_list.list);
+
 	/* allocate free rx buffer list */
 	for(i=0;i<DEFAULT_RX_BUF_COUNT;i++) {
 		buf = kmalloc(N_HDLC_BUF_SIZE, GFP_KERNEL);
@@ -883,54 +877,66 @@ static struct n_hdlc *n_hdlc_alloc(void)
 	
 }	/* end of n_hdlc_alloc() */
 
+/**
+ * n_hdlc_buf_return - put the HDLC buffer after the head of the specified list
+ * @buf_list - pointer to the buffer list
+ * @buf - pointer to the buffer
+ */
+static void n_hdlc_buf_return(struct n_hdlc_buf_list *buf_list,
+						struct n_hdlc_buf *buf)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&buf_list->spinlock, flags);
+
+	list_add(&buf->list_item, &buf_list->list);
+	buf_list->count++;
+
+	spin_unlock_irqrestore(&buf_list->spinlock, flags);
+}
+
 /**
  * n_hdlc_buf_put - add specified HDLC buffer to tail of specified list
- * @list - pointer to buffer list
+ * @buf_list - pointer to buffer list
  * @buf	- pointer to buffer
  */
-static void n_hdlc_buf_put(struct n_hdlc_buf_list *list,
+static void n_hdlc_buf_put(struct n_hdlc_buf_list *buf_list,
 			   struct n_hdlc_buf *buf)
 {
 	unsigned long flags;
-	spin_lock_irqsave(&list->spinlock,flags);
-	
-	buf->link=NULL;
-	if (list->tail)
-		list->tail->link = buf;
-	else
-		list->head = buf;
-	list->tail = buf;
-	(list->count)++;
-	
-	spin_unlock_irqrestore(&list->spinlock,flags);
-	
+
+	spin_lock_irqsave(&buf_list->spinlock, flags);
+
+	list_add_tail(&buf->list_item, &buf_list->list);
+	buf_list->count++;
+
+	spin_unlock_irqrestore(&buf_list->spinlock, flags);
 }	/* end of n_hdlc_buf_put() */
 
 /**
  * n_hdlc_buf_get - remove and return an HDLC buffer from list
- * @list - pointer to HDLC buffer list
+ * @buf_list - pointer to HDLC buffer list
  * 
  * Remove and return an HDLC buffer from the head of the specified HDLC buffer
  * list.
  * Returns a pointer to HDLC buffer if available, otherwise %NULL.
  */
-static struct n_hdlc_buf* n_hdlc_buf_get(struct n_hdlc_buf_list *list)
+static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *buf_list)
 {
 	unsigned long flags;
 	struct n_hdlc_buf *buf;
-	spin_lock_irqsave(&list->spinlock,flags);
-	
-	buf = list->head;
+
+	spin_lock_irqsave(&buf_list->spinlock, flags);
+
+	buf = list_first_entry_or_null(&buf_list->list,
+						struct n_hdlc_buf, list_item);
 	if (buf) {
-		list->head = buf->link;
-		(list->count)--;
+		list_del(&buf->list_item);
+		buf_list->count--;
 	}
-	if (!list->head)
-		list->tail = NULL;
-	
-	spin_unlock_irqrestore(&list->spinlock,flags);
+
+	spin_unlock_irqrestore(&buf_list->spinlock, flags);
 	return buf;
-	
 }	/* end of n_hdlc_buf_get() */
 
 static char hdlc_banner[] __initdata =
-- 
cgit v1.2.3


From 2eacc79c27eb683c4a3ded80c2629387ee0d4e04 Mon Sep 17 00:00:00 2001
From: Rehas Sachdeva <aquannie@gmail.com>
Date: Sat, 18 Feb 2017 07:31:00 -0500
Subject: radix tree test suite: Add test for idr_get_next()

Assert that idr_get_next() returns the next populated entry in the tree with
an ID greater than or equal to the value pointed to by @nextid argument.

Signed-off-by: Rehas Sachdeva <aquannie@gmail.com>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/idr-test.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index a26098c6123d..f20690ac3a97 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -153,6 +153,30 @@ void idr_nowait_test(void)
 	idr_destroy(&idr);
 }
 
+void idr_get_next_test(void)
+{
+	unsigned long i;
+	int nextid;
+	DEFINE_IDR(idr);
+
+	int indices[] = {4, 7, 9, 15, 65, 128, 1000, 99999, 0};
+
+	for(i = 0; indices[i]; i++) {
+		struct item *item = item_create(indices[i], 0);
+		assert(idr_alloc(&idr, item, indices[i], indices[i+1],
+				 GFP_KERNEL) == indices[i]);
+	}
+
+	for(i = 0, nextid = 0; indices[i]; i++) {
+		idr_get_next(&idr, &nextid);
+		assert(nextid == indices[i]);
+		nextid++;
+	}
+
+	idr_for_each(&idr, item_idr_free, &idr);
+	idr_destroy(&idr);
+}
+
 void idr_checks(void)
 {
 	unsigned long i;
@@ -202,6 +226,7 @@ void idr_checks(void)
 	idr_alloc_test();
 	idr_null_test();
 	idr_nowait_test();
+	idr_get_next_test();
 }
 
 /*
-- 
cgit v1.2.3


From 166bb1f532fd9fe1b81c6b411ad5d5c9dd21a685 Mon Sep 17 00:00:00 2001
From: Rehas Sachdeva <aquannie@gmail.com>
Date: Mon, 20 Feb 2017 06:40:00 -0500
Subject: radix tree test suite: Add tests for ida_simple_get() and
 ida_simple_remove()

Assert that ida_simple_get() allocates an id in the passed range or returns
error on failure, and ida_simple_remove() releases an allocated id.

Signed-off-by: Rehas Sachdeva <aquannie@gmail.com>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/idr-test.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index f20690ac3a97..86de901fa5c6 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -387,6 +387,24 @@ void ida_check_random(void)
 		goto repeat;
 }
 
+void ida_simple_get_remove_test(void)
+{
+	DEFINE_IDA(ida);
+	unsigned long i;
+
+	for (i = 0; i < 10000; i++) {
+		assert(ida_simple_get(&ida, 0, 20000, GFP_KERNEL) == i);
+	}
+	assert(ida_simple_get(&ida, 5, 30, GFP_KERNEL) < 0);
+
+	for (i = 0; i < 10000; i++) {
+		ida_simple_remove(&ida, i);
+	}
+	assert(ida_is_empty(&ida));
+
+	ida_destroy(&ida);
+}
+
 void ida_checks(void)
 {
 	DEFINE_IDA(ida);
@@ -453,6 +471,7 @@ void ida_checks(void)
 	ida_check_max();
 	ida_check_conv();
 	ida_check_random();
+	ida_simple_get_remove_test();
 
 	radix_tree_cpu_dead(1);
 }
-- 
cgit v1.2.3


From c629a344accd15022dd6d87fa28c8e22f978fbda Mon Sep 17 00:00:00 2001
From: Rehas Sachdeva <aquannie@gmail.com>
Date: Sun, 26 Feb 2017 06:33:00 -0500
Subject: radix tree test suite: Add test for radix_tree_clear_tags()

Assert that radix_tree_clear_tags() clears the tags on the passed node and
slot. Assert that the case where the radix tree has only one entry at index
zero and the node is NULL, is also handled.

Signed-off-by: Rehas Sachdeva <aquannie@gmail.com>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/tag_check.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c
index d4ff00989245..36dcf7d6945d 100644
--- a/tools/testing/radix-tree/tag_check.c
+++ b/tools/testing/radix-tree/tag_check.c
@@ -330,6 +330,34 @@ static void single_check(void)
 	item_kill_tree(&tree);
 }
 
+void radix_tree_clear_tags_test(void)
+{
+	unsigned long index;
+	struct radix_tree_node *node;
+	struct radix_tree_iter iter;
+	void **slot;
+
+	RADIX_TREE(tree, GFP_KERNEL);
+
+	item_insert(&tree, 0);
+	item_tag_set(&tree, 0, 0);
+	__radix_tree_lookup(&tree, 0, &node, &slot);
+	radix_tree_clear_tags(&tree, node, slot);
+	assert(item_tag_get(&tree, 0, 0) == 0);
+
+	for (index = 0; index < 1000; index++) {
+		item_insert(&tree, index);
+		item_tag_set(&tree, index, 0);
+	}
+
+	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+		radix_tree_clear_tags(&tree, iter.node, slot);
+		assert(item_tag_get(&tree, iter.index, 0) == 0);
+	}
+
+	item_kill_tree(&tree);
+}
+
 void tag_check(void)
 {
 	single_check();
@@ -347,4 +375,5 @@ void tag_check(void)
 	thrash_tags();
 	rcu_barrier();
 	printv(2, "after thrash_tags: %d allocated\n", nr_allocated);
+	radix_tree_clear_tags_test();
 }
-- 
cgit v1.2.3


From 0d4a41c1a0335dc515c6e7fabd447263b57f6457 Mon Sep 17 00:00:00 2001
From: Rehas Sachdeva <aquannie@gmail.com>
Date: Sun, 26 Feb 2017 16:17:00 -0500
Subject: radix tree test suite: Add performance benchmarks

Add performance benchmarks for radix tree insertion, tagging and deletion.

Signed-off-by: Rehas Sachdeva <aquannie@gmail.com>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/benchmark.c | 82 +++++++++++++++++++++++++++++++++---
 1 file changed, 75 insertions(+), 7 deletions(-)

diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c
index 9b09ddfe462f..35741b9c2a4a 100644
--- a/tools/testing/radix-tree/benchmark.c
+++ b/tools/testing/radix-tree/benchmark.c
@@ -17,6 +17,9 @@
 #include <time.h>
 #include "test.h"
 
+#define for_each_index(i, base, order) \
+	        for (i = base; i < base + (1 << order); i++)
+
 #define NSEC_PER_SEC	1000000000L
 
 static long long benchmark_iter(struct radix_tree_root *root, bool tagged)
@@ -57,22 +60,87 @@ again:
 	return nsec;
 }
 
+static void benchmark_insert(struct radix_tree_root *root,
+			     unsigned long size, unsigned long step, int order)
+{
+	struct timespec start, finish;
+	unsigned long index;
+	long long nsec;
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+
+	for (index = 0 ; index < size ; index += step)
+		item_insert_order(root, index, order);
+
+	clock_gettime(CLOCK_MONOTONIC, &finish);
+
+	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+	       (finish.tv_nsec - start.tv_nsec);
+
+	printv(2, "Size: %8ld, step: %8ld, order: %d, insertion: %15lld ns\n",
+		size, step, order, nsec);
+}
+
+static void benchmark_tagging(struct radix_tree_root *root,
+			     unsigned long size, unsigned long step, int order)
+{
+	struct timespec start, finish;
+	unsigned long index;
+	long long nsec;
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+
+	for (index = 0 ; index < size ; index += step)
+		radix_tree_tag_set(root, index, 0);
+
+	clock_gettime(CLOCK_MONOTONIC, &finish);
+
+	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+	       (finish.tv_nsec - start.tv_nsec);
+
+	printv(2, "Size: %8ld, step: %8ld, order: %d, tagging: %17lld ns\n",
+		size, step, order, nsec);
+}
+
+static void benchmark_delete(struct radix_tree_root *root,
+			     unsigned long size, unsigned long step, int order)
+{
+	struct timespec start, finish;
+	unsigned long index, i;
+	long long nsec;
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+
+	for (index = 0 ; index < size ; index += step)
+		for_each_index(i, index, order)
+			item_delete(root, i);
+
+	clock_gettime(CLOCK_MONOTONIC, &finish);
+
+	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+	       (finish.tv_nsec - start.tv_nsec);
+
+	printv(2, "Size: %8ld, step: %8ld, order: %d, deletion: %16lld ns\n",
+		size, step, order, nsec);
+}
+
 static void benchmark_size(unsigned long size, unsigned long step, int order)
 {
 	RADIX_TREE(tree, GFP_KERNEL);
 	long long normal, tagged;
-	unsigned long index;
 
-	for (index = 0 ; index < size ; index += step) {
-		item_insert_order(&tree, index, order);
-		radix_tree_tag_set(&tree, index, 0);
-	}
+	benchmark_insert(&tree, size, step, order);
+	benchmark_tagging(&tree, size, step, order);
 
 	tagged = benchmark_iter(&tree, true);
 	normal = benchmark_iter(&tree, false);
 
-	printv(2, "Size %ld, step %6ld, order %d tagged %10lld ns, normal %10lld ns\n",
-		size, step, order, tagged, normal);
+	printv(2, "Size: %8ld, step: %8ld, order: %d, tagged iteration: %8lld ns\n",
+		size, step, order, tagged);
+	printv(2, "Size: %8ld, step: %8ld, order: %d, normal iteration: %8lld ns\n",
+		size, step, order, normal);
+
+	benchmark_delete(&tree, size, step, order);
 
 	item_kill_tree(&tree);
 	rcu_barrier();
-- 
cgit v1.2.3


From 6478581c85cd3091a6188a2433a8093f335f8f2a Mon Sep 17 00:00:00 2001
From: Rehas Sachdeva <aquannie@gmail.com>
Date: Mon, 27 Feb 2017 07:53:00 -0500
Subject: radix tree test suite: Add performance test for radix_tree_split()

Signed-off-by: Rehas Sachdeva <aquannie@gmail.com>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/benchmark.c | 44 ++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c
index 35741b9c2a4a..b03c3f30c740 100644
--- a/tools/testing/radix-tree/benchmark.c
+++ b/tools/testing/radix-tree/benchmark.c
@@ -146,6 +146,46 @@ static void benchmark_size(unsigned long size, unsigned long step, int order)
 	rcu_barrier();
 }
 
+static long long  __benchmark_split(unsigned long index,
+				    int old_order, int new_order)
+{
+	struct timespec start, finish;
+	long long nsec;
+	RADIX_TREE(tree, GFP_ATOMIC);
+
+	item_insert_order(&tree, index, old_order);
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	radix_tree_split(&tree, index, new_order);
+	clock_gettime(CLOCK_MONOTONIC, &finish);
+	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+	       (finish.tv_nsec - start.tv_nsec);
+
+	item_kill_tree(&tree);
+
+	return nsec;
+
+}
+
+static void benchmark_split(unsigned long size, unsigned long step)
+{
+	int i, j, idx;
+	long long nsec = 0;
+
+
+	for (idx = 0; idx < size; idx += step) {
+		for (i = 3; i < 11; i++) {
+			for (j = 0; j < i; j++) {
+				nsec += __benchmark_split(idx, i, j);
+			}
+		}
+	}
+
+	printv(2, "Size %8ld, step %8ld, split time %10lld ns\n",
+			size, step, nsec);
+
+}
+
 void benchmark(void)
 {
 	unsigned long size[] = {1 << 10, 1 << 20, 0};
@@ -163,4 +203,8 @@ void benchmark(void)
 	for (c = 0; size[c]; c++)
 		for (s = 0; step[s]; s++)
 			benchmark_size(size[c], step[s] << 9, 9);
+
+	for (c = 0; size[c]; c++)
+		for (s = 0; step[s]; s++)
+			benchmark_split(size[c], step[s]);
 }
-- 
cgit v1.2.3


From 54f4d3341c8fe31e20915e2c1fb322ff8a069832 Mon Sep 17 00:00:00 2001
From: Rehas Sachdeva <aquannie@gmail.com>
Date: Mon, 27 Feb 2017 08:11:00 -0500
Subject: radix tree test suite: Add performance test for radix_tree_join()

Signed-off-by: Rehas Sachdeva <aquannie@gmail.com>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/benchmark.c | 47 ++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c
index b03c3f30c740..99c40f3ed133 100644
--- a/tools/testing/radix-tree/benchmark.c
+++ b/tools/testing/radix-tree/benchmark.c
@@ -186,6 +186,50 @@ static void benchmark_split(unsigned long size, unsigned long step)
 
 }
 
+static long long  __benchmark_join(unsigned long index,
+			     unsigned order1, unsigned order2)
+{
+	unsigned long loc;
+	struct timespec start, finish;
+	long long nsec;
+	void *item, *item2 = item_create(index + 1, order1);
+	RADIX_TREE(tree, GFP_KERNEL);
+
+	item_insert_order(&tree, index, order2);
+	item = radix_tree_lookup(&tree, index);
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	radix_tree_join(&tree, index + 1, order1, item2);
+	clock_gettime(CLOCK_MONOTONIC, &finish);
+	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+		(finish.tv_nsec - start.tv_nsec);
+
+	loc = find_item(&tree, item);
+	if (loc == -1)
+		free(item);
+
+	item_kill_tree(&tree);
+
+	return nsec;
+}
+
+static void benchmark_join(unsigned long step)
+{
+	int i, j, idx;
+	long long nsec = 0;
+
+	for (idx = 0; idx < 1 << 10; idx += step) {
+		for (i = 1; i < 15; i++) {
+			for (j = 0; j < i; j++) {
+				nsec += __benchmark_join(idx, i, j);
+			}
+		}
+	}
+
+	printv(2, "Size %8d, step %8ld, join time %10lld ns\n",
+			1 << 10, step, nsec);
+}
+
 void benchmark(void)
 {
 	unsigned long size[] = {1 << 10, 1 << 20, 0};
@@ -207,4 +251,7 @@ void benchmark(void)
 	for (c = 0; size[c]; c++)
 		for (s = 0; step[s]; s++)
 			benchmark_split(size[c], step[s]);
+
+	for (s = 0; step[s]; s++)
+		benchmark_join(step[s]);
 }
-- 
cgit v1.2.3


From c4634b08d9eb9c13be2296230bf33c79390dbf9c Mon Sep 17 00:00:00 2001
From: Rehas Sachdeva <aquannie@gmail.com>
Date: Mon, 27 Feb 2017 08:49:00 -0500
Subject: radix tree test suite: Build 32 bit binaries

Add option 'make BUILD=32' for building 32-bit binaries.

Signed-off-by: Rehas Sachdeva <aquannie@gmail.com>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/Makefile | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index f11315bedefc..b59c2a9ef778 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -10,6 +10,10 @@ ifndef SHIFT
 	SHIFT=3
 endif
 
+ifeq ($(BUILD), 32)
+	CFLAGS += -m32
+endif
+
 targets: mapshift $(TARGETS)
 
 main:	$(OFILES)
-- 
cgit v1.2.3


From 284d96a494d705b9c5330c900e976fceda885b9f Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 2 Mar 2017 04:29:00 -0500
Subject: radix tree test suite: Fix build with --as-needed

Currently the radix tree test suite doesn't build with toolchains that
use --as-needed by default, for example Ubuntu's:

  cc -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address -lpthread -lurcu main.o ... -o main
  /usr/bin/ld: regression1.o: undefined reference to symbol 'pthread_join@@GLIBC_2.17'
  /lib/powerpc64le-linux-gnu/libpthread.so.0: error adding symbols: DSO missing from command line
  collect2: error: ld returned 1 exit status

This is caused by the custom makefile rules placing LDFLAGS before the
.o files that need the libraries.

We could fix it by using --no-as-needed, or rewriting the custom rules.
But we can also just drop the custom rules and move the libraries to
LDLIBS, and then the default rules work correctly - with the one caveat
that we need to add -fsanitize=address to LDFLAGS because that must be
passed to the linker as well as the compiler.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/Makefile | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index b59c2a9ef778..022488f50fc6 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -1,6 +1,7 @@
 
 CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address
-LDFLAGS += -lpthread -lurcu
+LDFLAGS += -fsanitize=address
+LDLIBS+= -lpthread -lurcu
 TARGETS = main idr-test multiorder
 CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o
 OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
@@ -17,13 +18,10 @@ endif
 targets: mapshift $(TARGETS)
 
 main:	$(OFILES)
-	$(CC) $(CFLAGS) $(LDFLAGS) $^ -o main
 
 idr-test: idr-test.o $(CORE_OFILES)
-	$(CC) $(CFLAGS) $(LDFLAGS) $^ -o idr-test
 
 multiorder: multiorder.o $(CORE_OFILES)
-	$(CC) $(CFLAGS) $(LDFLAGS) $^ -o multiorder
 
 clean:
 	$(RM) $(TARGETS) *.o radix-tree.c idr.c generated/map-shift.h
-- 
cgit v1.2.3


From 3f1b6f9d49ba5a209d745fa2448657d8b66ed0c0 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Thu, 2 Mar 2017 12:24:28 -0500
Subject: radix tree test suite: Depend on Makefile and quieten grep

Changing the CFLAGS in the Makefile didn't always lead to a
recompilation because the OFILES didn't depend on the Makefile.
Also, after doing make clean, grep would still complain about
a missing map-shift.h; we need -s as well as -q.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index 022488f50fc6..4c6289c5d415 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -28,7 +28,7 @@ clean:
 
 vpath %.c ../../lib
 
-$(OFILES): *.h */*.h generated/map-shift.h \
+$(OFILES): Makefile *.h */*.h generated/map-shift.h \
 	../../include/linux/*.h \
 	../../include/asm/*.h \
 	../../../include/linux/radix-tree.h \
@@ -43,7 +43,7 @@ idr.c: ../../../lib/idr.c
 .PHONY: mapshift
 
 mapshift:
-	@if ! grep -qw $(SHIFT) generated/map-shift.h; then		\
+	@if ! grep -qws $(SHIFT) generated/map-shift.h; then		\
 		echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" >		\
 				generated/map-shift.h;			\
 	fi
-- 
cgit v1.2.3


From 4ecd9542dbc3e07f3bd3870aac12839f72b47db4 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Fri, 3 Mar 2017 12:16:10 -0500
Subject: ida: Free correct IDA bitmap

There's a relatively rare race where we look at the per-cpu preallocated
IDA bitmap, see it's NULL, allocate a new one, and atomically update it.
If the kmalloc() happened to sleep and we were rescheduled to a different
CPU, or an interrupt came in at the exact right time, another task
might have successfully allocated a bitmap and already deposited it.
I forgot what the semantics of cmpxchg() were and ended up freeing the
wrong bitmap leading to KASAN reporting a use-after-free.

Dmitry found the bug with syzkaller & wrote the patch.  I wrote the test
case that will reproduce the bug without his patch being applied.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 lib/radix-tree.c                    |  4 ++--
 tools/testing/radix-tree/idr-test.c | 34 +++++++++++++++++++++++++++++++---
 tools/testing/radix-tree/main.c     |  1 +
 tools/testing/radix-tree/test.h     |  1 +
 4 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 5ed506d648c4..691a9ad48497 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -2129,8 +2129,8 @@ int ida_pre_get(struct ida *ida, gfp_t gfp)
 		struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp);
 		if (!bitmap)
 			return 0;
-		bitmap = this_cpu_cmpxchg(ida_bitmap, NULL, bitmap);
-		kfree(bitmap);
+		if (this_cpu_cmpxchg(ida_bitmap, NULL, bitmap))
+			kfree(bitmap);
 	}
 
 	return 1;
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 86de901fa5c6..30cd0b296f1a 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -363,7 +363,7 @@ void ida_check_random(void)
 {
 	DEFINE_IDA(ida);
 	DECLARE_BITMAP(bitmap, 2048);
-	int id;
+	int id, err;
 	unsigned int i;
 	time_t s = time(NULL);
 
@@ -377,8 +377,11 @@ void ida_check_random(void)
 			ida_remove(&ida, bit);
 		} else {
 			__set_bit(bit, bitmap);
-			ida_pre_get(&ida, GFP_KERNEL);
-			assert(!ida_get_new_above(&ida, bit, &id));
+			do {
+				ida_pre_get(&ida, GFP_KERNEL);
+				err = ida_get_new_above(&ida, bit, &id);
+			} while (err == -ENOMEM);
+			assert(!err);
 			assert(id == bit);
 		}
 	}
@@ -476,11 +479,36 @@ void ida_checks(void)
 	radix_tree_cpu_dead(1);
 }
 
+static void *ida_random_fn(void *arg)
+{
+	rcu_register_thread();
+	ida_check_random();
+	rcu_unregister_thread();
+	return NULL;
+}
+
+void ida_thread_tests(void)
+{
+	pthread_t threads[10];
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(threads); i++)
+		if (pthread_create(&threads[i], NULL, ida_random_fn, NULL)) {
+			perror("creating ida thread");
+			exit(1);
+		}
+
+	while (i--)
+		pthread_join(threads[i], NULL);
+}
+
 int __weak main(void)
 {
 	radix_tree_init();
 	idr_checks();
 	ida_checks();
+	ida_thread_tests();
+	radix_tree_cpu_dead(1);
 	rcu_barrier();
 	if (nr_allocated)
 		printf("nr_allocated = %d\n", nr_allocated);
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
index b829127d5670..bc9a78449572 100644
--- a/tools/testing/radix-tree/main.c
+++ b/tools/testing/radix-tree/main.c
@@ -368,6 +368,7 @@ int main(int argc, char **argv)
 	iteration_test(0, 10 + 90 * long_run);
 	iteration_test(7, 10 + 90 * long_run);
 	single_thread_tests(long_run);
+	ida_thread_tests();
 
 	/* Free any remaining preallocated nodes */
 	radix_tree_cpu_dead(0);
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index b30e11d9d271..0f8220cc6166 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -36,6 +36,7 @@ void iteration_test(unsigned order, unsigned duration);
 void benchmark(void);
 void idr_checks(void);
 void ida_checks(void);
+void ida_thread_tests(void);
 
 struct item *
 item_tag_set(struct radix_tree_root *root, unsigned long index, int tag);
-- 
cgit v1.2.3


From f0f3f2d0a3e0f7c48163fd8b45f5909d46e7f371 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Fri, 3 Mar 2017 12:28:37 -0500
Subject: radix tree test suite: Specify -m32 in LDFLAGS too

Michael's patch to use the default make rule for linking and the patch
from Rehas to use -m32 if building a 32-bit test-suite on a 64-bit
platform don't work well together.

Reported-by: Rehas Sachdeva <aquannie@gmail.com>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index 4c6289c5d415..6a9480c03cbd 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -13,6 +13,7 @@ endif
 
 ifeq ($(BUILD), 32)
 	CFLAGS += -m32
+	LDFLAGS += -m32
 endif
 
 targets: mapshift $(TARGETS)
-- 
cgit v1.2.3


From 544714d8e17c33822319d5a1a00e5ddc4db502b6 Mon Sep 17 00:00:00 2001
From: Jaehoon Chung <jh80.chung@samsung.com>
Date: Tue, 7 Mar 2017 19:54:05 +0900
Subject: PCI: exynos: Initialize elbi_base even when using PHY framework

Even when using the PHY framework, we need the elbi_base.  Before this
patch, we didn't initialize elbi_base, which caused NULL pointer
dereferences later.

Fixes: e7cd7ef58e1f ("PCI: exynos: Support the PHY generic framework")
Signed-off-by: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/dwc/pci-exynos.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/dwc/pci-exynos.c b/drivers/pci/dwc/pci-exynos.c
index 993b650ef275..44f774c12fb2 100644
--- a/drivers/pci/dwc/pci-exynos.c
+++ b/drivers/pci/dwc/pci-exynos.c
@@ -132,10 +132,6 @@ static int exynos5440_pcie_get_mem_resources(struct platform_device *pdev,
 	struct device *dev = pci->dev;
 	struct resource *res;
 
-	/* If using the PHY framework, doesn't need to get other resource */
-	if (ep->using_phy)
-		return 0;
-
 	ep->mem_res = devm_kzalloc(dev, sizeof(*ep->mem_res), GFP_KERNEL);
 	if (!ep->mem_res)
 		return -ENOMEM;
@@ -145,6 +141,10 @@ static int exynos5440_pcie_get_mem_resources(struct platform_device *pdev,
 	if (IS_ERR(ep->mem_res->elbi_base))
 		return PTR_ERR(ep->mem_res->elbi_base);
 
+	/* If using the PHY framework, doesn't need to get other resource */
+	if (ep->using_phy)
+		return 0;
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
 	ep->mem_res->phy_base = devm_ioremap_resource(dev, res);
 	if (IS_ERR(ep->mem_res->phy_base))
-- 
cgit v1.2.3


From f98c7bce570bdbe344b74ff5daa7dfeef3f22929 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Sat, 25 Feb 2017 18:36:44 +0200
Subject: serial: samsung: Continue to work if DMA request fails
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If DMA is not available (even when configured in DeviceTree), the driver
will fail the startup procedure thus making serial console not
available.

For example this causes boot failure on QEMU ARMv7 (Exynos4210, SMDKC210):
    [    1.302575] OF: amba_device_add() failed (-19) for /amba/pdma@12680000
    ...
    [   11.435732] samsung-uart 13800000.serial: DMA request failed
    [   72.963893] samsung-uart 13800000.serial: DMA request failed
    [   73.143361] Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000000

DMA is not necessary for serial to work, so continue with UART startup
after emitting a warning.

Fixes: 62c37eedb74c ("serial: samsung: add dma reqest/release functions")
Cc: <stable@vger.kernel.org>
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/samsung.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c
index b4f86c219db1..7a17aedbf902 100644
--- a/drivers/tty/serial/samsung.c
+++ b/drivers/tty/serial/samsung.c
@@ -1031,8 +1031,10 @@ static int s3c64xx_serial_startup(struct uart_port *port)
 	if (ourport->dma) {
 		ret = s3c24xx_serial_request_dma(ourport);
 		if (ret < 0) {
-			dev_warn(port->dev, "DMA request failed\n");
-			return ret;
+			dev_warn(port->dev,
+				 "DMA request failed, DMA will not be used\n");
+			devm_kfree(port->dev, ourport->dma);
+			ourport->dma = NULL;
 		}
 	}
 
-- 
cgit v1.2.3


From a3a4a816b4b194c45d0217e8b9e08b2639802cda Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey@googlemail.com>
Date: Mon, 27 Feb 2017 21:54:50 +0100
Subject: dt: emac: document device-tree based phy discovery and setup

This patch adds documentation for a new "phy-handle" property,
"fixed-link" and "mdio" sub-node. These allows the enumeration
of PHYs which are supported by the phy library under drivers/net/phy.

The EMAC ethernet controller in IBM and AMCC 4xx chips is
currently stuck with a few privately defined phy
implementations. It has no support for PHYs which
are supported by the generic phylib.

Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Christian Lamparter <chunkeey@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/powerpc/4xx/emac.txt       | 62 +++++++++++++++++++++-
 1 file changed, 60 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/powerpc/4xx/emac.txt b/Documentation/devicetree/bindings/powerpc/4xx/emac.txt
index 712baf6c3e24..44b842b6ca15 100644
--- a/Documentation/devicetree/bindings/powerpc/4xx/emac.txt
+++ b/Documentation/devicetree/bindings/powerpc/4xx/emac.txt
@@ -71,6 +71,9 @@
 			  For Axon it can be absent, though my current driver
 			  doesn't handle phy-address yet so for now, keep
 			  0x00ffffff in it.
+    - phy-handle	: Used to describe configurations where a external PHY
+			  is used. Please refer to:
+			  Documentation/devicetree/bindings/net/ethernet.txt
     - rx-fifo-size-gige : 1 cell, Rx fifo size in bytes for 1000 Mb/sec
 			  operations (if absent the value is the same as
 			  rx-fifo-size).  For Axon, either absent or 2048.
@@ -81,8 +84,22 @@
 			  offload, phandle of the TAH device node.
     - tah-channel       : 1 cell, optional. If appropriate, channel used on the
 			  TAH engine.
+    - fixed-link	: Fixed-link subnode describing a link to a non-MDIO
+			  managed entity. See
+			  Documentation/devicetree/bindings/net/fixed-link.txt
+			  for details.
+    - mdio subnode	: When the EMAC has a phy connected to its local
+			  mdio, which us supported by the kernel's network
+			  PHY library in drivers/net/phy, there must be device
+			  tree subnode with the following required properties:
+				- #address-cells: Must be <1>.
+				- #size-cells: Must be <0>.
 
-    Example:
+			  For PHY definitions: Please refer to
+			  Documentation/devicetree/bindings/net/phy.txt and
+			  Documentation/devicetree/bindings/net/ethernet.txt
+
+    Examples:
 
 	EMAC0: ethernet@40000800 {
 		device_type = "network";
@@ -104,6 +121,48 @@
 		zmii-channel = <0>;
 	};
 
+	EMAC1: ethernet@ef600c00 {
+		device_type = "network";
+		compatible = "ibm,emac-apm821xx", "ibm,emac4sync";
+		interrupt-parent = <&EMAC1>;
+		interrupts = <0 1>;
+		#interrupt-cells = <1>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		interrupt-map = <0 &UIC2 0x10 IRQ_TYPE_LEVEL_HIGH /* Status */
+				 1 &UIC2 0x14 IRQ_TYPE_LEVEL_HIGH /* Wake */>;
+		reg = <0xef600c00 0x000000c4>;
+		local-mac-address = [000000000000]; /* Filled in by U-Boot */
+		mal-device = <&MAL0>;
+		mal-tx-channel = <0>;
+		mal-rx-channel = <0>;
+		cell-index = <0>;
+		max-frame-size = <9000>;
+		rx-fifo-size = <16384>;
+		tx-fifo-size = <2048>;
+		fifo-entry-size = <10>;
+		phy-mode = "rgmii";
+		phy-handle = <&phy0>;
+		phy-map = <0x00000000>;
+		rgmii-device = <&RGMII0>;
+		rgmii-channel = <0>;
+		tah-device = <&TAH0>;
+		tah-channel = <0>;
+		has-inverted-stacr-oc;
+		has-new-stacr-staopc;
+
+	        mdio {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			phy0: ethernet-phy@0 {
+				compatible = "ethernet-phy-ieee802.3-c22";
+				reg = <0>;
+			};
+		};
+	};
+
+
       ii) McMAL node
 
     Required properties:
@@ -145,4 +204,3 @@
     - revision           : as provided by the RGMII new version register if
 			   available.
 			   For Axon: 0x0000012a
-
-- 
cgit v1.2.3


From 0d5370d1d85251e5893ab7c90a429464de2e140b Mon Sep 17 00:00:00 2001
From: Ethan Zhao <ethan.zhao@oracle.com>
Date: Mon, 27 Feb 2017 17:08:44 +0900
Subject: PCI: Prevent VPD access for QLogic ISP2722

QLogic ISP2722-based 16/32Gb Fibre Channel to PCIe Adapter has the VPD
access issue too, while read the common pci-sysfs access interface shown as

 /sys/devices/pci0000:00/0000:00:03.2/0000:0b:00.0/vpd

with simple 'cat' could cause system hang and panic:

  Kernel panic - not syncing: An NMI occurred. Depending on your system the reason for the NMI is logged in any one of the following resources:
  1. Integrated Management Log (IML)
  2. OA Syslog
  3. OA Forward Progress Log
  4. iLO Event Log
  CPU: 0 PID: 15070 Comm: udevadm Not tainted 4.1.12
  Hardware name: HP ProLiant DL380 Gen9/ProLiant DL380 Gen9, BIOS P89 12/27/2015
   0000000000000086 000000007f0cdf51 ffff880c4fa05d58 ffffffff817193de
   ffffffffa00b42d8 0000000000000075 ffff880c4fa05dd8 ffffffff81714072
   0000000000000008 ffff880c4fa05de8 ffff880c4fa05d88 000000007f0cdf51
  Call Trace:
   <NMI>  [<ffffffff817193de>] dump_stack+0x63/0x81
   [<ffffffff81714072>] panic+0xd0/0x20e
   [<ffffffffa00b390d>] hpwdt_pretimeout+0xdd/0xe0 [hpwdt]
   [<ffffffff81021fc9>] ? sched_clock+0x9/0x10
   [<ffffffff8101c101>] nmi_handle+0x91/0x170
   [<ffffffff8101c10c>] ? nmi_handle+0x9c/0x170
   [<ffffffff8101c5fe>] io_check_error+0x1e/0xa0
   [<ffffffff8101c719>] default_do_nmi+0x99/0x140
   [<ffffffff8101c8b4>] do_nmi+0xf4/0x170
   [<ffffffff817232c5>] end_repeat_nmi+0x1a/0x1e
   [<ffffffff815d724b>] ? pci_conf1_read+0xeb/0x120
   [<ffffffff815d724b>] ? pci_conf1_read+0xeb/0x120
   [<ffffffff815d724b>] ? pci_conf1_read+0xeb/0x120
   <<EOE>>  [<ffffffff815db4b3>] raw_pci_read+0x23/0x40
   [<ffffffff815db4fc>] pci_read+0x2c/0x30
   [<ffffffff8136f612>] pci_user_read_config_word+0x72/0x110
   [<ffffffff8136f746>] pci_vpd_pci22_wait+0x96/0x130
   [<ffffffff8136ff9b>] pci_vpd_pci22_read+0xdb/0x1a0
   [<ffffffff8136ea30>] pci_read_vpd+0x20/0x30
   [<ffffffff8137d590>] read_vpd_attr+0x30/0x40
   [<ffffffff8128e037>] sysfs_kf_bin_read+0x47/0x70
   [<ffffffff8128d24e>] kernfs_fop_read+0xae/0x180
   [<ffffffff8120dd97>] __vfs_read+0x37/0x100
   [<ffffffff812ba7e4>] ? security_file_permission+0x84/0xa0
   [<ffffffff8120e366>] ? rw_verify_area+0x56/0xe0
   [<ffffffff8120e476>] vfs_read+0x86/0x140
   [<ffffffff8120f3f5>] SyS_read+0x55/0xd0
   [<ffffffff81720f2e>] system_call_fastpath+0x12/0x71
  Shutting down cpus with NMI
  Kernel Offset: disabled
  drm_kms_helper: panic occurred, switching back to text console

So blacklist the access to its VPD.

Signed-off-by: Ethan Zhao <ethan.zhao@oracle.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
CC: stable@vger.kernel.org	# v4.6+
---
 drivers/pci/quirks.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index f754453fe754..673683660b5c 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -2174,6 +2174,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005d, quirk_blacklist_vpd);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005f, quirk_blacklist_vpd);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, PCI_ANY_ID,
 		quirk_blacklist_vpd);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_QLOGIC, 0x2261, quirk_blacklist_vpd);
 
 /*
  * For Broadcom 5706, 5708, 5709 rev. A nics, any read beyond the
-- 
cgit v1.2.3


From 3bd7db63a841e8c5297bb18ad801df67d5e38ad2 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 1 Mar 2017 00:25:40 -0800
Subject: PCI/ASPM: Always set link->downstream to avoid NULL dereference on
 remove

We call pcie_aspm_exit_link_state() when we remove a device.  If the device
is the last PCIe function to be removed below a bridge and the bridge has
an ASPM link_state struct, we disable ASPM on the link.  Disabling ASPM
requires link->downstream (used in pcie_config_aspm_link()).

We previously set link->downstream in pcie_aspm_cap_init(), but only if the
device was not blacklisted.  Removing the blacklisted device caused a NULL
pointer dereference in the pcie_aspm_exit_link_state() ->
pcie_config_aspm_link() path:

  # echo 1 > /sys/bus/pci/devices/0000\:0b\:00.0/remove
  ...
   BUG: unable to handle kernel NULL pointer dereference at 0000000000000080
   IP: pcie_config_aspm_link+0x5d/0x2b0
   Call Trace:
    pcie_aspm_exit_link_state+0x75/0x130
    pci_stop_bus_device+0xa4/0xb0
    pci_stop_and_remove_bus_device_locked+0x1a/0x30
    remove_store+0x50/0x70
    dev_attr_store+0x18/0x30
    sysfs_kf_write+0x44/0x60
    kernfs_fop_write+0x10e/0x190
    __vfs_write+0x28/0x110
    ? rcu_read_lock_sched_held+0x5d/0x80
    ? rcu_sync_lockdep_assert+0x2c/0x60
    ? __sb_start_write+0x173/0x1a0
    ? vfs_write+0xb3/0x180
    vfs_write+0xc4/0x180
    SyS_write+0x49/0xa0
    do_syscall_64+0xa6/0x1c0
    entry_SYSCALL64_slow_path+0x25/0x25
   ---[ end trace bd187ee0267df5d9 ]---

To avoid this, set link->downstream in alloc_pcie_link_state(), so every
pcie_link_state structure has a valid link->downstream pointer.

[bhelgaas: changelog]
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Rajat Jain <rajatja@google.com>
CC: stable@vger.kernel.org
---
 drivers/pci/pcie/aspm.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 973472c23d89..1dfa10cc566b 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -478,7 +478,7 @@ static void aspm_calc_l1ss_info(struct pcie_link_state *link,
 
 static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
 {
-	struct pci_dev *child, *parent = link->pdev;
+	struct pci_dev *child = link->downstream, *parent = link->pdev;
 	struct pci_bus *linkbus = parent->subordinate;
 	struct aspm_register_info upreg, dwreg;
 
@@ -491,9 +491,7 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
 
 	/* Get upstream/downstream components' register state */
 	pcie_get_aspm_reg(parent, &upreg);
-	child = pci_function_0(linkbus);
 	pcie_get_aspm_reg(child, &dwreg);
-	link->downstream = child;
 
 	/*
 	 * If ASPM not supported, don't mess with the clocks and link,
@@ -800,6 +798,7 @@ static struct pcie_link_state *alloc_pcie_link_state(struct pci_dev *pdev)
 	INIT_LIST_HEAD(&link->children);
 	INIT_LIST_HEAD(&link->link);
 	link->pdev = pdev;
+	link->downstream = pci_function_0(pdev->subordinate);
 
 	/*
 	 * Root Ports and PCI/PCI-X to PCIe Bridges are roots of PCIe
-- 
cgit v1.2.3


From 0253f2681f4445e9003fb27dc743ef7d25e8f3b9 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 28 Feb 2017 22:12:04 +0100
Subject: net/mlx5e: add IPV6 dependency

The ethernet support now calls directly into the ipv6 core code, which
fails if IPV6 is a loadable module but mlx5 is built-in:

drivers/net/ethernet/mellanox/mlx5/core/en_tc.o: In function `mlx5e_create_encap_header_ipv6':
en_tc.c:(.text.mlx5e_create_encap_header_ipv6+0x110): undefined reference to `ip6_route_output_flags'

This adds a dependency to ensure that MLX5_CORE_EN can only be built
if we are able link the kernel successfully. The downside is that the
ethernet option can be hidden. Alternatively we could make MLX5_CORE
depend on "IPV6 || !IPV6", which would force MLX5_CORE to be a module
when IPV6 is, including in configurations where we don't use the ethernet
support at all.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index ddb4ca4ff930..117170014e88 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -14,6 +14,7 @@ config MLX5_CORE
 config MLX5_CORE_EN
 	bool "Mellanox Technologies ConnectX-4 Ethernet support"
 	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
+	depends on IPV6=y || IPV6=n || MLX5_CORE=m
 	imply PTP_1588_CLOCK
 	default n
 	---help---
-- 
cgit v1.2.3


From 4342696df764ec65dcdfbd0c10d90ea52505f8ba Mon Sep 17 00:00:00 2001
From: "Blomme, Maarten" <Maarten.Blomme@flir.com>
Date: Thu, 2 Mar 2017 13:08:36 +0100
Subject: spi_ks8995: fix "BUG: key accdaa28 not in .data!"

Signed-off-by: Maarten Blomme <Maarten.Blomme@flir.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/spi_ks8995.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c
index 93ffedfa2994..aa7209d794f3 100644
--- a/drivers/net/phy/spi_ks8995.c
+++ b/drivers/net/phy/spi_ks8995.c
@@ -498,6 +498,7 @@ static int ks8995_probe(struct spi_device *spi)
 	if (err)
 		return err;
 
+	sysfs_attr_init(&ks->regs_attr.attr);
 	err = sysfs_create_bin_file(&spi->dev.kobj, &ks->regs_attr);
 	if (err) {
 		dev_err(&spi->dev, "unable to create sysfs file, err=%d\n",
-- 
cgit v1.2.3


From 239870f2a0ebf75cc8f6d987dc528c5243f93d69 Mon Sep 17 00:00:00 2001
From: "Blomme, Maarten" <Maarten.Blomme@flir.com>
Date: Thu, 2 Mar 2017 13:08:49 +0100
Subject: spi_ks8995: regs_size incorrect for some devices

Signed-off-by: Maarten Blomme <Maarten.Blomme@flir.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/spi_ks8995.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c
index aa7209d794f3..1e2d4f1179da 100644
--- a/drivers/net/phy/spi_ks8995.c
+++ b/drivers/net/phy/spi_ks8995.c
@@ -491,8 +491,8 @@ static int ks8995_probe(struct spi_device *spi)
 	if (err)
 		return err;
 
-	ks->regs_attr.size = ks->chip->regs_size;
 	memcpy(&ks->regs_attr, &ks8995_registers_attr, sizeof(ks->regs_attr));
+	ks->regs_attr.size = ks->chip->regs_size;
 
 	err = ks8995_reset(ks);
 	if (err)
-- 
cgit v1.2.3


From 466e8bf10ac104d96e1ea813e8126e11cb72ea20 Mon Sep 17 00:00:00 2001
From: Michal Schmidt <mschmidt@redhat.com>
Date: Fri, 3 Mar 2017 17:08:28 +0100
Subject: bnx2x: prevent crash when accessing PTP with interface down

It is possible to crash the kernel by accessing a PTP device while its
associated bnx2x interface is down. Before the interface is brought up,
the timecounter is not initialized, so accessing it results in NULL
dereference.

Fix it by checking if the interface is up.

Use -ENETDOWN as the error code when the interface is down.
 -EFAULT in bnx2x_ptp_adjfreq() did not seem right.

Tested using phc_ctl get/set/adj/freq commands.

Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index d8d06fdfc42b..d57290b9ea75 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -13738,7 +13738,7 @@ static int bnx2x_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
 	if (!netif_running(bp->dev)) {
 		DP(BNX2X_MSG_PTP,
 		   "PTP adjfreq called while the interface is down\n");
-		return -EFAULT;
+		return -ENETDOWN;
 	}
 
 	if (ppb < 0) {
@@ -13797,6 +13797,12 @@ static int bnx2x_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
 {
 	struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info);
 
+	if (!netif_running(bp->dev)) {
+		DP(BNX2X_MSG_PTP,
+		   "PTP adjtime called while the interface is down\n");
+		return -ENETDOWN;
+	}
+
 	DP(BNX2X_MSG_PTP, "PTP adjtime called, delta = %llx\n", delta);
 
 	timecounter_adjtime(&bp->timecounter, delta);
@@ -13809,6 +13815,12 @@ static int bnx2x_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
 	struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info);
 	u64 ns;
 
+	if (!netif_running(bp->dev)) {
+		DP(BNX2X_MSG_PTP,
+		   "PTP gettime called while the interface is down\n");
+		return -ENETDOWN;
+	}
+
 	ns = timecounter_read(&bp->timecounter);
 
 	DP(BNX2X_MSG_PTP, "PTP gettime called, ns = %llu\n", ns);
@@ -13824,6 +13836,12 @@ static int bnx2x_ptp_settime(struct ptp_clock_info *ptp,
 	struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info);
 	u64 ns;
 
+	if (!netif_running(bp->dev)) {
+		DP(BNX2X_MSG_PTP,
+		   "PTP settime called while the interface is down\n");
+		return -ENETDOWN;
+	}
+
 	ns = timespec64_to_ns(ts);
 
 	DP(BNX2X_MSG_PTP, "PTP settime called, ns = %llu\n", ns);
-- 
cgit v1.2.3


From 850268d320f0c7c5eb7ad0a62ef21859fa331ded Mon Sep 17 00:00:00 2001
From: Michal Schmidt <mschmidt@redhat.com>
Date: Fri, 3 Mar 2017 17:08:29 +0100
Subject: bnx2x: lower verbosity of VF stats debug messages

When BNX2X_MSG_IOV is enabled, the driver produces too many VF statistics
messages. Lower the verbosity of the VF stats messages similarly as in
commit 76ca70fabbdaa3 ("bnx2x: [Debug] change verbosity of some prints").

Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 6fad22adbbb9..9f0f851774f2 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -1899,7 +1899,8 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp)
 			continue;
 		}
 
-		DP(BNX2X_MSG_IOV, "add addresses for vf %d\n", vf->abs_vfid);
+		DP_AND((BNX2X_MSG_IOV | BNX2X_MSG_STATS),
+		       "add addresses for vf %d\n", vf->abs_vfid);
 		for_each_vfq(vf, j) {
 			struct bnx2x_vf_queue *rxq = vfq_get(vf, j);
 
@@ -1920,11 +1921,12 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp)
 				cpu_to_le32(U64_HI(q_stats_addr));
 			cur_query_entry->address.lo =
 				cpu_to_le32(U64_LO(q_stats_addr));
-			DP(BNX2X_MSG_IOV,
-			   "added address %x %x for vf %d queue %d client %d\n",
-			   cur_query_entry->address.hi,
-			   cur_query_entry->address.lo, cur_query_entry->funcID,
-			   j, cur_query_entry->index);
+			DP_AND((BNX2X_MSG_IOV | BNX2X_MSG_STATS),
+			       "added address %x %x for vf %d queue %d client %d\n",
+			       cur_query_entry->address.hi,
+			       cur_query_entry->address.lo,
+			       cur_query_entry->funcID,
+			       j, cur_query_entry->index);
 			cur_query_entry++;
 			cur_data_offset += sizeof(struct per_queue_stats);
 			stats_count++;
-- 
cgit v1.2.3


From 22118d861cec5da6ed525aaf12a3de9bfeffc58f Mon Sep 17 00:00:00 2001
From: Michal Schmidt <mschmidt@redhat.com>
Date: Fri, 3 Mar 2017 17:08:30 +0100
Subject: bnx2x: fix possible overrun of VFPF multicast addresses array

It is too late to check for the limit of the number of VF multicast
addresses after they have already been copied to the req->multicast[]
array, possibly overflowing it.

Do the check before copying.

Also fix the error path to not skip unlocking vf2pf_mutex.

Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index bfae300cf25f..c2d327d9dff0 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -868,7 +868,7 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev)
 	struct bnx2x *bp = netdev_priv(dev);
 	struct vfpf_set_q_filters_tlv *req = &bp->vf2pf_mbox->req.set_q_filters;
 	struct pfvf_general_resp_tlv *resp = &bp->vf2pf_mbox->resp.general_resp;
-	int rc, i = 0;
+	int rc = 0, i = 0;
 	struct netdev_hw_addr *ha;
 
 	if (bp->state != BNX2X_STATE_OPEN) {
@@ -883,6 +883,15 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev)
 	/* Get Rx mode requested */
 	DP(NETIF_MSG_IFUP, "dev->flags = %x\n", dev->flags);
 
+	/* We support PFVF_MAX_MULTICAST_PER_VF mcast addresses tops */
+	if (netdev_mc_count(dev) > PFVF_MAX_MULTICAST_PER_VF) {
+		DP(NETIF_MSG_IFUP,
+		   "VF supports not more than %d multicast MAC addresses\n",
+		   PFVF_MAX_MULTICAST_PER_VF);
+		rc = -EINVAL;
+		goto out;
+	}
+
 	netdev_for_each_mc_addr(ha, dev) {
 		DP(NETIF_MSG_IFUP, "Adding mcast MAC: %pM\n",
 		   bnx2x_mc_addr(ha));
@@ -890,16 +899,6 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev)
 		i++;
 	}
 
-	/* We support four PFVF_MAX_MULTICAST_PER_VF mcast
-	  * addresses tops
-	  */
-	if (i >= PFVF_MAX_MULTICAST_PER_VF) {
-		DP(NETIF_MSG_IFUP,
-		   "VF supports not more than %d multicast MAC addresses\n",
-		   PFVF_MAX_MULTICAST_PER_VF);
-		return -EINVAL;
-	}
-
 	req->n_multicast = i;
 	req->flags |= VFPF_SET_Q_FILTERS_MULTICAST_CHANGED;
 	req->vf_qid = 0;
@@ -924,7 +923,7 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev)
 out:
 	bnx2x_vfpf_finalize(bp, &req->first_tlv);
 
-	return 0;
+	return rc;
 }
 
 /* request pf to add a vlan for the vf */
-- 
cgit v1.2.3


From 83bd9eb8fc69cdd5135ed6e1f066adc8841800fd Mon Sep 17 00:00:00 2001
From: Michal Schmidt <mschmidt@redhat.com>
Date: Fri, 3 Mar 2017 17:08:31 +0100
Subject: bnx2x: fix detection of VLAN filtering feature for VF

VFs are currently missing the VLAN filtering feature, because we were
checking the PF's acquire response before actually performing the acquire.

Fix it by setting the feature flag later when we have the PF response.

Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index d57290b9ea75..ac76fc251d26 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -13292,17 +13292,15 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev,
 	dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
 		NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_HIGHDMA;
 
-	/* VF with OLD Hypervisor or old PF do not support filtering */
 	if (IS_PF(bp)) {
 		if (chip_is_e1x)
 			bp->accept_any_vlan = true;
 		else
 			dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
-#ifdef CONFIG_BNX2X_SRIOV
-	} else if (bp->acquire_resp.pfdev_info.pf_cap & PFVF_CAP_VLAN_FILTER) {
-		dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
-#endif
 	}
+	/* For VF we'll know whether to enable VLAN filtering after
+	 * getting a response to CHANNEL_TLV_ACQUIRE from PF.
+	 */
 
 	dev->features |= dev->hw_features | NETIF_F_HW_VLAN_CTAG_RX;
 	dev->features |= NETIF_F_HIGHDMA;
@@ -14009,6 +14007,14 @@ static int bnx2x_init_one(struct pci_dev *pdev,
 		rc = bnx2x_vfpf_acquire(bp, tx_count, rx_count);
 		if (rc)
 			goto init_one_freemem;
+
+#ifdef CONFIG_BNX2X_SRIOV
+		/* VF with OLD Hypervisor or old PF do not support filtering */
+		if (bp->acquire_resp.pfdev_info.pf_cap & PFVF_CAP_VLAN_FILTER) {
+			dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+			dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+		}
+#endif
 	}
 
 	/* Enable SRIOV if capability found in configuration space */
-- 
cgit v1.2.3


From 78d5505432436516456c12abbe705ec8dee7ee2b Mon Sep 17 00:00:00 2001
From: Michal Schmidt <mschmidt@redhat.com>
Date: Fri, 3 Mar 2017 17:08:32 +0100
Subject: bnx2x: do not rollback VF MAC/VLAN filters we did not configure

On failure to configure a VF MAC/VLAN filter we should not attempt to
rollback filters that we failed to configure with -EEXIST.

Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 8 +++++++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h | 1 +
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 9f0f851774f2..2068bb8f5495 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -434,7 +434,9 @@ static int bnx2x_vf_mac_vlan_config(struct bnx2x *bp,
 
 	/* Add/Remove the filter */
 	rc = bnx2x_config_vlan_mac(bp, &ramrod);
-	if (rc && rc != -EEXIST) {
+	if (rc == -EEXIST)
+		return 0;
+	if (rc) {
 		BNX2X_ERR("Failed to %s %s\n",
 			  filter->add ? "add" : "delete",
 			  (filter->type == BNX2X_VF_FILTER_VLAN_MAC) ?
@@ -444,6 +446,8 @@ static int bnx2x_vf_mac_vlan_config(struct bnx2x *bp,
 		return rc;
 	}
 
+	filter->applied = true;
+
 	return 0;
 }
 
@@ -471,6 +475,8 @@ int bnx2x_vf_mac_vlan_config_list(struct bnx2x *bp, struct bnx2x_virtf *vf,
 		BNX2X_ERR("Managed only %d/%d filters - rolling back\n",
 			  i, filters->count + 1);
 		while (--i >= 0) {
+			if (!filters->filters[i].applied)
+				continue;
 			filters->filters[i].add = !filters->filters[i].add;
 			bnx2x_vf_mac_vlan_config(bp, vf, qid,
 						 &filters->filters[i],
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index 7a6d406f4c11..888d0b6632e8 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -114,6 +114,7 @@ struct bnx2x_vf_mac_vlan_filter {
 	(BNX2X_VF_FILTER_MAC | BNX2X_VF_FILTER_VLAN) /*shortcut*/
 
 	bool add;
+	bool applied;
 	u8 *mac;
 	u16 vid;
 };
-- 
cgit v1.2.3


From 74bcbeb7d77ec92e4262fc340cb436ef7d98ba01 Mon Sep 17 00:00:00 2001
From: Michal Schmidt <mschmidt@redhat.com>
Date: Fri, 3 Mar 2017 17:08:33 +0100
Subject: bnx2x: fix incorrect filter count in an error message

filters->count is the number of filters we were supposed to configure.
There is no reason to increase it by +1 when printing the count in an error
message.

Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 2068bb8f5495..bdfd53b46bc5 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -473,7 +473,7 @@ int bnx2x_vf_mac_vlan_config_list(struct bnx2x *bp, struct bnx2x_virtf *vf,
 	/* Rollback if needed */
 	if (i != filters->count) {
 		BNX2X_ERR("Managed only %d/%d filters - rolling back\n",
-			  i, filters->count + 1);
+			  i, filters->count);
 		while (--i >= 0) {
 			if (!filters->filters[i].applied)
 				continue;
-- 
cgit v1.2.3


From e39513259450a8312cb98a9a3b16bb924310dbcc Mon Sep 17 00:00:00 2001
From: Michal Schmidt <mschmidt@redhat.com>
Date: Fri, 3 Mar 2017 17:08:34 +0100
Subject: bnx2x: add missing configuration of VF VLAN filters

Configuring VLANs from the VF side had no effect, because the PF ignored
filters of type VFPF_VLAN_FILTER in the VF-PF message.

Add the missing filter type to configure.

Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index c2d327d9dff0..76a4668c50fe 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -1777,6 +1777,23 @@ static int bnx2x_vf_mbx_qfilters(struct bnx2x *bp, struct bnx2x_virtf *vf)
 				goto op_err;
 		}
 
+		/* build vlan list */
+		fl = NULL;
+
+		rc = bnx2x_vf_mbx_macvlan_list(bp, vf, msg, &fl,
+					       VFPF_VLAN_FILTER);
+		if (rc)
+			goto op_err;
+
+		if (fl) {
+			/* set vlan list */
+			rc = bnx2x_vf_mac_vlan_config_list(bp, vf, fl,
+							   msg->vf_qid,
+							   false);
+			if (rc)
+				goto op_err;
+		}
+
 	}
 
 	if (msg->flags & VFPF_SET_Q_FILTERS_RX_MASK_CHANGED) {
-- 
cgit v1.2.3


From 02b2faaf0af1d85585f6d6980e286d53612acfc2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 3 Mar 2017 14:08:21 -0800
Subject: tcp: fix various issues for sockets morphing to listen state

Dmitry Vyukov reported a divide by 0 triggered by syzkaller, exploiting
tcp_disconnect() path that was never really considered and/or used
before syzkaller ;)

I was not able to reproduce the bug, but it seems issues here are the
three possible actions that assumed they would never trigger on a
listener.

1) tcp_write_timer_handler
2) tcp_delack_timer_handler
3) MTU reduction

Only IPv6 MTU reduction was properly testing TCP_CLOSE and TCP_LISTEN
 states from tcp_v6_mtu_reduced()

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c  | 7 +++++--
 net/ipv4/tcp_timer.c | 6 ++++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 9a89b8deafae..8f3ec1365497 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -279,10 +279,13 @@ EXPORT_SYMBOL(tcp_v4_connect);
  */
 void tcp_v4_mtu_reduced(struct sock *sk)
 {
-	struct dst_entry *dst;
 	struct inet_sock *inet = inet_sk(sk);
-	u32 mtu = tcp_sk(sk)->mtu_info;
+	struct dst_entry *dst;
+	u32 mtu;
 
+	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
+		return;
+	mtu = tcp_sk(sk)->mtu_info;
 	dst = inet_csk_update_pmtu(sk, mtu);
 	if (!dst)
 		return;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 40d893556e67..b2ab411c6d37 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -249,7 +249,8 @@ void tcp_delack_timer_handler(struct sock *sk)
 
 	sk_mem_reclaim_partial(sk);
 
-	if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
+	if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
+	    !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
 		goto out;
 
 	if (time_after(icsk->icsk_ack.timeout, jiffies)) {
@@ -552,7 +553,8 @@ void tcp_write_timer_handler(struct sock *sk)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	int event;
 
-	if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending)
+	if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
+	    !icsk->icsk_pending)
 		goto out;
 
 	if (time_after(icsk->icsk_timeout, jiffies)) {
-- 
cgit v1.2.3


From 146d8fef9da1ba854de7fa28b9cb9eb147535f88 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 4 Mar 2017 00:01:41 +0000
Subject: rxrpc: Call state should be read with READ_ONCE() under some
 circumstances

The call state may be changed at any time by the data-ready routine in
response to received packets, so if the call state is to be read and acted
upon several times in a function, READ_ONCE() must be used unless the call
state lock is held.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/input.c   | 12 +++++++-----
 net/rxrpc/recvmsg.c |  4 ++--
 net/rxrpc/sendmsg.c | 48 ++++++++++++++++++++++++++++++------------------
 3 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 9f4cfa25af7c..d74921c4969b 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -420,6 +420,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb,
 			     u16 skew)
 {
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	enum rxrpc_call_state state;
 	unsigned int offset = sizeof(struct rxrpc_wire_header);
 	unsigned int ix;
 	rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0;
@@ -434,14 +435,15 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb,
 	_proto("Rx DATA %%%u { #%u f=%02x }",
 	       sp->hdr.serial, seq, sp->hdr.flags);
 
-	if (call->state >= RXRPC_CALL_COMPLETE)
+	state = READ_ONCE(call->state);
+	if (state >= RXRPC_CALL_COMPLETE)
 		return;
 
 	/* Received data implicitly ACKs all of the request packets we sent
 	 * when we're acting as a client.
 	 */
-	if ((call->state == RXRPC_CALL_CLIENT_SEND_REQUEST ||
-	     call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) &&
+	if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST ||
+	     state == RXRPC_CALL_CLIENT_AWAIT_REPLY) &&
 	    !rxrpc_receiving_reply(call))
 		return;
 
@@ -799,7 +801,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
 		return rxrpc_proto_abort("AK0", call, 0);
 
 	/* Ignore ACKs unless we are or have just been transmitting. */
-	switch (call->state) {
+	switch (READ_ONCE(call->state)) {
 	case RXRPC_CALL_CLIENT_SEND_REQUEST:
 	case RXRPC_CALL_CLIENT_AWAIT_REPLY:
 	case RXRPC_CALL_SERVER_SEND_REPLY:
@@ -940,7 +942,7 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call,
 static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn,
 					  struct rxrpc_call *call)
 {
-	switch (call->state) {
+	switch (READ_ONCE(call->state)) {
 	case RXRPC_CALL_SERVER_AWAIT_ACK:
 		rxrpc_call_completed(call);
 		break;
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 6491ca46a03f..3e2f1a8e9c5b 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -527,7 +527,7 @@ try_again:
 		msg->msg_namelen = len;
 	}
 
-	switch (call->state) {
+	switch (READ_ONCE(call->state)) {
 	case RXRPC_CALL_SERVER_ACCEPTING:
 		ret = rxrpc_recvmsg_new_call(rx, call, msg, flags);
 		break;
@@ -640,7 +640,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
 
 	mutex_lock(&call->user_mutex);
 
-	switch (call->state) {
+	switch (READ_ONCE(call->state)) {
 	case RXRPC_CALL_CLIENT_RECV_REPLY:
 	case RXRPC_CALL_SERVER_RECV_REQUEST:
 	case RXRPC_CALL_SERVER_ACK_REQUEST:
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index bc2d3dcff9de..47ccfeacc1c6 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -488,6 +488,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
 int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
 	__releases(&rx->sk.sk_lock.slock)
 {
+	enum rxrpc_call_state state;
 	enum rxrpc_command cmd;
 	struct rxrpc_call *call;
 	unsigned long user_call_ID = 0;
@@ -526,13 +527,17 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
 			return PTR_ERR(call);
 		/* ... and we have the call lock. */
 	} else {
-		ret = -EBUSY;
-		if (call->state == RXRPC_CALL_UNINITIALISED ||
-		    call->state == RXRPC_CALL_CLIENT_AWAIT_CONN ||
-		    call->state == RXRPC_CALL_SERVER_PREALLOC ||
-		    call->state == RXRPC_CALL_SERVER_SECURING ||
-		    call->state == RXRPC_CALL_SERVER_ACCEPTING)
+		switch (READ_ONCE(call->state)) {
+		case RXRPC_CALL_UNINITIALISED:
+		case RXRPC_CALL_CLIENT_AWAIT_CONN:
+		case RXRPC_CALL_SERVER_PREALLOC:
+		case RXRPC_CALL_SERVER_SECURING:
+		case RXRPC_CALL_SERVER_ACCEPTING:
+			ret = -EBUSY;
 			goto error_release_sock;
+		default:
+			break;
+		}
 
 		ret = mutex_lock_interruptible(&call->user_mutex);
 		release_sock(&rx->sk);
@@ -542,10 +547,11 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
 		}
 	}
 
+	state = READ_ONCE(call->state);
 	_debug("CALL %d USR %lx ST %d on CONN %p",
-	       call->debug_id, call->user_call_ID, call->state, call->conn);
+	       call->debug_id, call->user_call_ID, state, call->conn);
 
-	if (call->state >= RXRPC_CALL_COMPLETE) {
+	if (state >= RXRPC_CALL_COMPLETE) {
 		/* it's too late for this call */
 		ret = -ESHUTDOWN;
 	} else if (cmd == RXRPC_CMD_SEND_ABORT) {
@@ -555,12 +561,12 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
 	} else if (cmd != RXRPC_CMD_SEND_DATA) {
 		ret = -EINVAL;
 	} else if (rxrpc_is_client_call(call) &&
-		   call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
+		   state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
 		/* request phase complete for this client call */
 		ret = -EPROTO;
 	} else if (rxrpc_is_service_call(call) &&
-		   call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
-		   call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
+		   state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+		   state != RXRPC_CALL_SERVER_SEND_REPLY) {
 		/* Reply phase not begun or not complete for service call. */
 		ret = -EPROTO;
 	} else {
@@ -605,14 +611,20 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
 	_debug("CALL %d USR %lx ST %d on CONN %p",
 	       call->debug_id, call->user_call_ID, call->state, call->conn);
 
-	if (call->state >= RXRPC_CALL_COMPLETE) {
-		ret = -ESHUTDOWN; /* it's too late for this call */
-	} else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
-		   call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
-		   call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
-		ret = -EPROTO; /* request phase complete for this client call */
-	} else {
+	switch (READ_ONCE(call->state)) {
+	case RXRPC_CALL_CLIENT_SEND_REQUEST:
+	case RXRPC_CALL_SERVER_ACK_REQUEST:
+	case RXRPC_CALL_SERVER_SEND_REPLY:
 		ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len);
+		break;
+	case RXRPC_CALL_COMPLETE:
+		/* It's too late for this call */
+		ret = -ESHUTDOWN;
+		break;
+	default:
+		 /* Request phase complete for this client call */
+		ret = -EPROTO;
+		break;
 	}
 
 	mutex_unlock(&call->user_mutex);
-- 
cgit v1.2.3


From dd4f10722aeb10f4f582948839f066bebe44e5fb Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 3 Mar 2017 21:01:02 -0800
Subject: net: fix socket refcounting in skb_complete_wifi_ack()

TX skbs do not necessarily hold a reference on skb->sk->sk_refcnt
By the time TX completion happens, sk_refcnt might be already 0.

sock_hold()/sock_put() would then corrupt critical state, like
sk_wmem_alloc.

Fixes: bf7fa551e0ce ("mac80211: Resolve sk_refcnt/sk_wmem_alloc issue in wifi ack path")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Alexander Duyck <alexander.h.duyck@intel.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Soheil Hassas Yeganeh <soheil@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f3557958e9bf..e2f37a560ec4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3893,7 +3893,7 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
 {
 	struct sock *sk = skb->sk;
 	struct sock_exterr_skb *serr;
-	int err;
+	int err = 1;
 
 	skb->wifi_acked_valid = 1;
 	skb->wifi_acked = acked;
@@ -3903,14 +3903,15 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
 	serr->ee.ee_errno = ENOMSG;
 	serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;
 
-	/* take a reference to prevent skb_orphan() from freeing the socket */
-	sock_hold(sk);
-
-	err = sock_queue_err_skb(sk, skb);
+	/* Take a reference to prevent skb_orphan() from freeing the socket,
+	 * but only if the socket refcount is not zero.
+	 */
+	if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
+		err = sock_queue_err_skb(sk, skb);
+		sock_put(sk);
+	}
 	if (err)
 		kfree_skb(skb);
-
-	sock_put(sk);
 }
 EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
 
-- 
cgit v1.2.3


From 9ac25fc063751379cb77434fef9f3b088cd3e2f7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 3 Mar 2017 21:01:03 -0800
Subject: net: fix socket refcounting in skb_complete_tx_timestamp()

TX skbs do not necessarily hold a reference on skb->sk->sk_refcnt
By the time TX completion happens, sk_refcnt might be already 0.

sock_hold()/sock_put() would then corrupt critical state, like
sk_wmem_alloc and lead to leaks or use after free.

Fixes: 62bccb8cdb69 ("net-timestamp: Make the clone operation stand-alone from phy timestamping")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Alexander Duyck <alexander.h.duyck@intel.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Soheil Hassas Yeganeh <soheil@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e2f37a560ec4..cd4ba8c6b609 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3828,13 +3828,14 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
 	if (!skb_may_tx_timestamp(sk, false))
 		return;
 
-	/* take a reference to prevent skb_orphan() from freeing the socket */
-	sock_hold(sk);
-
-	*skb_hwtstamps(skb) = *hwtstamps;
-	__skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
-
-	sock_put(sk);
+	/* Take a reference to prevent skb_orphan() from freeing the socket,
+	 * but only if the socket refcount is not zero.
+	 */
+	if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
+		*skb_hwtstamps(skb) = *hwtstamps;
+		__skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
+		sock_put(sk);
+	}
 }
 EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
 
-- 
cgit v1.2.3


From 8edc3affc0770886c7bfb3436b0fdd09bce13167 Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Sat, 4 Mar 2017 08:57:33 -0800
Subject: rds: tcp: Take explicit refcounts on struct net

It is incorrect for the rds_connection to piggyback on the
sock_net() refcount for the netns because this gives rise to
a chicken-and-egg problem during rds_conn_destroy. Instead explicitly
take a ref on the net, and hold the netns down till the connection
tear-down is complete.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/connection.c | 1 +
 net/rds/rds.h        | 6 +++---
 net/rds/tcp.c        | 4 ++--
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/net/rds/connection.c b/net/rds/connection.c
index 0e04dcceb1d4..1fa75ab7b733 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -429,6 +429,7 @@ void rds_conn_destroy(struct rds_connection *conn)
 	 */
 	rds_cong_remove_conn(conn);
 
+	put_net(conn->c_net);
 	kmem_cache_free(rds_conn_slab, conn);
 
 	spin_lock_irqsave(&rds_conn_lock, flags);
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 39518ef7af4d..82d38ccf5e8b 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -147,7 +147,7 @@ struct rds_connection {
 
 	/* Protocol version */
 	unsigned int		c_version;
-	possible_net_t		c_net;
+	struct net		*c_net;
 
 	struct list_head	c_map_item;
 	unsigned long		c_map_queued;
@@ -162,13 +162,13 @@ struct rds_connection {
 static inline
 struct net *rds_conn_net(struct rds_connection *conn)
 {
-	return read_pnet(&conn->c_net);
+	return conn->c_net;
 }
 
 static inline
 void rds_conn_net_set(struct rds_connection *conn, struct net *net)
 {
-	write_pnet(&conn->c_net, net);
+	conn->c_net = get_net(net);
 }
 
 #define RDS_FLAG_CONG_BITMAP	0x01
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index a973d3b4dff0..65c8e3b3b710 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -529,7 +529,7 @@ static void rds_tcp_kill_sock(struct net *net)
 	flush_work(&rtn->rds_tcp_accept_w);
 	spin_lock_irq(&rds_tcp_conn_lock);
 	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
-		struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
+		struct net *c_net = tc->t_cpath->cp_conn->c_net;
 
 		if (net != c_net || !tc->t_sock)
 			continue;
@@ -584,7 +584,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
 
 	spin_lock_irq(&rds_tcp_conn_lock);
 	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
-		struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
+		struct net *c_net = tc->t_cpath->cp_conn->c_net;
 
 		if (net != c_net || !tc->t_sock)
 			continue;
-- 
cgit v1.2.3


From 16c09b1c7657522a321b04aa7f4300865b7cb292 Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Sat, 4 Mar 2017 08:57:34 -0800
Subject: rds: tcp: Reorder initialization sequence in rds_tcp_init to avoid
 races

Order of initialization in rds_tcp_init needs to be done so
that resources are set up and destroyed in the correct synchronization
sequence with both the data path, as well as netns create/destroy
path. Specifically,

- we must call register_pernet_subsys and get the rds_tcp_netid
  before calling register_netdevice_notifier, otherwise we risk
  the sequence
    1. register_netdevice_notifier sets up netdev notifier callback
    2. rds_tcp_dev_event -> rds_tcp_kill_sock uses netid 0, and finds
       the wrong rtn, resulting in a panic with string that is of the form:

  BUG: unable to handle kernel NULL pointer dereference at 000000000000000d
  IP: rds_tcp_kill_sock+0x3a/0x1d0 [rds_tcp]
         :

- the rds_tcp_incoming_slab kmem_cache must be initialized before the
  datapath starts up. The latter can happen any time after the
  pernet_subsys registration of rds_tcp_net_ops, whose -> init
  function sets up the listen socket. If the rds_tcp_incoming_slab has
  not been set up at that time, a panic of the form below may be
  encountered

  BUG: unable to handle kernel NULL pointer dereference at 0000000000000014
  IP: kmem_cache_alloc+0x90/0x1c0
     :
  rds_tcp_data_recv+0x1e7/0x370 [rds_tcp]
  tcp_read_sock+0x96/0x1c0
  rds_tcp_recv_path+0x65/0x80 [rds_tcp]
     :

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/tcp.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 65c8e3b3b710..fbf807a0cc4a 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -638,19 +638,19 @@ static int rds_tcp_init(void)
 		goto out;
 	}
 
-	ret = register_netdevice_notifier(&rds_tcp_dev_notifier);
-	if (ret) {
-		pr_warn("could not register rds_tcp_dev_notifier\n");
+	ret = rds_tcp_recv_init();
+	if (ret)
 		goto out_slab;
-	}
 
 	ret = register_pernet_subsys(&rds_tcp_net_ops);
 	if (ret)
-		goto out_notifier;
+		goto out_recv;
 
-	ret = rds_tcp_recv_init();
-	if (ret)
+	ret = register_netdevice_notifier(&rds_tcp_dev_notifier);
+	if (ret) {
+		pr_warn("could not register rds_tcp_dev_notifier\n");
 		goto out_pernet;
+	}
 
 	rds_trans_register(&rds_tcp_transport);
 
@@ -660,9 +660,8 @@ static int rds_tcp_init(void)
 
 out_pernet:
 	unregister_pernet_subsys(&rds_tcp_net_ops);
-out_notifier:
-	if (unregister_netdevice_notifier(&rds_tcp_dev_notifier))
-		pr_warn("could not unregister rds_tcp_dev_notifier\n");
+out_recv:
+	rds_tcp_recv_exit();
 out_slab:
 	kmem_cache_destroy(rds_tcp_conn_slab);
 out:
-- 
cgit v1.2.3


From b21dd4506b71bdb9c5a20e759255cd2513ea7ebe Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Sat, 4 Mar 2017 08:57:35 -0800
Subject: rds: tcp: Sequence teardown of listen and acceptor sockets to avoid
 races

Commit a93d01f5777e ("RDS: TCP: avoid bad page reference in
rds_tcp_listen_data_ready") added the function
rds_tcp_listen_sock_def_readable()  to handle the case when a
partially set-up acceptor socket drops into rds_tcp_listen_data_ready().
However, if the listen socket (rtn->rds_tcp_listen_sock) is itself going
through a tear-down via rds_tcp_listen_stop(), the (*ready)() will be
null and we would hit a panic  of the form
  BUG: unable to handle kernel NULL pointer dereference at   (null)
  IP:           (null)
   :
  ? rds_tcp_listen_data_ready+0x59/0xb0 [rds_tcp]
  tcp_data_queue+0x39d/0x5b0
  tcp_rcv_established+0x2e5/0x660
  tcp_v4_do_rcv+0x122/0x220
  tcp_v4_rcv+0x8b7/0x980
    :
In the above case, it is not fatal to encounter a NULL value for
ready- we should just drop the packet and let the flush of the
acceptor thread finish gracefully.

In general, the tear-down sequence for listen() and accept() socket
that is ensured by this commit is:
     rtn->rds_tcp_listen_sock = NULL; /* prevent any new accepts */
     In rds_tcp_listen_stop():
         serialize with, and prevent, further callbacks using lock_sock()
         flush rds_wq
         flush acceptor workq
         sock_release(listen socket)

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/tcp.c        | 15 ++++++++++-----
 net/rds/tcp.h        |  2 +-
 net/rds/tcp_listen.c |  9 +++++++--
 3 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index fbf807a0cc4a..225690076773 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -484,9 +484,10 @@ static void __net_exit rds_tcp_exit_net(struct net *net)
 	 * we do need to clean up the listen socket here.
 	 */
 	if (rtn->rds_tcp_listen_sock) {
-		rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
+		struct socket *lsock = rtn->rds_tcp_listen_sock;
+
 		rtn->rds_tcp_listen_sock = NULL;
-		flush_work(&rtn->rds_tcp_accept_w);
+		rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
 	}
 }
 
@@ -523,10 +524,10 @@ static void rds_tcp_kill_sock(struct net *net)
 	struct rds_tcp_connection *tc, *_tc;
 	LIST_HEAD(tmp_list);
 	struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+	struct socket *lsock = rtn->rds_tcp_listen_sock;
 
-	rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
 	rtn->rds_tcp_listen_sock = NULL;
-	flush_work(&rtn->rds_tcp_accept_w);
+	rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
 	spin_lock_irq(&rds_tcp_conn_lock);
 	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
 		struct net *c_net = tc->t_cpath->cp_conn->c_net;
@@ -546,8 +547,12 @@ static void rds_tcp_kill_sock(struct net *net)
 void *rds_tcp_listen_sock_def_readable(struct net *net)
 {
 	struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+	struct socket *lsock = rtn->rds_tcp_listen_sock;
+
+	if (!lsock)
+		return NULL;
 
-	return rtn->rds_tcp_listen_sock->sk->sk_user_data;
+	return lsock->sk->sk_user_data;
 }
 
 static int rds_tcp_dev_event(struct notifier_block *this,
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 9a1cc8906576..56ea6620fcf9 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -66,7 +66,7 @@ void rds_tcp_state_change(struct sock *sk);
 
 /* tcp_listen.c */
 struct socket *rds_tcp_listen_init(struct net *);
-void rds_tcp_listen_stop(struct socket *);
+void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor);
 void rds_tcp_listen_data_ready(struct sock *sk);
 int rds_tcp_accept_one(struct socket *sock);
 int rds_tcp_keepalive(struct socket *sock);
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 67d0929c7d3d..2c69a508a693 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -223,6 +223,9 @@ void rds_tcp_listen_data_ready(struct sock *sk)
 	 * before it has been accepted and the accepter has set up their
 	 * data_ready.. we only want to queue listen work for our listening
 	 * socket
+	 *
+	 * (*ready)() may be null if we are racing with netns delete, and
+	 * the listen socket is being torn down.
 	 */
 	if (sk->sk_state == TCP_LISTEN)
 		rds_tcp_accept_work(sk);
@@ -231,7 +234,8 @@ void rds_tcp_listen_data_ready(struct sock *sk)
 
 out:
 	read_unlock_bh(&sk->sk_callback_lock);
-	ready(sk);
+	if (ready)
+		ready(sk);
 }
 
 struct socket *rds_tcp_listen_init(struct net *net)
@@ -271,7 +275,7 @@ out:
 	return NULL;
 }
 
-void rds_tcp_listen_stop(struct socket *sock)
+void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor)
 {
 	struct sock *sk;
 
@@ -292,5 +296,6 @@ void rds_tcp_listen_stop(struct socket *sock)
 
 	/* wait for accepts to stop and close the socket */
 	flush_workqueue(rds_wq);
+	flush_work(acceptor);
 	sock_release(sock);
 }
-- 
cgit v1.2.3


From 6c4dc75c251721f517e9daeb5370ea606b5b35ce Mon Sep 17 00:00:00 2001
From: Alexey Khoroshilov <khoroshilov@ispras.ru>
Date: Sun, 5 Mar 2017 03:01:55 +0300
Subject: net/sched: act_skbmod: remove unneeded rcu_read_unlock in
 tcf_skbmod_dump

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Alexey Khoroshilov <khoroshilov@ispras.ru>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_skbmod.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 3b7074e23024..c736627f8f4a 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -228,7 +228,6 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
 
 	return skb->len;
 nla_put_failure:
-	rcu_read_unlock();
 	nlmsg_trim(skb, b);
 	return -1;
 }
-- 
cgit v1.2.3


From 142c0ac445792c492579cb01f1cfd4e32e6dfcce Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Sun, 5 Mar 2017 12:18:41 -0600
Subject: ibmvnic: Fix overflowing firmware/hardware TX queue

Use a counter to track the number of outstanding transmissions sent
that have not received completions. If the counter reaches the maximum
number of queue entries, stop transmissions on that queue. As we receive
more completions from firmware, wake the queue once the counter reaches
an acceptable level.

This patch prevents hardware/firmware TX queue from filling up and
and generating errors.  Since incorporating this fix, internal testing
has reported that these firmware errors have stopped.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 27 ++++++++++++++++++++++++++-
 drivers/net/ethernet/ibm/ibmvnic.h |  1 +
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 9198e6bd5160..0e87b83f25e2 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -705,6 +705,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req;
 	struct device *dev = &adapter->vdev->dev;
 	struct ibmvnic_tx_buff *tx_buff = NULL;
+	struct ibmvnic_sub_crq_queue *tx_scrq;
 	struct ibmvnic_tx_pool *tx_pool;
 	unsigned int tx_send_failed = 0;
 	unsigned int tx_map_failed = 0;
@@ -724,6 +725,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	int ret = 0;
 
 	tx_pool = &adapter->tx_pool[queue_num];
+	tx_scrq = adapter->tx_scrq[queue_num];
 	txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb));
 	handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
 				   be32_to_cpu(adapter->login_rsp_buf->
@@ -826,6 +828,14 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 		ret = NETDEV_TX_BUSY;
 		goto out;
 	}
+
+	atomic_inc(&tx_scrq->used);
+
+	if (atomic_read(&tx_scrq->used) >= adapter->req_tx_entries_per_subcrq) {
+		netdev_info(netdev, "Stopping queue %d\n", queue_num);
+		netif_stop_subqueue(netdev, queue_num);
+	}
+
 	tx_packets++;
 	tx_bytes += skb->len;
 	txq->trans_start = jiffies;
@@ -1213,6 +1223,7 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter
 	scrq->adapter = adapter;
 	scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs);
 	scrq->cur = 0;
+	atomic_set(&scrq->used, 0);
 	scrq->rx_skb_top = NULL;
 	spin_lock_init(&scrq->lock);
 
@@ -1355,8 +1366,22 @@ restart_loop:
 						 DMA_TO_DEVICE);
 			}
 
-			if (txbuff->last_frag)
+			if (txbuff->last_frag) {
+				atomic_dec(&scrq->used);
+
+				if (atomic_read(&scrq->used) <=
+				    (adapter->req_tx_entries_per_subcrq / 2) &&
+				    netif_subqueue_stopped(adapter->netdev,
+							   txbuff->skb)) {
+					netif_wake_subqueue(adapter->netdev,
+							    scrq->pool_index);
+					netdev_dbg(adapter->netdev,
+						   "Started queue %d\n",
+						   scrq->pool_index);
+				}
+
 				dev_kfree_skb_any(txbuff->skb);
+			}
 
 			adapter->tx_pool[pool].free_map[adapter->tx_pool[pool].
 						     producer_index] = index;
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 422824f1f42a..1993b42666f7 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -863,6 +863,7 @@ struct ibmvnic_sub_crq_queue {
 	spinlock_t lock;
 	struct sk_buff *rx_skb_top;
 	struct ibmvnic_adapter *adapter;
+	atomic_t used;
 };
 
 struct ibmvnic_long_term_buff {
-- 
cgit v1.2.3


From 068d9f90a6978c3e3a662d9e85204a7d6be240d2 Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Sun, 5 Mar 2017 12:18:42 -0600
Subject: ibmvnic: Allocate number of rx/tx buffers agreed on by firmware

The amount of TX/RX buffers that the vNIC driver currently allocates
is different from the amount agreed upon in negotiation with firmware.
Correct that by allocating the requested number of buffers confirmed
by firmware.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 0e87b83f25e2..5f11b4dc95d2 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -404,7 +404,7 @@ static int ibmvnic_open(struct net_device *netdev)
 	send_map_query(adapter);
 	for (i = 0; i < rxadd_subcrqs; i++) {
 		init_rx_pool(adapter, &adapter->rx_pool[i],
-			     IBMVNIC_BUFFS_PER_POOL, i,
+			     adapter->req_rx_add_entries_per_subcrq, i,
 			     be64_to_cpu(size_array[i]), 1);
 		if (alloc_rx_pool(adapter, &adapter->rx_pool[i])) {
 			dev_err(dev, "Couldn't alloc rx pool\n");
@@ -419,23 +419,23 @@ static int ibmvnic_open(struct net_device *netdev)
 	for (i = 0; i < tx_subcrqs; i++) {
 		tx_pool = &adapter->tx_pool[i];
 		tx_pool->tx_buff =
-		    kcalloc(adapter->max_tx_entries_per_subcrq,
+		    kcalloc(adapter->req_tx_entries_per_subcrq,
 			    sizeof(struct ibmvnic_tx_buff), GFP_KERNEL);
 		if (!tx_pool->tx_buff)
 			goto tx_pool_alloc_failed;
 
 		if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff,
-					 adapter->max_tx_entries_per_subcrq *
+					 adapter->req_tx_entries_per_subcrq *
 					 adapter->req_mtu))
 			goto tx_ltb_alloc_failed;
 
 		tx_pool->free_map =
-		    kcalloc(adapter->max_tx_entries_per_subcrq,
+		    kcalloc(adapter->req_tx_entries_per_subcrq,
 			    sizeof(int), GFP_KERNEL);
 		if (!tx_pool->free_map)
 			goto tx_fm_alloc_failed;
 
-		for (j = 0; j < adapter->max_tx_entries_per_subcrq; j++)
+		for (j = 0; j < adapter->req_tx_entries_per_subcrq; j++)
 			tx_pool->free_map[j] = j;
 
 		tx_pool->consumer_index = 0;
@@ -746,7 +746,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 	tx_pool->consumer_index =
 	    (tx_pool->consumer_index + 1) %
-		adapter->max_tx_entries_per_subcrq;
+		adapter->req_tx_entries_per_subcrq;
 
 	tx_buff = &tx_pool->tx_buff[index];
 	tx_buff->skb = skb;
@@ -819,7 +819,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 		if (tx_pool->consumer_index == 0)
 			tx_pool->consumer_index =
-				adapter->max_tx_entries_per_subcrq - 1;
+				adapter->req_tx_entries_per_subcrq - 1;
 		else
 			tx_pool->consumer_index--;
 
@@ -1387,7 +1387,7 @@ restart_loop:
 						     producer_index] = index;
 			adapter->tx_pool[pool].producer_index =
 			    (adapter->tx_pool[pool].producer_index + 1) %
-			    adapter->max_tx_entries_per_subcrq;
+			    adapter->req_tx_entries_per_subcrq;
 		}
 		/* remove tx_comp scrq*/
 		next->tx_comp.first = 0;
-- 
cgit v1.2.3


From 62f8f4d9066c1c6f2474845d1ca7e2891f2ae3fd Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 5 Mar 2017 10:52:16 -0800
Subject: dccp: fix use-after-free in dccp_feat_activate_values

Dmitry reported crashes in DCCP stack [1]

Problem here is that when I got rid of listener spinlock, I missed the
fact that DCCP stores a complex state in struct dccp_request_sock,
while TCP does not.

Since multiple cpus could access it at the same time, we need to add
protection.

[1]
BUG: KASAN: use-after-free in dccp_feat_activate_values+0x967/0xab0
net/dccp/feat.c:1541 at addr ffff88003713be68
Read of size 8 by task syz-executor2/8457
CPU: 2 PID: 8457 Comm: syz-executor2 Not tainted 4.10.0-rc7+ #127
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
Call Trace:
 <IRQ>
 __dump_stack lib/dump_stack.c:15 [inline]
 dump_stack+0x292/0x398 lib/dump_stack.c:51
 kasan_object_err+0x1c/0x70 mm/kasan/report.c:162
 print_address_description mm/kasan/report.c:200 [inline]
 kasan_report_error mm/kasan/report.c:289 [inline]
 kasan_report.part.1+0x20e/0x4e0 mm/kasan/report.c:311
 kasan_report mm/kasan/report.c:332 [inline]
 __asan_report_load8_noabort+0x29/0x30 mm/kasan/report.c:332
 dccp_feat_activate_values+0x967/0xab0 net/dccp/feat.c:1541
 dccp_create_openreq_child+0x464/0x610 net/dccp/minisocks.c:121
 dccp_v6_request_recv_sock+0x1f6/0x1960 net/dccp/ipv6.c:457
 dccp_check_req+0x335/0x5a0 net/dccp/minisocks.c:186
 dccp_v6_rcv+0x69e/0x1d00 net/dccp/ipv6.c:711
 ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279
 NF_HOOK include/linux/netfilter.h:257 [inline]
 ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322
 dst_input include/net/dst.h:507 [inline]
 ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69
 NF_HOOK include/linux/netfilter.h:257 [inline]
 ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203
 __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190
 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228
 process_backlog+0xe5/0x6c0 net/core/dev.c:4839
 napi_poll net/core/dev.c:5202 [inline]
 net_rx_action+0xe70/0x1900 net/core/dev.c:5267
 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284
 do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:902
 </IRQ>
 do_softirq.part.17+0x1e8/0x230 kernel/softirq.c:328
 do_softirq kernel/softirq.c:176 [inline]
 __local_bh_enable_ip+0x1f2/0x200 kernel/softirq.c:181
 local_bh_enable include/linux/bottom_half.h:31 [inline]
 rcu_read_unlock_bh include/linux/rcupdate.h:971 [inline]
 ip6_finish_output2+0xbb0/0x23d0 net/ipv6/ip6_output.c:123
 ip6_finish_output+0x302/0x960 net/ipv6/ip6_output.c:148
 NF_HOOK_COND include/linux/netfilter.h:246 [inline]
 ip6_output+0x1cb/0x8d0 net/ipv6/ip6_output.c:162
 ip6_xmit+0xcdf/0x20d0 include/net/dst.h:501
 inet6_csk_xmit+0x320/0x5f0 net/ipv6/inet6_connection_sock.c:179
 dccp_transmit_skb+0xb09/0x1120 net/dccp/output.c:141
 dccp_xmit_packet+0x215/0x760 net/dccp/output.c:280
 dccp_write_xmit+0x168/0x1d0 net/dccp/output.c:362
 dccp_sendmsg+0x79c/0xb10 net/dccp/proto.c:796
 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744
 sock_sendmsg_nosec net/socket.c:635 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:645
 SYSC_sendto+0x660/0x810 net/socket.c:1687
 SyS_sendto+0x40/0x50 net/socket.c:1655
 entry_SYSCALL_64_fastpath+0x1f/0xc2
RIP: 0033:0x4458b9
RSP: 002b:00007f8ceb77bb58 EFLAGS: 00000282 ORIG_RAX: 000000000000002c
RAX: ffffffffffffffda RBX: 0000000000000017 RCX: 00000000004458b9
RDX: 0000000000000023 RSI: 0000000020e60000 RDI: 0000000000000017
RBP: 00000000006e1b90 R08: 00000000200f9fe1 R09: 0000000000000020
R10: 0000000000008010 R11: 0000000000000282 R12: 00000000007080a8
R13: 0000000000000000 R14: 00007f8ceb77c9c0 R15: 00007f8ceb77c700
Object at ffff88003713be50, in cache kmalloc-64 size: 64
Allocated:
PID = 8446
 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57
 save_stack+0x43/0xd0 mm/kasan/kasan.c:502
 set_track mm/kasan/kasan.c:514 [inline]
 kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:605
 kmem_cache_alloc_trace+0x82/0x270 mm/slub.c:2738
 kmalloc include/linux/slab.h:490 [inline]
 dccp_feat_entry_new+0x214/0x410 net/dccp/feat.c:467
 dccp_feat_push_change+0x38/0x220 net/dccp/feat.c:487
 __feat_register_sp+0x223/0x2f0 net/dccp/feat.c:741
 dccp_feat_propagate_ccid+0x22b/0x2b0 net/dccp/feat.c:949
 dccp_feat_server_ccid_dependencies+0x1b3/0x250 net/dccp/feat.c:1012
 dccp_make_response+0x1f1/0xc90 net/dccp/output.c:423
 dccp_v6_send_response+0x4ec/0xc20 net/dccp/ipv6.c:217
 dccp_v6_conn_request+0xaba/0x11b0 net/dccp/ipv6.c:377
 dccp_rcv_state_process+0x51e/0x1650 net/dccp/input.c:606
 dccp_v6_do_rcv+0x213/0x350 net/dccp/ipv6.c:632
 sk_backlog_rcv include/net/sock.h:893 [inline]
 __sk_receive_skb+0x36f/0xcc0 net/core/sock.c:479
 dccp_v6_rcv+0xba5/0x1d00 net/dccp/ipv6.c:742
 ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279
 NF_HOOK include/linux/netfilter.h:257 [inline]
 ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322
 dst_input include/net/dst.h:507 [inline]
 ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69
 NF_HOOK include/linux/netfilter.h:257 [inline]
 ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203
 __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190
 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228
 process_backlog+0xe5/0x6c0 net/core/dev.c:4839
 napi_poll net/core/dev.c:5202 [inline]
 net_rx_action+0xe70/0x1900 net/core/dev.c:5267
 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284
Freed:
PID = 15
 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57
 save_stack+0x43/0xd0 mm/kasan/kasan.c:502
 set_track mm/kasan/kasan.c:514 [inline]
 kasan_slab_free+0x73/0xc0 mm/kasan/kasan.c:578
 slab_free_hook mm/slub.c:1355 [inline]
 slab_free_freelist_hook mm/slub.c:1377 [inline]
 slab_free mm/slub.c:2954 [inline]
 kfree+0xe8/0x2b0 mm/slub.c:3874
 dccp_feat_entry_destructor.part.4+0x48/0x60 net/dccp/feat.c:418
 dccp_feat_entry_destructor net/dccp/feat.c:416 [inline]
 dccp_feat_list_pop net/dccp/feat.c:541 [inline]
 dccp_feat_activate_values+0x57f/0xab0 net/dccp/feat.c:1543
 dccp_create_openreq_child+0x464/0x610 net/dccp/minisocks.c:121
 dccp_v6_request_recv_sock+0x1f6/0x1960 net/dccp/ipv6.c:457
 dccp_check_req+0x335/0x5a0 net/dccp/minisocks.c:186
 dccp_v6_rcv+0x69e/0x1d00 net/dccp/ipv6.c:711
 ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279
 NF_HOOK include/linux/netfilter.h:257 [inline]
 ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322
 dst_input include/net/dst.h:507 [inline]
 ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69
 NF_HOOK include/linux/netfilter.h:257 [inline]
 ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203
 __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190
 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228
 process_backlog+0xe5/0x6c0 net/core/dev.c:4839
 napi_poll net/core/dev.c:5202 [inline]
 net_rx_action+0xe70/0x1900 net/core/dev.c:5267
 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284
Memory state around the buggy address:
 ffff88003713bd00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff88003713bd80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>ffff88003713be00: fc fc fc fc fc fc fc fc fc fc fb fb fb fb fb fb
                                                          ^

Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Tested-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dccp.h |  1 +
 net/dccp/minisocks.c | 24 ++++++++++++++++--------
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 61d042bbbf60..68449293c4b6 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -163,6 +163,7 @@ struct dccp_request_sock {
 	__u64			 dreq_isr;
 	__u64			 dreq_gsr;
 	__be32			 dreq_service;
+	spinlock_t		 dreq_lock;
 	struct list_head	 dreq_featneg;
 	__u32			 dreq_timestamp_echo;
 	__u32			 dreq_timestamp_time;
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index e267e6f4c9a5..abd07a443219 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -142,6 +142,13 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
 	struct dccp_request_sock *dreq = dccp_rsk(req);
 	bool own_req;
 
+	/* TCP/DCCP listeners became lockless.
+	 * DCCP stores complex state in its request_sock, so we need
+	 * a protection for them, now this code runs without being protected
+	 * by the parent (listener) lock.
+	 */
+	spin_lock_bh(&dreq->dreq_lock);
+
 	/* Check for retransmitted REQUEST */
 	if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
 
@@ -156,7 +163,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
 			inet_rtx_syn_ack(sk, req);
 		}
 		/* Network Duplicate, discard packet */
-		return NULL;
+		goto out;
 	}
 
 	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
@@ -182,20 +189,20 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
 
 	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
 							 req, &own_req);
-	if (!child)
-		goto listen_overflow;
-
-	return inet_csk_complete_hashdance(sk, child, req, own_req);
+	if (child) {
+		child = inet_csk_complete_hashdance(sk, child, req, own_req);
+		goto out;
+	}
 
-listen_overflow:
-	dccp_pr_debug("listen_overflow!\n");
 	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
 drop:
 	if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
 		req->rsk_ops->send_reset(sk, skb);
 
 	inet_csk_reqsk_queue_drop(sk, req);
-	return NULL;
+out:
+	spin_unlock_bh(&dreq->dreq_lock);
+	return child;
 }
 
 EXPORT_SYMBOL_GPL(dccp_check_req);
@@ -246,6 +253,7 @@ int dccp_reqsk_init(struct request_sock *req,
 {
 	struct dccp_request_sock *dreq = dccp_rsk(req);
 
+	spin_lock_init(&dreq->dreq_lock);
 	inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport;
 	inet_rsk(req)->ir_num	   = ntohs(dccp_hdr(skb)->dccph_dport);
 	inet_rsk(req)->acked	   = 0;
-- 
cgit v1.2.3


From 15e668070a64bb97f102ad9cf3bccbca0545cda8 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Sun, 5 Mar 2017 12:34:53 -0800
Subject: ipv6: reorder icmpv6_init() and ip6_mr_init()

Andrey reported the following kernel crash:

kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault: 0000 [#1] SMP KASAN
Dumping ftrace buffer:
   (ftrace buffer empty)
Modules linked in:
CPU: 0 PID: 14446 Comm: syz-executor6 Not tainted 4.10.0+ #82
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
task: ffff88001f311700 task.stack: ffff88001f6e8000
RIP: 0010:ip6mr_sk_done+0x15a/0x3d0 net/ipv6/ip6mr.c:1618
RSP: 0018:ffff88001f6ef418 EFLAGS: 00010202
RAX: dffffc0000000000 RBX: 1ffff10003edde8c RCX: ffffc900043ee000
RDX: 0000000000000004 RSI: ffffffff83e3b3f8 RDI: 0000000000000020
RBP: ffff88001f6ef508 R08: fffffbfff0dcc5d8 R09: 0000000000000000
R10: ffffffff86e62ec0 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: ffff88001f6ef4e0 R15: ffff8800380a0040
FS:  00007f7a52cec700(0000) GS:ffff88003ec00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000000000061c500 CR3: 000000001f1ae000 CR4: 00000000000006f0
DR0: 0000000020000000 DR1: 0000000020000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600
Call Trace:
 rawv6_close+0x4c/0x80 net/ipv6/raw.c:1217
 inet_release+0xed/0x1c0 net/ipv4/af_inet.c:425
 inet6_release+0x50/0x70 net/ipv6/af_inet6.c:432
 sock_release+0x8d/0x1e0 net/socket.c:597
 __sock_create+0x39d/0x880 net/socket.c:1226
 sock_create_kern+0x3f/0x50 net/socket.c:1243
 inet_ctl_sock_create+0xbb/0x280 net/ipv4/af_inet.c:1526
 icmpv6_sk_init+0x163/0x500 net/ipv6/icmp.c:954
 ops_init+0x10a/0x550 net/core/net_namespace.c:115
 setup_net+0x261/0x660 net/core/net_namespace.c:291
 copy_net_ns+0x27e/0x540 net/core/net_namespace.c:396
9pnet_virtio: no channels available for device ./file1
 create_new_namespaces+0x437/0x9b0 kernel/nsproxy.c:106
 unshare_nsproxy_namespaces+0xae/0x1e0 kernel/nsproxy.c:205
 SYSC_unshare kernel/fork.c:2281 [inline]
 SyS_unshare+0x64e/0x1000 kernel/fork.c:2231
 entry_SYSCALL_64_fastpath+0x1f/0xc2

This is because net->ipv6.mr6_tables is not initialized at that point,
ip6mr_rules_init() is not called yet, therefore on the error path when
we iterator the list, we trigger this oops. Fix this by reordering
ip6mr_rules_init() before icmpv6_sk_init().

Reported-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 04db40620ea6..a9a9553ee63d 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -920,12 +920,12 @@ static int __init inet6_init(void)
 	err = register_pernet_subsys(&inet6_net_ops);
 	if (err)
 		goto register_pernet_fail;
-	err = icmpv6_init();
-	if (err)
-		goto icmp_fail;
 	err = ip6_mr_init();
 	if (err)
 		goto ipmr_fail;
+	err = icmpv6_init();
+	if (err)
+		goto icmp_fail;
 	err = ndisc_init();
 	if (err)
 		goto ndisc_fail;
@@ -1061,10 +1061,10 @@ igmp_fail:
 	ndisc_cleanup();
 ndisc_fail:
 	ip6_mr_cleanup();
-ipmr_fail:
-	icmpv6_cleanup();
 icmp_fail:
 	unregister_pernet_subsys(&inet6_net_ops);
+ipmr_fail:
+	icmpv6_cleanup();
 register_pernet_fail:
 	sock_unregister(PF_INET6);
 	rtnl_unregister_all(PF_INET6);
-- 
cgit v1.2.3


From 97ee351b50a49717543533cfb85b4bf9d88c9680 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 7 Mar 2017 16:14:49 +1100
Subject: powerpc/boot: Fix zImage TOC alignment

Recent toolchains force the TOC to be 256 byte aligned. We need to
enforce this alignment in the zImage linker script, otherwise pointers
to our TOC variables (__toc_start) could be incorrect. If the actual
start of the TOC and __toc_start don't have the same value we crash
early in the zImage wrapper.

Cc: stable@vger.kernel.org
Suggested-by: Alan Modra <amodra@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/boot/zImage.lds.S | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S
index 861e72109df2..f080abfc2f83 100644
--- a/arch/powerpc/boot/zImage.lds.S
+++ b/arch/powerpc/boot/zImage.lds.S
@@ -68,6 +68,7 @@ SECTIONS
   }
 
 #ifdef CONFIG_PPC64_BOOT_WRAPPER
+  . = ALIGN(256);
   .got :
   {
     __toc_start = .;
-- 
cgit v1.2.3


From 3802a345321a08093ba2ddb1849e736f84e8d450 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 7 Mar 2017 16:45:58 -0800
Subject: xfs: only reclaim unwritten COW extents periodically

We only want to reclaim preallocations from our periodic work item.
Currently this is archived by looking for a dirty inode, but that check
is rather fragile.  Instead add a flag to xfs_reflink_cancel_cow_* so
that the caller can ask for just cancelling unwritten extents in the COW
fork.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
[darrick: fix typos in commit message]
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c    |  2 +-
 fs/xfs/xfs_icache.c  |  2 +-
 fs/xfs/xfs_inode.c   |  2 +-
 fs/xfs/xfs_reflink.c | 23 ++++++++++++++++-------
 fs/xfs/xfs_reflink.h |  4 ++--
 fs/xfs/xfs_super.c   |  2 +-
 6 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index bf65a9ea8642..aa8a6f0d09c3 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -293,7 +293,7 @@ xfs_end_io(
 			goto done;
 		if (ioend->io_bio->bi_error) {
 			error = xfs_reflink_cancel_cow_range(ip,
-					ioend->io_offset, ioend->io_size);
+					ioend->io_offset, ioend->io_size, true);
 			goto done;
 		}
 		error = xfs_reflink_end_cow(ip, ioend->io_offset,
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 7234b9748c36..3531f8f72fa5 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1608,7 +1608,7 @@ xfs_inode_free_cowblocks(
 	xfs_ilock(ip, XFS_IOLOCK_EXCL);
 	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
 
-	ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF);
+	ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);
 
 	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
 	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index edfa6a55b064..7eaf1ef74e3c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1615,7 +1615,7 @@ xfs_itruncate_extents(
 
 	/* Remove all pending CoW reservations. */
 	error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block,
-			last_block);
+			last_block, true);
 	if (error)
 		goto out;
 
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index da6d08fb359c..4a84c5ea266d 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -548,14 +548,18 @@ xfs_reflink_trim_irec_to_next_cow(
 }
 
 /*
- * Cancel all pending CoW reservations for some block range of an inode.
+ * Cancel CoW reservations for some block range of an inode.
+ *
+ * If cancel_real is true this function cancels all COW fork extents for the
+ * inode; if cancel_real is false, real extents are not cleared.
  */
 int
 xfs_reflink_cancel_cow_blocks(
 	struct xfs_inode		*ip,
 	struct xfs_trans		**tpp,
 	xfs_fileoff_t			offset_fsb,
-	xfs_fileoff_t			end_fsb)
+	xfs_fileoff_t			end_fsb,
+	bool				cancel_real)
 {
 	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
 	struct xfs_bmbt_irec		got, del;
@@ -579,7 +583,7 @@ xfs_reflink_cancel_cow_blocks(
 					&idx, &got, &del);
 			if (error)
 				break;
-		} else {
+		} else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
 			xfs_trans_ijoin(*tpp, ip, 0);
 			xfs_defer_init(&dfops, &firstfsb);
 
@@ -621,13 +625,17 @@ xfs_reflink_cancel_cow_blocks(
 }
 
 /*
- * Cancel all pending CoW reservations for some byte range of an inode.
+ * Cancel CoW reservations for some byte range of an inode.
+ *
+ * If cancel_real is true this function cancels all COW fork extents for the
+ * inode; if cancel_real is false, real extents are not cleared.
  */
 int
 xfs_reflink_cancel_cow_range(
 	struct xfs_inode	*ip,
 	xfs_off_t		offset,
-	xfs_off_t		count)
+	xfs_off_t		count,
+	bool			cancel_real)
 {
 	struct xfs_trans	*tp;
 	xfs_fileoff_t		offset_fsb;
@@ -653,7 +661,8 @@ xfs_reflink_cancel_cow_range(
 	xfs_trans_ijoin(tp, ip, 0);
 
 	/* Scrape out the old CoW reservations */
-	error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb);
+	error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb,
+			cancel_real);
 	if (error)
 		goto out_cancel;
 
@@ -1450,7 +1459,7 @@ next:
 	 * We didn't find any shared blocks so turn off the reflink flag.
 	 * First, get rid of any leftover CoW mappings.
 	 */
-	error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF);
+	error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true);
 	if (error)
 		return error;
 
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 33ac9b8db683..d29a7967f029 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -39,9 +39,9 @@ extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
 
 extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip,
 		struct xfs_trans **tpp, xfs_fileoff_t offset_fsb,
-		xfs_fileoff_t end_fsb);
+		xfs_fileoff_t end_fsb, bool cancel_real);
 extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
-		xfs_off_t count);
+		xfs_off_t count, bool cancel_real);
 extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
 		xfs_off_t count);
 extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 890862f2447c..685c042a120f 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -953,7 +953,7 @@ xfs_fs_destroy_inode(
 	XFS_STATS_INC(ip->i_mount, vn_remove);
 
 	if (xfs_is_reflink_inode(ip)) {
-		error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF);
+		error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
 		if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount))
 			xfs_warn(ip->i_mount,
 "Error %d while evicting CoW blocks for inode %llu.",
-- 
cgit v1.2.3


From f1c635b439a5c01776fe3a25b1e2dc546ea82e6f Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Tue, 7 Mar 2017 09:15:53 -0800
Subject: scsi: storvsc: Workaround for virtual DVD SCSI version

Hyper-V host emulation of SCSI for virtual DVD device reports SCSI
version 0 (UNKNOWN) but is still capable of supporting REPORTLUN.

Without this patch, a GEN2 Linux guest on Hyper-V will not boot 4.11
successfully with virtual DVD ROM device. What happens is that the SCSI
scan process falls back to doing sequential probing by INQUIRY.  But the
storvsc driver has a previous workaround that masks/blocks all errors
reports from INQUIRY (or MODE_SENSE) commands.  This workaround causes
the scan to then populate a full set of bogus LUN's on the target and
then sends kernel spinning off into a death spiral doing block reads on
the non-existent LUNs.

By setting the correct blacklist flags, the target with the DVD device
is scanned with REPORTLUN and that works correctly.

Patch needs to go in current 4.11, it is safe but not necessary in older
kernels.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/storvsc_drv.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 585e54f6512c..f555174f2cb7 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -400,8 +400,6 @@ MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs to subchannels")
  */
 static int storvsc_timeout = 180;
 
-static int msft_blist_flags = BLIST_TRY_VPD_PAGES;
-
 #if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
 static struct scsi_transport_template *fc_transport_template;
 #endif
@@ -1383,6 +1381,22 @@ static int storvsc_do_io(struct hv_device *device,
 	return ret;
 }
 
+static int storvsc_device_alloc(struct scsi_device *sdevice)
+{
+	/*
+	 * Set blist flag to permit the reading of the VPD pages even when
+	 * the target may claim SPC-2 compliance. MSFT targets currently
+	 * claim SPC-2 compliance while they implement post SPC-2 features.
+	 * With this flag we can correctly handle WRITE_SAME_16 issues.
+	 *
+	 * Hypervisor reports SCSI_UNKNOWN type for DVD ROM device but
+	 * still supports REPORT LUN.
+	 */
+	sdevice->sdev_bflags = BLIST_REPORTLUN2 | BLIST_TRY_VPD_PAGES;
+
+	return 0;
+}
+
 static int storvsc_device_configure(struct scsi_device *sdevice)
 {
 
@@ -1395,14 +1409,6 @@ static int storvsc_device_configure(struct scsi_device *sdevice)
 
 	sdevice->no_write_same = 1;
 
-	/*
-	 * Add blist flags to permit the reading of the VPD pages even when
-	 * the target may claim SPC-2 compliance. MSFT targets currently
-	 * claim SPC-2 compliance while they implement post SPC-2 features.
-	 * With this patch we can correctly handle WRITE_SAME_16 issues.
-	 */
-	sdevice->sdev_bflags |= msft_blist_flags;
-
 	/*
 	 * If the host is WIN8 or WIN8 R2, claim conformance to SPC-3
 	 * if the device is a MSFT virtual device.  If the host is
@@ -1661,6 +1667,7 @@ static struct scsi_host_template scsi_driver = {
 	.eh_host_reset_handler =	storvsc_host_reset_handler,
 	.proc_name =		"storvsc_host",
 	.eh_timed_out =		storvsc_eh_timed_out,
+	.slave_alloc =		storvsc_device_alloc,
 	.slave_configure =	storvsc_device_configure,
 	.cmd_per_lun =		255,
 	.this_id =		-1,
-- 
cgit v1.2.3


From 85e8a23936ab3442de0c42da97d53b29f004ece1 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 13 Feb 2017 08:49:20 +1100
Subject: scsi: lpfc: Add shutdown method for kexec

We see lpfc devices regularly fail during kexec. Fix this by adding a
shutdown method which mirrors the remove method.

Cc: <stable@vger.kernel.org>
Signed-off-by: Anton Blanchard <anton@samba.org>
Reviewed-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Tested-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_init.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 661bd25a404a..2697d49da4d7 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -12059,6 +12059,7 @@ static struct pci_driver lpfc_driver = {
 	.id_table	= lpfc_id_table,
 	.probe		= lpfc_pci_probe_one,
 	.remove		= lpfc_pci_remove_one,
+	.shutdown	= lpfc_pci_remove_one,
 	.suspend        = lpfc_pci_suspend_one,
 	.resume		= lpfc_pci_resume_one,
 	.err_handler    = &lpfc_err_handler,
-- 
cgit v1.2.3


From 627c845c0907894a1e5cd2d90ff4fc86c9e4458e Mon Sep 17 00:00:00 2001
From: Tina Zhang <tina.zhang@intel.com>
Date: Tue, 7 Mar 2017 04:08:34 -0500
Subject: drm/i915/gvt: change some gvt_err to gvt_dbg_cmd

gvt_err should be used for dumping error message. This patch changes
some gvt_err to gvt_dbg_cmd, as they are only debugging message, not
errors.

Signed-off-by: Tina Zhang <tina.zhang@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/cmd_parser.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 7bb11a555b76..f53cab604d49 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -669,7 +669,7 @@ static inline void print_opcode(u32 cmd, int ring_id)
 	if (d_info == NULL)
 		return;
 
-	gvt_err("opcode=0x%x %s sub_ops:",
+	gvt_dbg_cmd("opcode=0x%x %s sub_ops:",
 			cmd >> (32 - d_info->op_len), d_info->name);
 
 	for (i = 0; i < d_info->nr_sub_op; i++)
@@ -694,23 +694,23 @@ static void parser_exec_state_dump(struct parser_exec_state *s)
 	int cnt = 0;
 	int i;
 
-	gvt_err("  vgpu%d RING%d: ring_start(%08lx) ring_end(%08lx)"
+	gvt_dbg_cmd("  vgpu%d RING%d: ring_start(%08lx) ring_end(%08lx)"
 			" ring_head(%08lx) ring_tail(%08lx)\n", s->vgpu->id,
 			s->ring_id, s->ring_start, s->ring_start + s->ring_size,
 			s->ring_head, s->ring_tail);
 
-	gvt_err("  %s %s ip_gma(%08lx) ",
+	gvt_dbg_cmd("  %s %s ip_gma(%08lx) ",
 			s->buf_type == RING_BUFFER_INSTRUCTION ?
 			"RING_BUFFER" : "BATCH_BUFFER",
 			s->buf_addr_type == GTT_BUFFER ?
 			"GTT" : "PPGTT", s->ip_gma);
 
 	if (s->ip_va == NULL) {
-		gvt_err(" ip_va(NULL)");
+		gvt_dbg_cmd(" ip_va(NULL)");
 		return;
 	}
 
-	gvt_err("  ip_va=%p: %08x %08x %08x %08x\n",
+	gvt_dbg_cmd("  ip_va=%p: %08x %08x %08x %08x\n",
 			s->ip_va, cmd_val(s, 0), cmd_val(s, 1),
 			cmd_val(s, 2), cmd_val(s, 3));
 
-- 
cgit v1.2.3


From 787eb485509f9d58962bd8b4dbc6a5ac6e2034fe Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 2 Mar 2017 15:02:51 -0800
Subject: xfs: fix and streamline error handling in xfs_end_io

There are two different cases of buffered I/O errors:

 - first we can have an already shutdown fs.  In that case we should skip
   any on-disk operations and just clean up the appen transaction if
   present and destroy the ioend
 - a real I/O error.  In that case we should cleanup any lingering COW
   blocks.  This gets skipped in the current code and is fixed by this
   patch.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c | 59 +++++++++++++++++++++++++------------------------------
 1 file changed, 27 insertions(+), 32 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index aa8a6f0d09c3..61494295d92f 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -274,54 +274,49 @@ xfs_end_io(
 	struct xfs_ioend	*ioend =
 		container_of(work, struct xfs_ioend, io_work);
 	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
+	xfs_off_t		offset = ioend->io_offset;
+	size_t			size = ioend->io_size;
 	int			error = ioend->io_bio->bi_error;
 
 	/*
-	 * Set an error if the mount has shut down and proceed with end I/O
-	 * processing so it can perform whatever cleanups are necessary.
+	 * Just clean up the in-memory strutures if the fs has been shut down.
 	 */
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
 		error = -EIO;
+		goto done;
+	}
 
 	/*
-	 * For a CoW extent, we need to move the mapping from the CoW fork
-	 * to the data fork.  If instead an error happened, just dump the
-	 * new blocks.
+	 * Clean up any COW blocks on an I/O error.
 	 */
-	if (ioend->io_type == XFS_IO_COW) {
-		if (error)
-			goto done;
-		if (ioend->io_bio->bi_error) {
-			error = xfs_reflink_cancel_cow_range(ip,
-					ioend->io_offset, ioend->io_size, true);
-			goto done;
+	if (unlikely(error)) {
+		switch (ioend->io_type) {
+		case XFS_IO_COW:
+			xfs_reflink_cancel_cow_range(ip, offset, size, true);
+			break;
 		}
-		error = xfs_reflink_end_cow(ip, ioend->io_offset,
-				ioend->io_size);
-		if (error)
-			goto done;
+
+		goto done;
 	}
 
 	/*
-	 * For unwritten extents we need to issue transactions to convert a
-	 * range to normal written extens after the data I/O has finished.
-	 * Detecting and handling completion IO errors is done individually
-	 * for each case as different cleanup operations need to be performed
-	 * on error.
+	 * Success:  commit the COW or unwritten blocks if needed.
 	 */
-	if (ioend->io_type == XFS_IO_UNWRITTEN) {
-		if (error)
-			goto done;
-		error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
-						  ioend->io_size);
-	} else if (ioend->io_append_trans) {
-		error = xfs_setfilesize_ioend(ioend, error);
-	} else {
-		ASSERT(!xfs_ioend_is_append(ioend) ||
-		       ioend->io_type == XFS_IO_COW);
+	switch (ioend->io_type) {
+	case XFS_IO_COW:
+		error = xfs_reflink_end_cow(ip, offset, size);
+		break;
+	case XFS_IO_UNWRITTEN:
+		error = xfs_iomap_write_unwritten(ip, offset, size);
+		break;
+	default:
+		ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
+		break;
 	}
 
 done:
+	if (ioend->io_append_trans)
+		error = xfs_setfilesize_ioend(ioend, error);
 	xfs_destroy_ioend(ioend, error);
 }
 
-- 
cgit v1.2.3


From d5825712ee98d68a2c17bc89dad2c30276894cba Mon Sep 17 00:00:00 2001
From: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Date: Thu, 2 Mar 2017 15:06:33 -0800
Subject: xfs: Use xfs_icluster_size_fsb() to calculate inode alignment mask

When block size is larger than inode cluster size, the call to
XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size) returns 0. Also, mkfs.xfs
would have set xfs_sb->sb_inoalignmt to 0. Hence in
xfs_set_inoalignment(), xfs_mount->m_inoalign_mask gets initialized to
-1 instead of 0. However, xfs_mount->m_sinoalign would get correctly
intialized to 0 because for every positive value of xfs_mount->m_dalign,
the condition "!(mp->m_dalign & mp->m_inoalign_mask)" would evaluate to
false.

Also, xfs_imap() worked fine even with xfs_mount->m_inoalign_mask having
-1 as the value because blks_per_cluster variable would have the value 1
and hence we would never have a need to use xfs_mount->m_inoalign_mask
to compute the inode chunk's agbno and offset within the chunk.

Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_mount.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 450bde68bb75..688ebff1f663 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -513,8 +513,7 @@ STATIC void
 xfs_set_inoalignment(xfs_mount_t *mp)
 {
 	if (xfs_sb_version_hasalign(&mp->m_sb) &&
-	    mp->m_sb.sb_inoalignmt >=
-	    XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
+		mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
 		mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
 	else
 		mp->m_inoalign_mask = 0;
-- 
cgit v1.2.3


From 08b005f1333154ae5b404ca28766e0ffb9f1c150 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Mon, 6 Mar 2017 11:58:20 -0800
Subject: xfs: remove kmem_zalloc_greedy

The sole remaining caller of kmem_zalloc_greedy is bulkstat, which uses
it to grab 1-4 pages for staging of inobt records.  The infinite loop in
the greedy allocation function is causing hangs[1] in generic/269, so
just get rid of the greedy allocator in favor of kmem_zalloc_large.
This makes bulkstat somewhat more likely to ENOMEM if there's really no
pages to spare, but eliminates a source of hangs.

[1] http://lkml.kernel.org/r/20170301044634.rgidgdqqiiwsmfpj%40XZHOUW.usersys.redhat.com

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
v2: remove single-page fallback
---
 fs/xfs/kmem.c       | 18 ------------------
 fs/xfs/kmem.h       |  2 --
 fs/xfs/xfs_itable.c |  6 ++----
 3 files changed, 2 insertions(+), 24 deletions(-)

diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index 2dfdc62f795e..70a5b55e0870 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -25,24 +25,6 @@
 #include "kmem.h"
 #include "xfs_message.h"
 
-/*
- * Greedy allocation.  May fail and may return vmalloced memory.
- */
-void *
-kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
-{
-	void		*ptr;
-	size_t		kmsize = maxsize;
-
-	while (!(ptr = vzalloc(kmsize))) {
-		if ((kmsize >>= 1) <= minsize)
-			kmsize = minsize;
-	}
-	if (ptr)
-		*size = kmsize;
-	return ptr;
-}
-
 void *
 kmem_alloc(size_t size, xfs_km_flags_t flags)
 {
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index 689f746224e7..f0fc84fcaac2 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -69,8 +69,6 @@ static inline void  kmem_free(const void *ptr)
 }
 
 
-extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
-
 static inline void *
 kmem_zalloc(size_t size, xfs_km_flags_t flags)
 {
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 66e881790c17..2a6d9b1558e0 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -361,7 +361,6 @@ xfs_bulkstat(
 	xfs_agino_t		agino;	/* inode # in allocation group */
 	xfs_agnumber_t		agno;	/* allocation group number */
 	xfs_btree_cur_t		*cur;	/* btree cursor for ialloc btree */
-	size_t			irbsize; /* size of irec buffer in bytes */
 	xfs_inobt_rec_incore_t	*irbuf;	/* start of irec buffer */
 	int			nirbuf;	/* size of irbuf */
 	int			ubcount; /* size of user's buffer */
@@ -388,11 +387,10 @@ xfs_bulkstat(
 	*ubcountp = 0;
 	*done = 0;
 
-	irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
+	irbuf = kmem_zalloc_large(PAGE_SIZE * 4, KM_SLEEP);
 	if (!irbuf)
 		return -ENOMEM;
-
-	nirbuf = irbsize / sizeof(*irbuf);
+	nirbuf = (PAGE_SIZE * 4) / sizeof(*irbuf);
 
 	/*
 	 * Loop over the allocation groups, starting from the last
-- 
cgit v1.2.3


From aa2be9b3d6d2d699e9ca7cbfc00867c80e5da213 Mon Sep 17 00:00:00 2001
From: Daniel Axtens <dja@axtens.net>
Date: Fri, 3 Mar 2017 17:56:55 +1100
Subject: crypto: powerpc - Fix initialisation of crc32c context

Turning on crypto self-tests on a POWER8 shows:

    alg: hash: Test 1 failed for crc32c-vpmsum
    00000000: ff ff ff ff

Comparing the code with the Intel CRC32c implementation on which
ours is based shows that we are doing an init with 0, not ~0
as CRC32c requires.

This probably wasn't caught because btrfs does its own weird
open-coded initialisation.

Initialise our internal context to ~0 on init.

This makes the self-tests pass, and btrfs continues to work.

Fixes: 6dd7a82cc54e ("crypto: powerpc - Add POWER8 optimised crc32c")
Cc: Anton Blanchard <anton@samba.org>
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Axtens <dja@axtens.net>
Acked-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/powerpc/crypto/crc32c-vpmsum_glue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
index 9fa046d56eba..411994551afc 100644
--- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c
+++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
@@ -52,7 +52,7 @@ static int crc32c_vpmsum_cra_init(struct crypto_tfm *tfm)
 {
 	u32 *key = crypto_tfm_ctx(tfm);
 
-	*key = 0;
+	*key = ~0;
 
 	return 0;
 }
-- 
cgit v1.2.3


From 07de4bc88ce6a4d898cad9aa4c99c1df7e87702d Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Sun, 5 Mar 2017 19:14:07 +0200
Subject: crypto: s5p-sss - Fix completing crypto request in IRQ handler

In a regular interrupt handler driver was finishing the crypt/decrypt
request by calling complete on crypto request.  This is disallowed since
converting to skcipher in commit b286d8b1a690 ("crypto: skcipher - Add
skcipher walk interface") and causes a warning:
	WARNING: CPU: 0 PID: 0 at crypto/skcipher.c:430 skcipher_walk_first+0x13c/0x14c

The interrupt is marked shared but in fact there are no other users
sharing it.  Thus the simplest solution seems to be to just use a
threaded interrupt handler, after converting it to oneshot.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/s5p-sss.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c
index dce1af0ce85c..a668286d62cb 100644
--- a/drivers/crypto/s5p-sss.c
+++ b/drivers/crypto/s5p-sss.c
@@ -805,8 +805,9 @@ static int s5p_aes_probe(struct platform_device *pdev)
 		dev_warn(dev, "feed control interrupt is not available.\n");
 		goto err_irq;
 	}
-	err = devm_request_irq(dev, pdata->irq_fc, s5p_aes_interrupt,
-			       IRQF_SHARED, pdev->name, pdev);
+	err = devm_request_threaded_irq(dev, pdata->irq_fc, NULL,
+					s5p_aes_interrupt, IRQF_ONESHOT,
+					pdev->name, pdev);
 	if (err < 0) {
 		dev_warn(dev, "feed control interrupt is not available.\n");
 		goto err_irq;
-- 
cgit v1.2.3


From 45c2fdde01299b02a6e3225e848598a3c1e55539 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Tue, 7 Mar 2017 15:14:46 +0100
Subject: hwrng: omap - write registers after enabling the clock

Commit 383212425c926 ("hwrng: omap - Add device variant for SafeXcel
IP-76 found in Armada 8K") added support for the SafeXcel IP-76 variant
of the IP. This modification included getting a reference and enabling a
clock. Unfortunately, this was done *after* writing to the
RNG_INTMASK_REG register. This generally works fine when the driver is
built-in because the clock might have been left enabled by the
bootloader, but fails short when the driver is built as a module: it
causes a system hang because a register is being accessed while the
clock is not enabled.

This commit fixes that by making the register access *after* enabling
the clock.

This issue was found by the kernelci.org testing effort.

Fixes: 383212425c926 ("hwrng: omap - Add device variant for SafeXcel IP-76 found in Armada 8K")
Cc: <stable@vger.kernel.org>
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/omap-rng.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c
index 3ad86fdf954e..efa3747c1750 100644
--- a/drivers/char/hw_random/omap-rng.c
+++ b/drivers/char/hw_random/omap-rng.c
@@ -397,7 +397,6 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv,
 				irq, err);
 			return err;
 		}
-		omap_rng_write(priv, RNG_INTMASK_REG, RNG_SHUTDOWN_OFLO_MASK);
 
 		priv->clk = of_clk_get(pdev->dev.of_node, 0);
 		if (IS_ERR(priv->clk) && PTR_ERR(priv->clk) == -EPROBE_DEFER)
@@ -408,6 +407,8 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv,
 				dev_err(&pdev->dev, "unable to enable the clk, "
 						    "err = %d\n", err);
 		}
+
+		omap_rng_write(priv, RNG_INTMASK_REG, RNG_SHUTDOWN_OFLO_MASK);
 	}
 	return 0;
 }
-- 
cgit v1.2.3


From 761c2510283066324cab7859930777ee34cbca78 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Tue, 7 Mar 2017 15:14:47 +0100
Subject: hwrng: omap - use devm_clk_get() instead of of_clk_get()

The omap-rng driver currently uses of_clk_get() to get a reference to
the clock, but never releases that reference. This commit fixes that by
using devm_clk_get() instead.

Fixes: 383212425c926 ("hwrng: omap - Add device variant for SafeXcel IP-76 found in Armada 8K")
Cc: <stable@vger.kernel.org>
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/omap-rng.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c
index efa3747c1750..d2866280f130 100644
--- a/drivers/char/hw_random/omap-rng.c
+++ b/drivers/char/hw_random/omap-rng.c
@@ -398,7 +398,7 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv,
 			return err;
 		}
 
-		priv->clk = of_clk_get(pdev->dev.of_node, 0);
+		priv->clk = devm_clk_get(&pdev->dev, NULL);
 		if (IS_ERR(priv->clk) && PTR_ERR(priv->clk) == -EPROBE_DEFER)
 			return -EPROBE_DEFER;
 		if (!IS_ERR(priv->clk)) {
-- 
cgit v1.2.3


From b985735be7afea3a5e0570ce2ea0b662c0e12e19 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Tue, 7 Mar 2017 15:14:48 +0100
Subject: hwrng: omap - Do not access INTMASK_REG on EIP76

The INTMASK_REG register does not exist on EIP76. Due to this, the call:

   omap_rng_write(priv, RNG_INTMASK_REG, RNG_SHUTDOWN_OFLO_MASK);

ends up, through the reg_map_eip76[] array, in accessing the register at
offset 0, which is the RNG_OUTPUT_0_REG. This by itself doesn't cause
any problem, but clearly doesn't enable the interrupt as it was
expected.

On EIP76, the register that allows to enable the interrupt is
RNG_CONTROL_REG. And just like RNG_INTMASK_REG, it's bit 1 of this
register that allows to enable the shutdown_oflo interrupt.

Fixes: 383212425c926 ("hwrng: omap - Add device variant for SafeXcel IP-76 found in Armada 8K")
Cc: <stable@vger.kernel.org>
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/omap-rng.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c
index d2866280f130..b1ad12552b56 100644
--- a/drivers/char/hw_random/omap-rng.c
+++ b/drivers/char/hw_random/omap-rng.c
@@ -408,7 +408,18 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv,
 						    "err = %d\n", err);
 		}
 
-		omap_rng_write(priv, RNG_INTMASK_REG, RNG_SHUTDOWN_OFLO_MASK);
+		/*
+		 * On OMAP4, enabling the shutdown_oflo interrupt is
+		 * done in the interrupt mask register. There is no
+		 * such register on EIP76, and it's enabled by the
+		 * same bit in the control register
+		 */
+		if (priv->pdata->regs[RNG_INTMASK_REG])
+			omap_rng_write(priv, RNG_INTMASK_REG,
+				       RNG_SHUTDOWN_OFLO_MASK);
+		else
+			omap_rng_write(priv, RNG_CONTROL_REG,
+				       RNG_SHUTDOWN_OFLO_MASK);
 	}
 	return 0;
 }
-- 
cgit v1.2.3


From a04e54f2c35823ca32d56afcd5cea5b783e2f51a Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Thu, 3 Nov 2016 23:06:53 -0700
Subject: target/pscsi: Fix TYPE_TAPE + TYPE_MEDIMUM_CHANGER export

The following fixes a divide by zero OOPs with TYPE_TAPE
due to pscsi_tape_read_blocksize() failing causing a zero
sd->sector_size being propigated up via dev_attrib.hw_block_size.

It also fixes another long-standing bug where TYPE_TAPE and
TYPE_MEDIMUM_CHANGER where using pscsi_create_type_other(),
which does not call scsi_device_get() to take the device
reference.  Instead, rename pscsi_create_type_rom() to
pscsi_create_type_nondisk() and use it for all cases.

Finally, also drop a dump_stack() in pscsi_get_blocks() for
non TYPE_DISK, which in modern target-core can get invoked
via target_sense_desc_format() during CHECK_CONDITION.

Reported-by: Malcolm Haak <insanemal@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_pscsi.c | 47 ++++++++++----------------------------
 1 file changed, 12 insertions(+), 35 deletions(-)

diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index a8f8e53f2f57..44d92f23a3f0 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -154,7 +154,7 @@ static void pscsi_tape_read_blocksize(struct se_device *dev,
 
 	buf = kzalloc(12, GFP_KERNEL);
 	if (!buf)
-		return;
+		goto out_free;
 
 	memset(cdb, 0, MAX_COMMAND_SIZE);
 	cdb[0] = MODE_SENSE;
@@ -169,9 +169,10 @@ static void pscsi_tape_read_blocksize(struct se_device *dev,
 	 * If MODE_SENSE still returns zero, set the default value to 1024.
 	 */
 	sdev->sector_size = (buf[9] << 16) | (buf[10] << 8) | (buf[11]);
+out_free:
 	if (!sdev->sector_size)
 		sdev->sector_size = 1024;
-out_free:
+
 	kfree(buf);
 }
 
@@ -314,9 +315,10 @@ static int pscsi_add_device_to_list(struct se_device *dev,
 				sd->lun, sd->queue_depth);
 	}
 
-	dev->dev_attrib.hw_block_size = sd->sector_size;
+	dev->dev_attrib.hw_block_size =
+		min_not_zero((int)sd->sector_size, 512);
 	dev->dev_attrib.hw_max_sectors =
-		min_t(int, sd->host->max_sectors, queue_max_hw_sectors(q));
+		min_not_zero(sd->host->max_sectors, queue_max_hw_sectors(q));
 	dev->dev_attrib.hw_queue_depth = sd->queue_depth;
 
 	/*
@@ -339,8 +341,10 @@ static int pscsi_add_device_to_list(struct se_device *dev,
 	/*
 	 * For TYPE_TAPE, attempt to determine blocksize with MODE_SENSE.
 	 */
-	if (sd->type == TYPE_TAPE)
+	if (sd->type == TYPE_TAPE) {
 		pscsi_tape_read_blocksize(dev, sd);
+		dev->dev_attrib.hw_block_size = sd->sector_size;
+	}
 	return 0;
 }
 
@@ -406,7 +410,7 @@ static int pscsi_create_type_disk(struct se_device *dev, struct scsi_device *sd)
 /*
  * Called with struct Scsi_Host->host_lock called.
  */
-static int pscsi_create_type_rom(struct se_device *dev, struct scsi_device *sd)
+static int pscsi_create_type_nondisk(struct se_device *dev, struct scsi_device *sd)
 	__releases(sh->host_lock)
 {
 	struct pscsi_hba_virt *phv = dev->se_hba->hba_ptr;
@@ -433,28 +437,6 @@ static int pscsi_create_type_rom(struct se_device *dev, struct scsi_device *sd)
 	return 0;
 }
 
-/*
- * Called with struct Scsi_Host->host_lock called.
- */
-static int pscsi_create_type_other(struct se_device *dev,
-		struct scsi_device *sd)
-	__releases(sh->host_lock)
-{
-	struct pscsi_hba_virt *phv = dev->se_hba->hba_ptr;
-	struct Scsi_Host *sh = sd->host;
-	int ret;
-
-	spin_unlock_irq(sh->host_lock);
-	ret = pscsi_add_device_to_list(dev, sd);
-	if (ret)
-		return ret;
-
-	pr_debug("CORE_PSCSI[%d] - Added Type: %s for %d:%d:%d:%llu\n",
-		phv->phv_host_id, scsi_device_type(sd->type), sh->host_no,
-		sd->channel, sd->id, sd->lun);
-	return 0;
-}
-
 static int pscsi_configure_device(struct se_device *dev)
 {
 	struct se_hba *hba = dev->se_hba;
@@ -542,11 +524,8 @@ static int pscsi_configure_device(struct se_device *dev)
 		case TYPE_DISK:
 			ret = pscsi_create_type_disk(dev, sd);
 			break;
-		case TYPE_ROM:
-			ret = pscsi_create_type_rom(dev, sd);
-			break;
 		default:
-			ret = pscsi_create_type_other(dev, sd);
+			ret = pscsi_create_type_nondisk(dev, sd);
 			break;
 		}
 
@@ -611,8 +590,7 @@ static void pscsi_free_device(struct se_device *dev)
 		else if (pdv->pdv_lld_host)
 			scsi_host_put(pdv->pdv_lld_host);
 
-		if ((sd->type == TYPE_DISK) || (sd->type == TYPE_ROM))
-			scsi_device_put(sd);
+		scsi_device_put(sd);
 
 		pdv->pdv_sd = NULL;
 	}
@@ -1064,7 +1042,6 @@ static sector_t pscsi_get_blocks(struct se_device *dev)
 	if (pdv->pdv_bd && pdv->pdv_bd->bd_part)
 		return pdv->pdv_bd->bd_part->nr_sects;
 
-	dump_stack();
 	return 0;
 }
 
-- 
cgit v1.2.3


From 13603685c1f12c67a7a2427f00b63f39a2b6f7c9 Mon Sep 17 00:00:00 2001
From: Max Lohrmann <post@wickenrode.com>
Date: Tue, 7 Mar 2017 22:09:56 -0800
Subject: target: Fix VERIFY_16 handling in sbc_parse_cdb

As reported by Max, the Windows 2008 R2 chkdsk utility expects
VERIFY_16 to be supported, and does not handle the returned
CHECK_CONDITION properly, resulting in an infinite loop.

The kernel will log huge amounts of this error:

kernel: TARGET_CORE[iSCSI]: Unsupported SCSI Opcode 0x8f, sending
CHECK_CONDITION.

Signed-off-by: Max Lohrmann <post@wickenrode.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_sbc.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index 68d8aef7ab78..c194063f169b 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -1105,9 +1105,15 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
 			return ret;
 		break;
 	case VERIFY:
+	case VERIFY_16:
 		size = 0;
-		sectors = transport_get_sectors_10(cdb);
-		cmd->t_task_lba = transport_lba_32(cdb);
+		if (cdb[0] == VERIFY) {
+			sectors = transport_get_sectors_10(cdb);
+			cmd->t_task_lba = transport_lba_32(cdb);
+		} else {
+			sectors = transport_get_sectors_16(cdb);
+			cmd->t_task_lba = transport_lba_64(cdb);
+		}
 		cmd->execute_cmd = sbc_emulate_noop;
 		goto check_lba;
 	case REZERO_UNIT:
-- 
cgit v1.2.3


From 69eb1596b4df8ca834ba6f9a3df40f78943f6dca Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 17 Feb 2017 15:32:23 +0100
Subject: staging: octeon: remove unused variable

A cleanup patch left one local variable without a reference:

drivers/staging/octeon/ethernet-rx.c:339:28: warning: unused variable 'priv' [-Wunused-variable]

This removes the declaration too.

Fixes: 66812da3a689 ("staging: octeon: Use net_device_stats from struct net_device")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/octeon/ethernet-rx.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/staging/octeon/ethernet-rx.c b/drivers/staging/octeon/ethernet-rx.c
index 7f8cf875157c..65a285631994 100644
--- a/drivers/staging/octeon/ethernet-rx.c
+++ b/drivers/staging/octeon/ethernet-rx.c
@@ -336,7 +336,6 @@ static int cvm_oct_poll(struct oct_rx_group *rx_group, int budget)
 		if (likely((port < TOTAL_NUMBER_OF_PORTS) &&
 			   cvm_oct_device[port])) {
 			struct net_device *dev = cvm_oct_device[port];
-			struct octeon_ethernet *priv = netdev_priv(dev);
 
 			/*
 			 * Only accept packets for devices that are
-- 
cgit v1.2.3


From fc69910f329d61821897871e0e957eda39beb3d8 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 8 Mar 2017 08:29:31 +0100
Subject: MIPS: Add missing include files

After the split of linux/sched.h, several platforms in arch/mips stopped building.

Add the respective additional #include statements to fix the problem I first
tried adding these into asm/processor.h, but ran into circular header
dependencies with that which I could not figure out.

The commit I listed as causing the problem is the branch merge, as there is
likely a combination of multiple patches in that branch.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mips@linux-mips.org
Cc: ralf@linux-mips.org
Fixes: 1827adb11ad2 ("Merge branch 'WIP.sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip")
Link: http://lkml.kernel.org/r/20170308072931.3836696-1-arnd@arndb.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/mips/cavium-octeon/cpu.c                  | 2 ++
 arch/mips/cavium-octeon/crypto/octeon-crypto.c | 1 +
 arch/mips/cavium-octeon/smp.c                  | 1 +
 arch/mips/include/asm/fpu.h                    | 1 +
 arch/mips/kernel/smp-bmips.c                   | 1 +
 arch/mips/kernel/smp-mt.c                      | 1 +
 arch/mips/loongson64/loongson-3/cop2-ex.c      | 1 +
 arch/mips/netlogic/common/smp.c                | 1 +
 arch/mips/netlogic/xlp/cop2-ex.c               | 3 +++
 arch/mips/sgi-ip22/ip28-berr.c                 | 1 +
 arch/mips/sgi-ip27/ip27-berr.c                 | 2 ++
 arch/mips/sgi-ip27/ip27-smp.c                  | 3 +++
 arch/mips/sgi-ip32/ip32-berr.c                 | 1 +
 arch/mips/sgi-ip32/ip32-reset.c                | 1 +
 14 files changed, 20 insertions(+)

diff --git a/arch/mips/cavium-octeon/cpu.c b/arch/mips/cavium-octeon/cpu.c
index a5b427909b5c..036d56cc4591 100644
--- a/arch/mips/cavium-octeon/cpu.c
+++ b/arch/mips/cavium-octeon/cpu.c
@@ -10,7 +10,9 @@
 #include <linux/irqflags.h>
 #include <linux/notifier.h>
 #include <linux/prefetch.h>
+#include <linux/ptrace.h>
 #include <linux/sched.h>
+#include <linux/sched/task_stack.h>
 
 #include <asm/cop2.h>
 #include <asm/current.h>
diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.c b/arch/mips/cavium-octeon/crypto/octeon-crypto.c
index 4d22365844af..cfb4a146cf17 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-crypto.c
+++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.c
@@ -9,6 +9,7 @@
 #include <asm/cop2.h>
 #include <linux/export.h>
 #include <linux/interrupt.h>
+#include <linux/sched/task_stack.h>
 
 #include "octeon-crypto.h"
 
diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
index 4b94b7fbafa3..3de786545ded 100644
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -12,6 +12,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/sched.h>
 #include <linux/sched/hotplug.h>
+#include <linux/sched/task_stack.h>
 #include <linux/init.h>
 #include <linux/export.h>
 
diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h
index 321752bcbab6..f94455f964ec 100644
--- a/arch/mips/include/asm/fpu.h
+++ b/arch/mips/include/asm/fpu.h
@@ -12,6 +12,7 @@
 
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
+#include <linux/ptrace.h>
 #include <linux/thread_info.h>
 #include <linux/bitops.h>
 
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index 3daa2cae50b0..1b070a76fcdd 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/sched/hotplug.h>
+#include <linux/sched/task_stack.h>
 #include <linux/mm.h>
 #include <linux/delay.h>
 #include <linux/smp.h>
diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c
index e077ea3e11fb..e398cbc3d776 100644
--- a/arch/mips/kernel/smp-mt.c
+++ b/arch/mips/kernel/smp-mt.c
@@ -23,6 +23,7 @@
 #include <linux/interrupt.h>
 #include <linux/irqchip/mips-gic.h>
 #include <linux/compiler.h>
+#include <linux/sched/task_stack.h>
 #include <linux/smp.h>
 
 #include <linux/atomic.h>
diff --git a/arch/mips/loongson64/loongson-3/cop2-ex.c b/arch/mips/loongson64/loongson-3/cop2-ex.c
index ea13764d0a03..621d6af5f6eb 100644
--- a/arch/mips/loongson64/loongson-3/cop2-ex.c
+++ b/arch/mips/loongson64/loongson-3/cop2-ex.c
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/notifier.h>
+#include <linux/ptrace.h>
 
 #include <asm/fpu.h>
 #include <asm/cop2.h>
diff --git a/arch/mips/netlogic/common/smp.c b/arch/mips/netlogic/common/smp.c
index 10d86d54880a..bddf1ef553a4 100644
--- a/arch/mips/netlogic/common/smp.c
+++ b/arch/mips/netlogic/common/smp.c
@@ -35,6 +35,7 @@
 #include <linux/kernel.h>
 #include <linux/delay.h>
 #include <linux/init.h>
+#include <linux/sched/task_stack.h>
 #include <linux/smp.h>
 #include <linux/irq.h>
 
diff --git a/arch/mips/netlogic/xlp/cop2-ex.c b/arch/mips/netlogic/xlp/cop2-ex.c
index 52bc5de42005..21e439b3db70 100644
--- a/arch/mips/netlogic/xlp/cop2-ex.c
+++ b/arch/mips/netlogic/xlp/cop2-ex.c
@@ -9,11 +9,14 @@
  * Copyright (C) 2009 Wind River Systems,
  *   written by Ralf Baechle <ralf@linux-mips.org>
  */
+#include <linux/capability.h>
 #include <linux/init.h>
 #include <linux/irqflags.h>
 #include <linux/notifier.h>
 #include <linux/prefetch.h>
+#include <linux/ptrace.h>
 #include <linux/sched.h>
+#include <linux/sched/task_stack.h>
 
 #include <asm/cop2.h>
 #include <asm/current.h>
diff --git a/arch/mips/sgi-ip22/ip28-berr.c b/arch/mips/sgi-ip22/ip28-berr.c
index 1f2a5bc4779e..75460e1e106b 100644
--- a/arch/mips/sgi-ip22/ip28-berr.c
+++ b/arch/mips/sgi-ip22/ip28-berr.c
@@ -9,6 +9,7 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/sched/debug.h>
+#include <linux/sched/signal.h>
 #include <linux/seq_file.h>
 
 #include <asm/addrspace.h>
diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c
index d12879eb2b1f..83efe03d5c60 100644
--- a/arch/mips/sgi-ip27/ip27-berr.c
+++ b/arch/mips/sgi-ip27/ip27-berr.c
@@ -12,7 +12,9 @@
 #include <linux/signal.h>	/* for SIGBUS */
 #include <linux/sched.h>	/* schow_regs(), force_sig() */
 #include <linux/sched/debug.h>
+#include <linux/sched/signal.h>
 
+#include <asm/ptrace.h>
 #include <asm/sn/addrs.h>
 #include <asm/sn/arch.h>
 #include <asm/sn/sn0/hub.h>
diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c
index f5ed45e8f442..4cd47d23d81a 100644
--- a/arch/mips/sgi-ip27/ip27-smp.c
+++ b/arch/mips/sgi-ip27/ip27-smp.c
@@ -8,10 +8,13 @@
  */
 #include <linux/init.h>
 #include <linux/sched.h>
+#include <linux/sched/task_stack.h>
 #include <linux/topology.h>
 #include <linux/nodemask.h>
+
 #include <asm/page.h>
 #include <asm/processor.h>
+#include <asm/ptrace.h>
 #include <asm/sn/arch.h>
 #include <asm/sn/gda.h>
 #include <asm/sn/intr.h>
diff --git a/arch/mips/sgi-ip32/ip32-berr.c b/arch/mips/sgi-ip32/ip32-berr.c
index 57d8c7486fe6..c1f12a9cf305 100644
--- a/arch/mips/sgi-ip32/ip32-berr.c
+++ b/arch/mips/sgi-ip32/ip32-berr.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/sched/debug.h>
+#include <linux/sched/signal.h>
 #include <asm/traps.h>
 #include <linux/uaccess.h>
 #include <asm/addrspace.h>
diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c
index 8bd415c8729f..b3b442def423 100644
--- a/arch/mips/sgi-ip32/ip32-reset.c
+++ b/arch/mips/sgi-ip32/ip32-reset.c
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/sched.h>
+#include <linux/sched/signal.h>
 #include <linux/notifier.h>
 #include <linux/delay.h>
 #include <linux/rtc/ds1685.h>
-- 
cgit v1.2.3


From 12aff99723901bcc0e2a6a34343a4f62c371fdd9 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@nxp.com>
Date: Tue, 7 Feb 2017 17:14:14 -0200
Subject: ARM: dts: imx6sx-udoo-neo: Fix reboot hang

After issuing a 'reboot' command the imx6sx-udoo-neo board does not
reboot as expected and it just hangs instead.

In mainline kernel only LDO enabled mode is supported. Do not provide
arm-supply/soc-supply nodes in the device tree, so that the board operates
in LDO enabled mode and can then successfully reboot via watchdog.

Fixes: 76e691fc7653b85d39 ("ARM: dts: imx6sx: Add UDOO Neo support")
Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
Tested-by: Breno Lima <breno.lima@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/imx6sx-udoo-neo.dtsi | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/arch/arm/boot/dts/imx6sx-udoo-neo.dtsi b/arch/arm/boot/dts/imx6sx-udoo-neo.dtsi
index 49f466fe0b1d..dcfc97591433 100644
--- a/arch/arm/boot/dts/imx6sx-udoo-neo.dtsi
+++ b/arch/arm/boot/dts/imx6sx-udoo-neo.dtsi
@@ -121,11 +121,6 @@
 	};
 };
 
-&cpu0 {
-	arm-supply = <&sw1a_reg>;
-	soc-supply = <&sw1c_reg>;
-};
-
 &fec1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_enet1>;
-- 
cgit v1.2.3


From 35b2719e72d375f3e32c819858165668d948d5f2 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <felipe.balbi@linux.intel.com>
Date: Wed, 8 Mar 2017 13:56:37 +0200
Subject: usb: dwc3: gadget: make to increment req->remaining in all cases

Sometimes, we might get a completion for a TRB which is left with HWO
bit. Even in these cases, we should increment req->remaining to
properly report total transferred size. I noticed this while debuggin
a separate problem seen with MSC tests from USBCV. Sometimes we would
erroneously report a completion for a 512-byte transfer when, in
reality, we transferred 0 bytes.

Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/dwc3/gadget.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 3db5eeae2bfb..0d75158e43fe 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2189,12 +2189,12 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep,
 		return 1;
 	}
 
-	if ((trb->ctrl & DWC3_TRB_CTRL_HWO) && status != -ESHUTDOWN)
-		return 1;
-
 	count = trb->size & DWC3_TRB_SIZE_MASK;
 	req->remaining += count;
 
+	if ((trb->ctrl & DWC3_TRB_CTRL_HWO) && status != -ESHUTDOWN)
+		return 1;
+
 	if (dep->direction) {
 		if (count) {
 			trb_status = DWC3_TRB_SIZE_TRBSTS(trb->size);
-- 
cgit v1.2.3


From 0b1d250afb8eb9d65afb568bac9b9f9253a82b49 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 6 Mar 2017 17:36:37 +0100
Subject: USB: serial: io_ti: fix NULL-deref in interrupt callback

Fix a NULL-pointer dereference in the interrupt callback should a
malicious device send data containing a bad port number by adding the
missing sanity check.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/io_ti.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c
index ceaeebaa6f90..4561dd4cde8b 100644
--- a/drivers/usb/serial/io_ti.c
+++ b/drivers/usb/serial/io_ti.c
@@ -1674,6 +1674,12 @@ static void edge_interrupt_callback(struct urb *urb)
 	function    = TIUMP_GET_FUNC_FROM_CODE(data[0]);
 	dev_dbg(dev, "%s - port_number %d, function %d, info 0x%x\n", __func__,
 		port_number, function, data[1]);
+
+	if (port_number >= edge_serial->serial->num_ports) {
+		dev_err(dev, "bad port number %d\n", port_number);
+		goto exit;
+	}
+
 	port = edge_serial->serial->port[port_number];
 	edge_port = usb_get_serial_port_data(port);
 	if (!edge_port) {
-- 
cgit v1.2.3


From 30572418b445d85fcfe6c8fe84c947d2606767d8 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 6 Mar 2017 17:36:38 +0100
Subject: USB: serial: omninet: fix reference leaks at open

This driver needlessly took another reference to the tty on open, a
reference which was then never released on close. This lead to not just
a leak of the tty, but also a driver reference leak that prevented the
driver from being unloaded after a port had once been opened.

Fixes: 4a90f09b20f4 ("tty: usb-serial krefs")
Cc: stable <stable@vger.kernel.org>	# 2.6.28
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/omninet.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c
index a180b17d2432..76564b3bebb9 100644
--- a/drivers/usb/serial/omninet.c
+++ b/drivers/usb/serial/omninet.c
@@ -142,12 +142,6 @@ static int omninet_port_remove(struct usb_serial_port *port)
 
 static int omninet_open(struct tty_struct *tty, struct usb_serial_port *port)
 {
-	struct usb_serial	*serial = port->serial;
-	struct usb_serial_port	*wport;
-
-	wport = serial->port[1];
-	tty_port_tty_set(&wport->port, tty);
-
 	return usb_serial_generic_open(tty, port);
 }
 
-- 
cgit v1.2.3


From 367ec1706745912702c187722065285cd4d2aee7 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 6 Mar 2017 17:36:39 +0100
Subject: USB: serial: omninet: drop open callback

Remove the now redundant open callback and let core call the generic
handler for us instead.

Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/omninet.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c
index 76564b3bebb9..dd706953b466 100644
--- a/drivers/usb/serial/omninet.c
+++ b/drivers/usb/serial/omninet.c
@@ -31,7 +31,6 @@
 #define BT_IGNITIONPRO_ID	0x2000
 
 /* function prototypes */
-static int  omninet_open(struct tty_struct *tty, struct usb_serial_port *port);
 static void omninet_process_read_urb(struct urb *urb);
 static void omninet_write_bulk_callback(struct urb *urb);
 static int  omninet_write(struct tty_struct *tty, struct usb_serial_port *port,
@@ -60,7 +59,6 @@ static struct usb_serial_driver zyxel_omninet_device = {
 	.attach =		omninet_attach,
 	.port_probe =		omninet_port_probe,
 	.port_remove =		omninet_port_remove,
-	.open =			omninet_open,
 	.write =		omninet_write,
 	.write_room =		omninet_write_room,
 	.write_bulk_callback =	omninet_write_bulk_callback,
@@ -140,11 +138,6 @@ static int omninet_port_remove(struct usb_serial_port *port)
 	return 0;
 }
 
-static int omninet_open(struct tty_struct *tty, struct usb_serial_port *port)
-{
-	return usb_serial_generic_open(tty, port);
-}
-
 #define OMNINET_HEADERLEN	4
 #define OMNINET_BULKOUTSIZE	64
 #define OMNINET_PAYLOADSIZE	(OMNINET_BULKOUTSIZE - OMNINET_HEADERLEN)
-- 
cgit v1.2.3


From 654b404f2a222f918af9b0cd18ad469d0c941a8e Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 6 Mar 2017 17:36:40 +0100
Subject: USB: serial: io_ti: fix information leak in completion handler

Add missing sanity check to the bulk-in completion handler to avoid an
integer underflow that can be triggered by a malicious device.

This avoids leaking 128 kB of memory content from after the URB transfer
buffer to user space.

Fixes: 8c209e6782ca ("USB: make actual_length in struct urb field u32")
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable <stable@vger.kernel.org>	# 2.6.30
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/io_ti.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c
index 4561dd4cde8b..a76b95d32157 100644
--- a/drivers/usb/serial/io_ti.c
+++ b/drivers/usb/serial/io_ti.c
@@ -1761,7 +1761,7 @@ static void edge_bulk_in_callback(struct urb *urb)
 
 	port_number = edge_port->port->port_number;
 
-	if (edge_port->lsr_event) {
+	if (urb->actual_length > 0 && edge_port->lsr_event) {
 		edge_port->lsr_event = 0;
 		dev_dbg(dev, "%s ===== Port %u LSR Status = %02x, Data = %02x ======\n",
 			__func__, port_number, edge_port->lsr_mask, *data);
-- 
cgit v1.2.3


From 8c76d7cd520ebffc1ea9ea0850d87a224a50c7f2 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 6 Mar 2017 17:36:41 +0100
Subject: USB: serial: safe_serial: fix information leak in completion handler

Add missing sanity check to the bulk-in completion handler to avoid an
integer underflow that could be triggered by a malicious device.

This avoids leaking up to 56 bytes from after the URB transfer buffer to
user space.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/safe_serial.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/usb/serial/safe_serial.c b/drivers/usb/serial/safe_serial.c
index 93c6c9b08daa..8a069aa154ed 100644
--- a/drivers/usb/serial/safe_serial.c
+++ b/drivers/usb/serial/safe_serial.c
@@ -200,6 +200,11 @@ static void safe_process_read_urb(struct urb *urb)
 	if (!safe)
 		goto out;
 
+	if (length < 2) {
+		dev_err(&port->dev, "malformed packet\n");
+		return;
+	}
+
 	fcs = fcs_compute10(data, length, CRC10_INITFCS);
 	if (fcs) {
 		dev_err(&port->dev, "%s - bad CRC %x\n", __func__, fcs);
-- 
cgit v1.2.3


From 99c014a87979c9244806685f3c3fc7767f79f645 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 8 Mar 2017 10:16:17 -0500
Subject: ktest: Fix while loop in wait_for_input

The run_command function was changed to use the wait_for_input function to
allow having a timeout if the command to run takes too much time. There was
a bug in the wait_for_input where it could end up going into an infinite
loop. There's two issues here. One is that the return value of the sysread
wasn't used for the write (to write a proper size), and that it should
continue processing the passed in file descriptor too even if there was
input. There was no check for error, if for some reason STDIN returned an
error, the function would go into an infinite loop and never exit.

Reported-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tested-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fixes: 6e98d1b4415f ("ktest: Add timeout to ssh command")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/testing/ktest/ktest.pl | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 29470b554711..0c006a2f165d 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -1904,11 +1904,12 @@ sub wait_for_input
 
 	# copy data from stdin to the console
 	if (vec($rout, fileno(\*STDIN), 1) == 1) {
-	    sysread(\*STDIN, $buf, 1000);
-	    syswrite($fp, $buf, 1000);
-	    next;
+	    $nr = sysread(\*STDIN, $buf, 1000);
+	    syswrite($fp, $buf, $nr) if ($nr > 0);
 	}
 
+	next if (vec($rout, fileno($fp), 1) != 1);
+
 	$line = "";
 
 	# try to read one char at a time
-- 
cgit v1.2.3


From f7c6401ff84ab8ffffc281a29aa0a787f7eb346e Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 8 Mar 2017 10:36:59 -0500
Subject: ktest: Make sure wait_for_input does honor the timeout

The function wait_for_input takes in a timeout, and even has a default
timeout. But if for some reason the STDIN descriptor keeps sending in data,
the function will never time out. The timout is to wait for the data from
the passed in file descriptor, not for STDIN. Adding a test in the case
where there's no data from the passed in file descriptor that checks to see
if the timeout passed, will ensure that it will timeout properly even if
there's input in STDIN.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/testing/ktest/ktest.pl | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 0c006a2f165d..49f7c8b9d9c4 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -1880,6 +1880,7 @@ sub get_grub_index {
 sub wait_for_input
 {
     my ($fp, $time) = @_;
+    my $start_time;
     my $rin;
     my $rout;
     my $nr;
@@ -1895,12 +1896,12 @@ sub wait_for_input
     vec($rin, fileno($fp), 1) = 1;
     vec($rin, fileno(\*STDIN), 1) = 1;
 
+    $start_time = time;
+
     while (1) {
 	$nr = select($rout=$rin, undef, undef, $time);
 
-	if ($nr <= 0) {
-	    return undef;
-	}
+	last if ($nr <= 0);
 
 	# copy data from stdin to the console
 	if (vec($rout, fileno(\*STDIN), 1) == 1) {
@@ -1908,7 +1909,11 @@ sub wait_for_input
 	    syswrite($fp, $buf, $nr) if ($nr > 0);
 	}
 
-	next if (vec($rout, fileno($fp), 1) != 1);
+	# The timeout is based on time waiting for the fp data
+	if (vec($rout, fileno($fp), 1) != 1) {
+	    last if (defined($time) && (time - $start_time > $time));
+	    next;
+	}
 
 	$line = "";
 
@@ -1918,12 +1923,11 @@ sub wait_for_input
 	    last if ($ch eq "\n");
 	}
 
-	if (!length($line)) {
-	    return undef;
-	}
+	last if (!length($line));
 
 	return $line;
     }
+    return undef;
 }
 
 sub reboot_to {
-- 
cgit v1.2.3


From 2501c1bb054290679baad0ff7f4f07c714251f4c Mon Sep 17 00:00:00 2001
From: Chris Brandt <chris.brandt@renesas.com>
Date: Mon, 6 Mar 2017 15:20:51 -0500
Subject: i2c: riic: fix restart condition

While modifying the driver to use the STOP interrupt, the completion of the
intermediate transfers need to wake the driver back up in order to initiate
the next transfer (restart condition). Otherwise you get never ending
interrupts and only the first transfer sent.

Fixes: 71ccea095ea1 ("i2c: riic: correctly finish transfers")
Reported-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Chris Brandt <chris.brandt@renesas.com>
Tested-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-riic.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-riic.c b/drivers/i2c/busses/i2c-riic.c
index 8f11d347b3ec..c811af4c8d81 100644
--- a/drivers/i2c/busses/i2c-riic.c
+++ b/drivers/i2c/busses/i2c-riic.c
@@ -218,8 +218,12 @@ static irqreturn_t riic_tend_isr(int irq, void *data)
 	}
 
 	if (riic->is_last || riic->err) {
-		riic_clear_set_bit(riic, 0, ICIER_SPIE, RIIC_ICIER);
+		riic_clear_set_bit(riic, ICIER_TEIE, ICIER_SPIE, RIIC_ICIER);
 		writeb(ICCR2_SP, riic->base + RIIC_ICCR2);
+	} else {
+		/* Transfer is complete, but do not send STOP */
+		riic_clear_set_bit(riic, ICIER_TEIE, 0, RIIC_ICIER);
+		complete(&riic->msg_done);
 	}
 
 	return IRQ_HANDLED;
-- 
cgit v1.2.3


From 737f98cfe7de8df7433a4d846850aa8efa44bd48 Mon Sep 17 00:00:00 2001
From: Ming Lei <tom.leiming@gmail.com>
Date: Wed, 22 Feb 2017 18:13:59 +0800
Subject: blk-mq: initialize mq kobjects in blk_mq_init_allocated_queue()

Both q->mq_kobj and sw queues' kobjects should have been initialized
once, instead of doing that each add_disk context.

Also this patch removes clearing of ctx in blk_mq_init_cpu_queues()
because percpu allocator fills zero to allocated variable.

This patch fixes one issue[1] reported from Omar.

[1] kernel wearning when doing unbind/bind on one scsi-mq device

[   19.347924] kobject (ffff8800791ea0b8): tried to init an initialized object, something is seriously wrong.
[   19.349781] CPU: 1 PID: 84 Comm: kworker/u8:1 Not tainted 4.10.0-rc7-00210-g53f39eeaa263 #34
[   19.350686] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-20161122_114906-anatol 04/01/2014
[   19.350920] Workqueue: events_unbound async_run_entry_fn
[   19.350920] Call Trace:
[   19.350920]  dump_stack+0x63/0x83
[   19.350920]  kobject_init+0x77/0x90
[   19.350920]  blk_mq_register_dev+0x40/0x130
[   19.350920]  blk_register_queue+0xb6/0x190
[   19.350920]  device_add_disk+0x1ec/0x4b0
[   19.350920]  sd_probe_async+0x10d/0x1c0 [sd_mod]
[   19.350920]  async_run_entry_fn+0x48/0x150
[   19.350920]  process_one_work+0x1d0/0x480
[   19.350920]  worker_thread+0x48/0x4e0
[   19.350920]  kthread+0x101/0x140
[   19.350920]  ? process_one_work+0x480/0x480
[   19.350920]  ? kthread_create_on_node+0x60/0x60
[   19.350920]  ret_from_fork+0x2c/0x40

Cc: Omar Sandoval <osandov@osandov.com>
Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Tested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq-sysfs.c | 4 +---
 block/blk-mq.c       | 4 +++-
 block/blk-mq.h       | 1 +
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 295e69670c39..124305407c80 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -277,7 +277,7 @@ void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx)
 	kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
 }
 
-static void blk_mq_sysfs_init(struct request_queue *q)
+void blk_mq_sysfs_init(struct request_queue *q)
 {
 	struct blk_mq_ctx *ctx;
 	int cpu;
@@ -297,8 +297,6 @@ int blk_mq_register_dev(struct device *dev, struct request_queue *q)
 
 	blk_mq_disable_hotplug();
 
-	blk_mq_sysfs_init(q);
-
 	ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
 	if (ret < 0)
 		goto out;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b2fd175e84d7..ed4b55176cdd 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2045,7 +2045,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 		struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i);
 		struct blk_mq_hw_ctx *hctx;
 
-		memset(__ctx, 0, sizeof(*__ctx));
 		__ctx->cpu = i;
 		spin_lock_init(&__ctx->lock);
 		INIT_LIST_HEAD(&__ctx->rq_list);
@@ -2352,6 +2351,9 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	if (!q->queue_ctx)
 		goto err_exit;
 
+	/* init q->mq_kobj and sw queues' kobjects */
+	blk_mq_sysfs_init(q);
+
 	q->queue_hw_ctx = kzalloc_node(nr_cpu_ids * sizeof(*(q->queue_hw_ctx)),
 						GFP_KERNEL, set->numa_node);
 	if (!q->queue_hw_ctx)
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 088ced003c13..ad8bfd7473ef 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -77,6 +77,7 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
 /*
  * sysfs helpers
  */
+extern void blk_mq_sysfs_init(struct request_queue *q);
 extern int blk_mq_sysfs_register(struct request_queue *q);
 extern void blk_mq_sysfs_unregister(struct request_queue *q);
 extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
-- 
cgit v1.2.3


From 7ea5fe31c12dd8bcf4a9c5a4a7e8e23826a9a3b8 Mon Sep 17 00:00:00 2001
From: Ming Lei <tom.leiming@gmail.com>
Date: Wed, 22 Feb 2017 18:14:00 +0800
Subject: blk-mq: make lifetime consitent between q/ctx and its kobject

Currently from kobject view, both q->mq_kobj and ctx->kobj can
be released during one cycle of blk_mq_register_dev() and
blk_mq_unregister_dev(). Actually, sw queue's lifetime is
same with its request queue's, which is covered by request_queue->kobj.

So we don't need to call kobject_put() for the two kinds of
kobject in __blk_mq_unregister_dev(), instead we do that
in release handler of request queue.

Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Tested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq-sysfs.c | 20 +++++++++++++-------
 block/blk-mq.c       |  7 ++++++-
 block/blk-mq.h       |  1 +
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 124305407c80..77fb238af2be 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -242,15 +242,11 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
 static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
-	struct blk_mq_ctx *ctx;
-	int i, j;
+	int i;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		blk_mq_unregister_hctx(hctx);
 
-		hctx_for_each_ctx(hctx, ctx, j)
-			kobject_put(&ctx->kobj);
-
 		kobject_put(&hctx->kobj);
 	}
 
@@ -258,8 +254,6 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
 
 	kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
 	kobject_del(&q->mq_kobj);
-	kobject_put(&q->mq_kobj);
-
 	kobject_put(&dev->kobj);
 
 	q->mq_sysfs_init_done = false;
@@ -277,6 +271,18 @@ void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx)
 	kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
 }
 
+void blk_mq_sysfs_deinit(struct request_queue *q)
+{
+	struct blk_mq_ctx *ctx;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		ctx = per_cpu_ptr(q->queue_ctx, cpu);
+		kobject_put(&ctx->kobj);
+	}
+	kobject_put(&q->mq_kobj);
+}
+
 void blk_mq_sysfs_init(struct request_queue *q)
 {
 	struct blk_mq_ctx *ctx;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ed4b55176cdd..b985c236f50f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2264,7 +2264,12 @@ void blk_mq_release(struct request_queue *q)
 
 	kfree(q->queue_hw_ctx);
 
-	/* ctx kobj stays in queue_ctx */
+	/*
+	 * release .mq_kobj and sw queue's kobject now because
+	 * both share lifetime with request queue.
+	 */
+	blk_mq_sysfs_deinit(q);
+
 	free_percpu(q->queue_ctx);
 }
 
diff --git a/block/blk-mq.h b/block/blk-mq.h
index ad8bfd7473ef..b79f9a7d8cf6 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -78,6 +78,7 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
  * sysfs helpers
  */
 extern void blk_mq_sysfs_init(struct request_queue *q);
+extern void blk_mq_sysfs_deinit(struct request_queue *q);
 extern int blk_mq_sysfs_register(struct request_queue *q);
 extern void blk_mq_sysfs_unregister(struct request_queue *q);
 extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
-- 
cgit v1.2.3


From 6c8b232efea1ad3d263ff8b9c16b7e8767a77488 Mon Sep 17 00:00:00 2001
From: Ming Lei <tom.leiming@gmail.com>
Date: Wed, 22 Feb 2017 18:14:01 +0800
Subject: blk-mq: make lifetime consistent between hctx and its kobject

This patch removes kobject_put() over hctx in __blk_mq_unregister_dev(),
and trys to keep lifetime consistent between hctx and hctx's kobject.

Now blk_mq_sysfs_register() and blk_mq_sysfs_unregister() become
totally symmetrical, and kobject's refcounter drops to zero just
when the hctx is freed.

Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Tested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq-sysfs.c | 15 ++++++++++-----
 block/blk-mq.c       |  5 +----
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 77fb238af2be..cb19ec16a7fc 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -17,6 +17,14 @@ static void blk_mq_sysfs_release(struct kobject *kobj)
 {
 }
 
+static void blk_mq_hw_sysfs_release(struct kobject *kobj)
+{
+	struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx,
+						  kobj);
+	kfree(hctx->ctxs);
+	kfree(hctx);
+}
+
 struct blk_mq_ctx_sysfs_entry {
 	struct attribute attr;
 	ssize_t (*show)(struct blk_mq_ctx *, char *);
@@ -200,7 +208,7 @@ static struct kobj_type blk_mq_ctx_ktype = {
 static struct kobj_type blk_mq_hw_ktype = {
 	.sysfs_ops	= &blk_mq_hw_sysfs_ops,
 	.default_attrs	= default_hw_ctx_attrs,
-	.release	= blk_mq_sysfs_release,
+	.release	= blk_mq_hw_sysfs_release,
 };
 
 static void blk_mq_unregister_hctx(struct blk_mq_hw_ctx *hctx)
@@ -244,12 +252,9 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
-	queue_for_each_hw_ctx(q, hctx, i) {
+	queue_for_each_hw_ctx(q, hctx, i)
 		blk_mq_unregister_hctx(hctx);
 
-		kobject_put(&hctx->kobj);
-	}
-
 	blk_mq_debugfs_unregister_hctxs(q);
 
 	kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b985c236f50f..f70595e5fb86 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2256,8 +2256,7 @@ void blk_mq_release(struct request_queue *q)
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (!hctx)
 			continue;
-		kfree(hctx->ctxs);
-		kfree(hctx);
+		kobject_put(&hctx->kobj);
 	}
 
 	q->mq_map = NULL;
@@ -2336,8 +2335,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 			blk_mq_exit_hctx(q, set, hctx, j);
 			free_cpumask_var(hctx->cpumask);
 			kobject_put(&hctx->kobj);
-			kfree(hctx->ctxs);
-			kfree(hctx);
 			hctxs[j] = NULL;
 
 		}
-- 
cgit v1.2.3


From 01388df37627d2e89f0b835377c0eb39d81f671c Mon Sep 17 00:00:00 2001
From: Ming Lei <tom.leiming@gmail.com>
Date: Wed, 22 Feb 2017 18:14:02 +0800
Subject: blk-mq: free hctx->cpumask in release handler of hctx's kobject

It is obviously that hctx->cpumask is per hctx, and both
share same lifetime, so this patch moves freeing of hctx->cpumask
into release handler of hctx's kobject.

Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Tested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq-sysfs.c |  1 +
 block/blk-mq.c       | 12 ------------
 2 files changed, 1 insertion(+), 12 deletions(-)

diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index cb19ec16a7fc..d745ab81033a 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -21,6 +21,7 @@ static void blk_mq_hw_sysfs_release(struct kobject *kobj)
 {
 	struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx,
 						  kobj);
+	free_cpumask_var(hctx->cpumask);
 	kfree(hctx->ctxs);
 	kfree(hctx);
 }
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f70595e5fb86..159187a28d66 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1955,16 +1955,6 @@ static void blk_mq_exit_hw_queues(struct request_queue *q,
 	}
 }
 
-static void blk_mq_free_hw_queues(struct request_queue *q,
-		struct blk_mq_tag_set *set)
-{
-	struct blk_mq_hw_ctx *hctx;
-	unsigned int i;
-
-	queue_for_each_hw_ctx(q, hctx, i)
-		free_cpumask_var(hctx->cpumask);
-}
-
 static int blk_mq_init_hctx(struct request_queue *q,
 		struct blk_mq_tag_set *set,
 		struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
@@ -2333,7 +2323,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 			if (hctx->tags)
 				blk_mq_free_map_and_requests(set, j);
 			blk_mq_exit_hctx(q, set, hctx, j);
-			free_cpumask_var(hctx->cpumask);
 			kobject_put(&hctx->kobj);
 			hctxs[j] = NULL;
 
@@ -2446,7 +2435,6 @@ void blk_mq_free_queue(struct request_queue *q)
 	blk_mq_del_queue_tag_set(q);
 
 	blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
-	blk_mq_free_hw_queues(q, set);
 }
 
 /* Basically redo blk_mq_init_queue with queue frozen */
-- 
cgit v1.2.3


From 0bc315381fe9ed9fb91db8b0e82171b645ac008f Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Mon, 6 Mar 2017 11:23:35 +0100
Subject: zram: set physical queue limits to avoid array out of bounds accesses

zram can handle at most SECTORS_PER_PAGE sectors in a bio's bvec. When using
the NVMe over Fabrics loopback target which potentially sends a huge bulk of
pages attached to the bio's bvec this results in a kernel panic because of
array out of bounds accesses in zram_decompress_page().

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/zram/zram_drv.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index e27d89a36c34..dceb5edd1e54 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1189,6 +1189,8 @@ static int zram_add(void)
 	blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
 	blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
 	zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
+	zram->disk->queue->limits.max_sectors = SECTORS_PER_PAGE;
+	zram->disk->queue->limits.chunk_sectors = 0;
 	blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
 	/*
 	 * zram_bio_discard() will clear all logical blocks if logical block
-- 
cgit v1.2.3


From b0bfdfc2bf7fa85317824c6a389fc373dfcef5bc Mon Sep 17 00:00:00 2001
From: Jon Derrick <jonathan.derrick@intel.com>
Date: Mon, 6 Mar 2017 08:41:04 -0700
Subject: block/sed: Fix opal user range check and unused variables

Fixes check that the opal user is within the range, and cleans up unused
method variables.

Signed-off-by: Jon Derrick <jonathan.derrick@intel.com>
Reviewed-by: Scott Bauer <scott.bauer@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/sed-opal.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/block/sed-opal.c b/block/sed-opal.c
index 1e18dca360fc..14035f826b5e 100644
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -1023,7 +1023,6 @@ static int finalize_and_send(struct opal_dev *dev, cont_fn cont)
 
 static int gen_key(struct opal_dev *dev, void *data)
 {
-	const u8 *method;
 	u8 uid[OPAL_UID_LENGTH];
 	int err = 0;
 
@@ -1031,7 +1030,6 @@ static int gen_key(struct opal_dev *dev, void *data)
 	set_comid(dev, dev->comid);
 
 	memcpy(uid, dev->prev_data, min(sizeof(uid), dev->prev_d_len));
-	method = opalmethod[OPAL_GENKEY];
 	kfree(dev->prev_data);
 	dev->prev_data = NULL;
 
@@ -1669,7 +1667,6 @@ static int add_user_to_lr(struct opal_dev *dev, void *data)
 static int lock_unlock_locking_range(struct opal_dev *dev, void *data)
 {
 	u8 lr_buffer[OPAL_UID_LENGTH];
-	const u8 *method;
 	struct opal_lock_unlock *lkul = data;
 	u8 read_locked = 1, write_locked = 1;
 	int err = 0;
@@ -1677,7 +1674,6 @@ static int lock_unlock_locking_range(struct opal_dev *dev, void *data)
 	clear_opal_cmd(dev);
 	set_comid(dev, dev->comid);
 
-	method = opalmethod[OPAL_SET];
 	if (build_locking_range(lr_buffer, sizeof(lr_buffer),
 				lkul->session.opal_key.lr) < 0)
 		return -ERANGE;
@@ -1733,14 +1729,12 @@ static int lock_unlock_locking_range_sum(struct opal_dev *dev, void *data)
 {
 	u8 lr_buffer[OPAL_UID_LENGTH];
 	u8 read_locked = 1, write_locked = 1;
-	const u8 *method;
 	struct opal_lock_unlock *lkul = data;
 	int ret;
 
 	clear_opal_cmd(dev);
 	set_comid(dev, dev->comid);
 
-	method = opalmethod[OPAL_SET];
 	if (build_locking_range(lr_buffer, sizeof(lr_buffer),
 				lkul->session.opal_key.lr) < 0)
 		return -ERANGE;
@@ -2133,7 +2127,7 @@ static int opal_add_user_to_lr(struct opal_dev *dev,
 		pr_err("Locking state was not RO or RW\n");
 		return -EINVAL;
 	}
-	if (lk_unlk->session.who < OPAL_USER1 &&
+	if (lk_unlk->session.who < OPAL_USER1 ||
 	    lk_unlk->session.who > OPAL_USER9) {
 		pr_err("Authority was not within the range of users: %d\n",
 		       lk_unlk->session.who);
@@ -2316,7 +2310,7 @@ static int opal_activate_user(struct opal_dev *dev,
 	int ret;
 
 	/* We can't activate Admin1 it's active as manufactured */
-	if (opal_session->who < OPAL_USER1 &&
+	if (opal_session->who < OPAL_USER1 ||
 	    opal_session->who > OPAL_USER9) {
 		pr_err("Who was not a valid user: %d\n", opal_session->who);
 		return -EINVAL;
-- 
cgit v1.2.3


From 02dbfa5e5583523035f05636c614a0eca77f1aab Mon Sep 17 00:00:00 2001
From: Qi Hou <qi.hou@windriver.com>
Date: Fri, 3 Mar 2017 15:57:11 +0800
Subject: i2c: add missing of_node_put in i2c_mux_del_adapters

Refcount of of_node is increased with of_node_get() in i2c_mux_add_adapter().
It must be decreased with of_node_put() in i2c_mux_del_adapters().

Signe-off-by: Qi Hou <qi.hou@windriver.com>
Reviewed-by: Zhang Xiao <xiao.zhang@windriver.com>
Acked-by: Peter Rosin <peda@axentia.se>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-mux.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c
index 83768e85a919..2178266bca79 100644
--- a/drivers/i2c/i2c-mux.c
+++ b/drivers/i2c/i2c-mux.c
@@ -429,6 +429,7 @@ void i2c_mux_del_adapters(struct i2c_mux_core *muxc)
 	while (muxc->num_adapters) {
 		struct i2c_adapter *adap = muxc->adapter[--muxc->num_adapters];
 		struct i2c_mux_priv *priv = adap->algo_data;
+		struct device_node *np = adap->dev.of_node;
 
 		muxc->adapter[muxc->num_adapters] = NULL;
 
@@ -438,6 +439,7 @@ void i2c_mux_del_adapters(struct i2c_mux_core *muxc)
 
 		sysfs_remove_link(&priv->adap.dev.kobj, "mux_device");
 		i2c_del_adapter(adap);
+		of_node_put(np);
 		kfree(priv);
 	}
 }
-- 
cgit v1.2.3


From 2de3ec4f1d4ba6ee380478055104eb918bd50cce Mon Sep 17 00:00:00 2001
From: Jaedon Shin <jaedon.shin@gmail.com>
Date: Fri, 3 Mar 2017 10:55:03 +0900
Subject: i2c: brcmstb: Fix START and STOP conditions

The BSC data buffers to send and receive data are each of size 32 bytes
or 8 bytes 'xfersz' depending on SoC. The problem observed for all the
combined message transfer was if length of data transfer was a multiple
of 'xfersz' a repeated START was being transmitted by BSC driver. Fixed
this by appropriately setting START/STOP conditions for such transfers.

Fixes: dd1aa2524bc5 ("i2c: brcmstb: Add Broadcom settop SoC i2c controller driver")
Signed-off-by: Jaedon Shin <jaedon.shin@gmail.com>
Acked-by: Kamal Dasu <kdasu.kdev@gmail.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-brcmstb.c | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/drivers/i2c/busses/i2c-brcmstb.c b/drivers/i2c/busses/i2c-brcmstb.c
index 0652281662a8..78792b4d6437 100644
--- a/drivers/i2c/busses/i2c-brcmstb.c
+++ b/drivers/i2c/busses/i2c-brcmstb.c
@@ -465,6 +465,7 @@ static int brcmstb_i2c_xfer(struct i2c_adapter *adapter,
 	u8 *tmp_buf;
 	int len = 0;
 	int xfersz = brcmstb_i2c_get_xfersz(dev);
+	u32 cond, cond_per_msg;
 
 	if (dev->is_suspended)
 		return -EBUSY;
@@ -481,10 +482,11 @@ static int brcmstb_i2c_xfer(struct i2c_adapter *adapter,
 			pmsg->buf ? pmsg->buf[0] : '0', pmsg->len);
 
 		if (i < (num - 1) && (msgs[i + 1].flags & I2C_M_NOSTART))
-			brcmstb_set_i2c_start_stop(dev, ~(COND_START_STOP));
+			cond = ~COND_START_STOP;
 		else
-			brcmstb_set_i2c_start_stop(dev,
-						   COND_RESTART | COND_NOSTOP);
+			cond = COND_RESTART | COND_NOSTOP;
+
+		brcmstb_set_i2c_start_stop(dev, cond);
 
 		/* Send slave address */
 		if (!(pmsg->flags & I2C_M_NOSTART)) {
@@ -497,13 +499,24 @@ static int brcmstb_i2c_xfer(struct i2c_adapter *adapter,
 			}
 		}
 
+		cond_per_msg = cond;
+
 		/* Perform data transfer */
 		while (len) {
 			bytes_to_xfer = min(len, xfersz);
 
-			if (len <= xfersz && i == (num - 1))
-				brcmstb_set_i2c_start_stop(dev,
-							   ~(COND_START_STOP));
+			if (len <= xfersz) {
+				if (i == (num - 1))
+					cond_per_msg = cond_per_msg &
+						~(COND_RESTART | COND_NOSTOP);
+				else
+					cond_per_msg = cond;
+			} else {
+				cond_per_msg = (cond_per_msg & ~COND_RESTART) |
+					COND_NOSTOP;
+			}
+
+			brcmstb_set_i2c_start_stop(dev, cond_per_msg);
 
 			rc = brcmstb_i2c_xfer_bsc_data(dev, tmp_buf,
 						       bytes_to_xfer, pmsg);
@@ -512,6 +525,8 @@ static int brcmstb_i2c_xfer(struct i2c_adapter *adapter,
 
 			len -=  bytes_to_xfer;
 			tmp_buf += bytes_to_xfer;
+
+			cond_per_msg = COND_NOSTART | COND_NOSTOP;
 		}
 	}
 
-- 
cgit v1.2.3


From 7b4fdf77a450ec0fdcb2f677b080ddbf2c186544 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 3 Mar 2017 21:44:00 +0100
Subject: netfilter: don't track fragmented packets

Andrey reports syzkaller splat caused by

NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));

in ipv4 nat.  But this assertion (and the comment) are wrong, this function
does see fragments when IP_NODEFRAG setsockopt is used.

As conntrack doesn't track packets without complete l4 header, only the
first fragment is tracked.

Because applying nat to first packet but not the rest makes no sense this
also turns off tracking of all fragments.

Reported-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 4 ++++
 net/ipv4/netfilter/nf_nat_l3proto_ipv4.c       | 5 -----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index bc1486f2c064..2e14ed11a35c 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -165,6 +165,10 @@ static unsigned int ipv4_conntrack_local(void *priv,
 	if (skb->len < sizeof(struct iphdr) ||
 	    ip_hdrlen(skb) < sizeof(struct iphdr))
 		return NF_ACCEPT;
+
+	if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */
+		return NF_ACCEPT;
+
 	return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
 }
 
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index f8aad03d674b..6f5e8d01b876 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -255,11 +255,6 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
 	/* maniptype == SRC for postrouting. */
 	enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook);
 
-	/* We never see fragments: conntrack defrags on pre-routing
-	 * and local-out, and nf_nat_out protects post-routing.
-	 */
-	NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
-
 	ct = nf_ct_get(skb, &ctinfo);
 	/* Can't track?  It's not due to stress, or conntrack would
 	 * have dropped it.  Hence it's the user's responsibilty to
-- 
cgit v1.2.3


From 1945250d979203ed326b5a44fd705b6b2c46eec5 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Mon, 27 Feb 2017 20:25:05 +0100
Subject: i2c: m65xx: drop superfluous quirk structure

All length fields in Linux I2C are u16, so a HW length limitation of 16
bit lengths is not a limitation. Remove the quirk structure.

Tested-by: Jun Gao <jun.gao@mediatek.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-mt65xx.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c
index 4a7d9bc2142b..45d61714c81b 100644
--- a/drivers/i2c/busses/i2c-mt65xx.c
+++ b/drivers/i2c/busses/i2c-mt65xx.c
@@ -172,14 +172,6 @@ static const struct i2c_adapter_quirks mt6577_i2c_quirks = {
 	.max_comb_2nd_msg_len = 31,
 };
 
-static const struct i2c_adapter_quirks mt8173_i2c_quirks = {
-	.max_num_msgs = 65535,
-	.max_write_len = 65535,
-	.max_read_len = 65535,
-	.max_comb_1st_msg_len = 65535,
-	.max_comb_2nd_msg_len = 65535,
-};
-
 static const struct mtk_i2c_compatible mt6577_compat = {
 	.quirks = &mt6577_i2c_quirks,
 	.pmic_i2c = 0,
@@ -199,7 +191,6 @@ static const struct mtk_i2c_compatible mt6589_compat = {
 };
 
 static const struct mtk_i2c_compatible mt8173_compat = {
-	.quirks = &mt8173_i2c_quirks,
 	.pmic_i2c = 0,
 	.dcm = 1,
 	.auto_restart = 1,
-- 
cgit v1.2.3


From 8e05ba7f848475bdc3aa546cf88418f7e51a6671 Mon Sep 17 00:00:00 2001
From: Ying Xue <ying.xue@windriver.com>
Date: Sat, 4 Mar 2017 18:00:02 +0800
Subject: netfilter: nf_nat_sctp: fix ICMP packet to be dropped accidently

Regarding RFC 792, the first 64 bits of the original SCTP datagram's
data could be contained in ICMP packet, such as:

    0                   1                   2                   3
    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |     Type      |     Code      |          Checksum             |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                             unused                            |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |      Internet Header + 64 bits of Original Data Datagram      |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

However, according to RFC 4960, SCTP datagram header is as below:

    0                   1                   2                   3
    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |     Source Port Number        |     Destination Port Number   |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                      Verification Tag                         |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                           Checksum                            |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

It means only the first three fields of SCTP header can be carried in
ICMP packet except for Checksum field.

At present in sctp_manip_pkt(), no matter whether the packet is ICMP or
not, it always calculates SCTP packet checksum. However, not only the
calculation of checksum is unnecessary for ICMP, but also it causes
another fatal issue that ICMP packet is dropped. The header size of
SCTP is used to identify whether the writeable length of skb is bigger
than skb->len through skb_make_writable() in sctp_manip_pkt(). But
when it deals with ICMP packet, skb_make_writable() directly returns
false as the writeable length of skb is bigger than skb->len.
Subsequently ICMP is dropped.

Now we correct this misbahavior. When sctp_manip_pkt() handles ICMP
packet, 8 bytes rather than the whole SCTP header size is used to check
if writeable length of skb is overflowed. Meanwhile, as it's meaningless
to calculate checksum when packet is ICMP, the computation of checksum
is ignored as well.

Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_nat_proto_sctp.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c
index 31d358691af0..804e8a0ab36e 100644
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -33,8 +33,16 @@ sctp_manip_pkt(struct sk_buff *skb,
 	       enum nf_nat_manip_type maniptype)
 {
 	sctp_sctphdr_t *hdr;
+	int hdrsize = 8;
 
-	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+	/* This could be an inner header returned in imcp packet; in such
+	 * cases we cannot update the checksum field since it is outside
+	 * of the 8 bytes of transport layer headers we are guaranteed.
+	 */
+	if (skb->len >= hdroff + sizeof(*hdr))
+		hdrsize = sizeof(*hdr);
+
+	if (!skb_make_writable(skb, hdroff + hdrsize))
 		return false;
 
 	hdr = (struct sctphdr *)(skb->data + hdroff);
@@ -47,6 +55,9 @@ sctp_manip_pkt(struct sk_buff *skb,
 		hdr->dest = tuple->dst.u.sctp.port;
 	}
 
+	if (hdrsize < sizeof(*hdr))
+		return true;
+
 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
 		hdr->checksum = sctp_compute_cksum(skb, hdroff);
 		skb->ip_summed = CHECKSUM_NONE;
-- 
cgit v1.2.3


From b0c1e95ab44feaad8831f2c06a3473c974003b49 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 28 Feb 2017 11:10:51 -0800
Subject: i2c: copy device properties when using i2c_register_board_info()

This will allow marking device property lists as __initdata, the same as
board info structures themselves.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-boardinfo.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/i2c/i2c-boardinfo.c b/drivers/i2c/i2c-boardinfo.c
index 6e5fac6a5262..5b8f6c3a6950 100644
--- a/drivers/i2c/i2c-boardinfo.c
+++ b/drivers/i2c/i2c-boardinfo.c
@@ -15,6 +15,7 @@
 #include <linux/export.h>
 #include <linux/i2c.h>
 #include <linux/kernel.h>
+#include <linux/property.h>
 #include <linux/rwsem.h>
 #include <linux/slab.h>
 
@@ -55,6 +56,7 @@ EXPORT_SYMBOL_GPL(__i2c_first_dynamic_bus_num);
  *
  * The board info passed can safely be __initdata, but be careful of embedded
  * pointers (for platform_data, functions, etc) since that won't be copied.
+ * Device properties are deep-copied though.
  */
 int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsigned len)
 {
@@ -78,6 +80,14 @@ int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsig
 
 		devinfo->busnum = busnum;
 		devinfo->board_info = *info;
+
+		if (info->properties) {
+			devinfo->board_info.properties =
+					property_entries_dup(info->properties);
+			if (IS_ERR(devinfo->board_info.properties))
+				return PTR_ERR(devinfo->board_info.properties);
+		}
+
 		list_add_tail(&devinfo->list, &__i2c_board_list);
 	}
 
-- 
cgit v1.2.3


From 3b0277f198ac928f323c42e180680d2f79aa980d Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Tue, 7 Mar 2017 21:06:38 +0100
Subject: i2c: meson: fix wrong variable usage in meson_i2c_put_data

Most likely a copy & paste error.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Acked-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Fixes: 30021e3707a7 ("i2c: add support for Amlogic Meson I2C controller")
---
 drivers/i2c/busses/i2c-meson.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-meson.c b/drivers/i2c/busses/i2c-meson.c
index 2aa61bbbd307..73b97c71a484 100644
--- a/drivers/i2c/busses/i2c-meson.c
+++ b/drivers/i2c/busses/i2c-meson.c
@@ -175,7 +175,7 @@ static void meson_i2c_put_data(struct meson_i2c *i2c, char *buf, int len)
 		wdata1 |= *buf++ << ((i - 4) * 8);
 
 	writel(wdata0, i2c->regs + REG_TOK_WDATA0);
-	writel(wdata0, i2c->regs + REG_TOK_WDATA1);
+	writel(wdata1, i2c->regs + REG_TOK_WDATA1);
 
 	dev_dbg(i2c->dev, "%s: data %08x %08x len %d\n", __func__,
 		wdata0, wdata1, len);
-- 
cgit v1.2.3


From ab809fd81fde3d416f8656d8f814a0777fb9b65e Mon Sep 17 00:00:00 2001
From: Zhangfei Gao <zhangfei.gao@linaro.org>
Date: Tue, 27 Dec 2016 22:22:40 +0800
Subject: i2c: designware: add reset interface

Some platforms like hi3660 need do reset first to allow accessing registers

Signed-off-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Ramiro Oliveira <ramiro.oliveira@synopsys.com>
Acked-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-designware-core.h    |  1 +
 drivers/i2c/busses/i2c-designware-platdrv.c | 28 ++++++++++++++++++++++++----
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h
index c1db3a5a340f..d9aaf1790e0e 100644
--- a/drivers/i2c/busses/i2c-designware-core.h
+++ b/drivers/i2c/busses/i2c-designware-core.h
@@ -88,6 +88,7 @@ struct dw_i2c_dev {
 	void __iomem		*base;
 	struct completion	cmd_complete;
 	struct clk		*clk;
+	struct reset_control	*rst;
 	u32			(*get_clk_rate_khz) (struct dw_i2c_dev *dev);
 	struct dw_pci_controller *controller;
 	int			cmd_err;
diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index 6ce431323125..79c4b4ea0539 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -38,6 +38,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/property.h>
 #include <linux/io.h>
+#include <linux/reset.h>
 #include <linux/slab.h>
 #include <linux/acpi.h>
 #include <linux/platform_data/i2c-designware.h>
@@ -199,6 +200,14 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
 	dev->irq = irq;
 	platform_set_drvdata(pdev, dev);
 
+	dev->rst = devm_reset_control_get_optional_exclusive(&pdev->dev, NULL);
+	if (IS_ERR(dev->rst)) {
+		if (PTR_ERR(dev->rst) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+	} else {
+		reset_control_deassert(dev->rst);
+	}
+
 	if (pdata) {
 		dev->clk_freq = pdata->i2c_scl_freq;
 	} else {
@@ -235,12 +244,13 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
 	    && dev->clk_freq != 1000000 && dev->clk_freq != 3400000) {
 		dev_err(&pdev->dev,
 			"Only 100kHz, 400kHz, 1MHz and 3.4MHz supported");
-		return -EINVAL;
+		r = -EINVAL;
+		goto exit_reset;
 	}
 
 	r = i2c_dw_eval_lock_support(dev);
 	if (r)
-		return r;
+		goto exit_reset;
 
 	dev->functionality = I2C_FUNC_10BIT_ADDR | DW_IC_DEFAULT_FUNCTIONALITY;
 
@@ -286,10 +296,18 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
 	}
 
 	r = i2c_dw_probe(dev);
-	if (r && !dev->pm_runtime_disabled)
-		pm_runtime_disable(&pdev->dev);
+	if (r)
+		goto exit_probe;
 
 	return r;
+
+exit_probe:
+	if (!dev->pm_runtime_disabled)
+		pm_runtime_disable(&pdev->dev);
+exit_reset:
+	if (!IS_ERR_OR_NULL(dev->rst))
+		reset_control_assert(dev->rst);
+	return r;
 }
 
 static int dw_i2c_plat_remove(struct platform_device *pdev)
@@ -306,6 +324,8 @@ static int dw_i2c_plat_remove(struct platform_device *pdev)
 	pm_runtime_put_sync(&pdev->dev);
 	if (!dev->pm_runtime_disabled)
 		pm_runtime_disable(&pdev->dev);
+	if (!IS_ERR_OR_NULL(dev->rst))
+		reset_control_assert(dev->rst);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 568af6de058cb2b0c5b98d98ffcf37cdc6bc38a7 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 4 Mar 2017 19:53:47 +0100
Subject: netfilter: nf_tables: set pktinfo->thoff at AH header if found

Phil Sutter reports that IPv6 AH header matching is broken. From
userspace, nft generates bytecode that expects to find the AH header at
NFT_PAYLOAD_TRANSPORT_HEADER both for IPv4 and IPv6. However,
pktinfo->thoff is set to the inner header after the AH header in IPv6,
while in IPv4 pktinfo->thoff points to the AH header indeed. This
behaviour is inconsistent. This patch fixes this problem by updating
ipv6_find_hdr() to get the IP6_FH_F_AUTH flag so this function stops at
the AH header, so both IPv4 and IPv6 pktinfo->thoff point to the AH
header.

This is also inconsistent when trying to match encapsulated headers:

1) A packet that looks like IPv4 + AH + TCP dport 22 will *not* match.
2) A packet that looks like IPv6 + AH + TCP dport 22 will match.

Reported-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables_ipv6.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h
index d150b5066201..97983d1c05e4 100644
--- a/include/net/netfilter/nf_tables_ipv6.h
+++ b/include/net/netfilter/nf_tables_ipv6.h
@@ -9,12 +9,13 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
 		     struct sk_buff *skb,
 		     const struct nf_hook_state *state)
 {
+	unsigned int flags = IP6_FH_F_AUTH;
 	int protohdr, thoff = 0;
 	unsigned short frag_off;
 
 	nft_set_pktinfo(pkt, skb, state);
 
-	protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL);
+	protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags);
 	if (protohdr < 0) {
 		nft_set_pktinfo_proto_unspec(pkt, skb);
 		return;
@@ -32,6 +33,7 @@ __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
 				const struct nf_hook_state *state)
 {
 #if IS_ENABLED(CONFIG_IPV6)
+	unsigned int flags = IP6_FH_F_AUTH;
 	struct ipv6hdr *ip6h, _ip6h;
 	unsigned int thoff = 0;
 	unsigned short frag_off;
@@ -50,7 +52,7 @@ __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
 	if (pkt_len + sizeof(*ip6h) > skb->len)
 		return -1;
 
-	protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL);
+	protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags);
 	if (protohdr < 0)
 		return -1;
 
-- 
cgit v1.2.3


From b6f8fec4448aa52a8c36a392aa1ca2ea99acd460 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 8 Mar 2017 17:48:31 +0100
Subject: block: Allow bdi re-registration

SCSI can call device_add_disk() several times for one request queue when
a device in unbound and bound, creating new gendisk each time. This will
lead to bdi being repeatedly registered and unregistered. This was not a
big problem until commit 165a5e22fafb "block: Move bdi_unregister() to
del_gendisk()" since bdi was only registered repeatedly (bdi_register()
handles repeated calls fine, only we ended up leaking reference to
gendisk due to overwriting bdi->owner) but unregistered only in
blk_cleanup_queue() which didn't get called repeatedly. After
165a5e22fafb we were doing correct bdi_register() - bdi_unregister()
cycles however bdi_unregister() is not prepared for it. So make sure
bdi_unregister() cleans up bdi in such a way that it is prepared for
a possible following bdi_register() call.

An easy way to provoke this behavior is to enable
CONFIG_DEBUG_TEST_DRIVER_REMOVE and use scsi_debug driver to create a
scsi disk which immediately hangs without this fix.

Fixes: 165a5e22fafb127ecb5914e12e8c32a1f0d3f820
Signed-off-by: Jan Kara <jack@suse.cz>
Tested-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 mm/backing-dev.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 6d861d090e9f..6ac932210f56 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -710,6 +710,11 @@ static void cgwb_bdi_destroy(struct backing_dev_info *bdi)
 	 */
 	atomic_dec(&bdi->usage_cnt);
 	wait_event(cgwb_release_wait, !atomic_read(&bdi->usage_cnt));
+	/*
+	 * Grab back our reference so that we hold it when @bdi gets
+	 * re-registered.
+	 */
+	atomic_inc(&bdi->usage_cnt);
 }
 
 /**
@@ -857,6 +862,8 @@ int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner)
 			MINOR(owner->devt));
 	if (rc)
 		return rc;
+	/* Leaking owner reference... */
+	WARN_ON(bdi->owner);
 	bdi->owner = owner;
 	get_device(owner);
 	return 0;
-- 
cgit v1.2.3


From df23de55615fa7a190a85f49a950ccecdd9102f3 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 8 Mar 2017 17:48:32 +0100
Subject: bdi: Fix use-after-free in wb_congested_put()

bdi_writeback_congested structures get created for each blkcg and bdi
regardless whether bdi is registered or not. When they are created in
unregistered bdi and the request queue (and thus bdi) is then destroyed
while blkg still holds reference to bdi_writeback_congested structure,
this structure will be referencing freed bdi and last wb_congested_put()
will try to remove the structure from already freed bdi.

With commit 165a5e22fafb "block: Move bdi_unregister() to
del_gendisk()", SCSI started to destroy bdis without calling
bdi_unregister() first (previously it was calling bdi_unregister() even
for unregistered bdis) and thus the code detaching
bdi_writeback_congested in cgwb_bdi_destroy() was not triggered and we
started hitting this use-after-free bug. It is enough to boot a KVM
instance with virtio-scsi device to trigger this behavior.

Fix the problem by detaching bdi_writeback_congested structures in
bdi_exit() instead of bdi_unregister(). This is also more logical as
they can get attached to bdi regardless whether it ever got registered
or not.

Fixes: 165a5e22fafb127ecb5914e12e8c32a1f0d3f820
Signed-off-by: Jan Kara <jack@suse.cz>
Tested-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 mm/backing-dev.c | 36 +++++++++++++++++++++---------------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 6ac932210f56..c6f2a37028c2 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -683,30 +683,18 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
 static void cgwb_bdi_destroy(struct backing_dev_info *bdi)
 {
 	struct radix_tree_iter iter;
-	struct rb_node *rbn;
 	void **slot;
 
 	WARN_ON(test_bit(WB_registered, &bdi->wb.state));
 
 	spin_lock_irq(&cgwb_lock);
-
 	radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0)
 		cgwb_kill(*slot);
-
-	while ((rbn = rb_first(&bdi->cgwb_congested_tree))) {
-		struct bdi_writeback_congested *congested =
-			rb_entry(rbn, struct bdi_writeback_congested, rb_node);
-
-		rb_erase(rbn, &bdi->cgwb_congested_tree);
-		congested->bdi = NULL;	/* mark @congested unlinked */
-	}
-
 	spin_unlock_irq(&cgwb_lock);
 
 	/*
-	 * All cgwb's and their congested states must be shutdown and
-	 * released before returning.  Drain the usage counter to wait for
-	 * all cgwb's and cgwb_congested's ever created on @bdi.
+	 * All cgwb's must be shutdown and released before returning.  Drain
+	 * the usage counter to wait for all cgwb's ever created on @bdi.
 	 */
 	atomic_dec(&bdi->usage_cnt);
 	wait_event(cgwb_release_wait, !atomic_read(&bdi->usage_cnt));
@@ -754,6 +742,21 @@ void wb_blkcg_offline(struct blkcg *blkcg)
 	spin_unlock_irq(&cgwb_lock);
 }
 
+static void cgwb_bdi_exit(struct backing_dev_info *bdi)
+{
+	struct rb_node *rbn;
+
+	spin_lock_irq(&cgwb_lock);
+	while ((rbn = rb_first(&bdi->cgwb_congested_tree))) {
+		struct bdi_writeback_congested *congested =
+			rb_entry(rbn, struct bdi_writeback_congested, rb_node);
+
+		rb_erase(rbn, &bdi->cgwb_congested_tree);
+		congested->bdi = NULL;	/* mark @congested unlinked */
+	}
+	spin_unlock_irq(&cgwb_lock);
+}
+
 #else	/* CONFIG_CGROUP_WRITEBACK */
 
 static int cgwb_bdi_init(struct backing_dev_info *bdi)
@@ -774,7 +777,9 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
 	return 0;
 }
 
-static void cgwb_bdi_destroy(struct backing_dev_info *bdi)
+static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { }
+
+static void cgwb_bdi_exit(struct backing_dev_info *bdi)
 {
 	wb_congested_put(bdi->wb_congested);
 }
@@ -905,6 +910,7 @@ static void bdi_exit(struct backing_dev_info *bdi)
 {
 	WARN_ON_ONCE(bdi->dev);
 	wb_exit(&bdi->wb);
+	cgwb_bdi_exit(bdi);
 }
 
 static void release_bdi(struct kref *ref)
-- 
cgit v1.2.3


From 90f16fddcc2802726142b8386c65ccb89f044613 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 8 Mar 2017 17:48:33 +0100
Subject: block: Make del_gendisk() safer for disks without queues

Commit 165a5e22fafb "block: Move bdi_unregister() to del_gendisk()"
added disk->queue dereference to del_gendisk(). Although del_gendisk()
is not supposed to be called without disk->queue valid and
blk_unregister_queue() warns in that case, this change will make it oops
instead. Return to the old more robust behavior of just warning when
del_gendisk() gets called for gendisk with disk->queue being NULL.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Tested-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/genhd.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index b26a5ea115d0..94f323842b52 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -681,12 +681,16 @@ void del_gendisk(struct gendisk *disk)
 	disk->flags &= ~GENHD_FL_UP;
 
 	sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
-	/*
-	 * Unregister bdi before releasing device numbers (as they can get
-	 * reused and we'd get clashes in sysfs).
-	 */
-	bdi_unregister(disk->queue->backing_dev_info);
-	blk_unregister_queue(disk);
+	if (disk->queue) {
+		/*
+		 * Unregister bdi before releasing device numbers (as they can
+		 * get reused and we'd get clashes in sysfs).
+		 */
+		bdi_unregister(disk->queue->backing_dev_info);
+		blk_unregister_queue(disk);
+	} else {
+		WARN_ON(1);
+	}
 	blk_unregister_region(disk_devt(disk), disk->minors);
 
 	part_stat_set_all(&disk->part0, 0);
-- 
cgit v1.2.3


From c01228db4ba965986511a5b28c478bddd7e2726e Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 8 Mar 2017 17:48:34 +0100
Subject: Revert "scsi, block: fix duplicate bdi name registration crashes"

This reverts commit 0dba1314d4f81115dce711292ec7981d17231064. It causes
leaking of device numbers for SCSI when SCSI registers multiple gendisks
for one request_queue in succession. It can be easily reproduced using
Omar's script [1] on kernel with CONFIG_DEBUG_TEST_DRIVER_REMOVE.
Furthermore the protection provided by this commit is not needed anymore
as the problem it was fixing got also fixed by commit 165a5e22fafb
"block: Move bdi_unregister() to del_gendisk()".

[1]: http://marc.info/?l=linux-block&m=148554717109098&w=2

Signed-off-by: Jan Kara <jack@suse.cz>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c       |  2 --
 block/genhd.c          | 21 ---------------------
 drivers/scsi/sd.c      | 41 ++++++++---------------------------------
 include/linux/blkdev.h |  1 -
 include/linux/genhd.h  |  8 --------
 5 files changed, 8 insertions(+), 65 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 1086dac8724c..a76895c9776d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -578,8 +578,6 @@ void blk_cleanup_queue(struct request_queue *q)
 		q->queue_lock = &q->__queue_lock;
 	spin_unlock_irq(lock);
 
-	put_disk_devt(q->disk_devt);
-
 	/* @q is and will stay empty, shutdown and put */
 	blk_put_queue(q);
 }
diff --git a/block/genhd.c b/block/genhd.c
index 94f323842b52..a9c516a8b37d 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -572,20 +572,6 @@ exit:
 	disk_part_iter_exit(&piter);
 }
 
-void put_disk_devt(struct disk_devt *disk_devt)
-{
-	if (disk_devt && atomic_dec_and_test(&disk_devt->count))
-		disk_devt->release(disk_devt);
-}
-EXPORT_SYMBOL(put_disk_devt);
-
-void get_disk_devt(struct disk_devt *disk_devt)
-{
-	if (disk_devt)
-		atomic_inc(&disk_devt->count);
-}
-EXPORT_SYMBOL(get_disk_devt);
-
 /**
  * device_add_disk - add partitioning information to kernel list
  * @parent: parent device for the disk
@@ -626,13 +612,6 @@ void device_add_disk(struct device *parent, struct gendisk *disk)
 
 	disk_alloc_events(disk);
 
-	/*
-	 * Take a reference on the devt and assign it to queue since it
-	 * must not be reallocated while the bdi is registered
-	 */
-	disk->queue->disk_devt = disk->disk_devt;
-	get_disk_devt(disk->disk_devt);
-
 	/* Register BDI before referencing it from bdev */
 	bdi = disk->queue->backing_dev_info;
 	bdi_register_owner(bdi, disk_to_dev(disk));
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index c7839f6c35cc..d277e8620e3e 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3075,23 +3075,6 @@ static void sd_probe_async(void *data, async_cookie_t cookie)
 	put_device(&sdkp->dev);
 }
 
-struct sd_devt {
-	int idx;
-	struct disk_devt disk_devt;
-};
-
-static void sd_devt_release(struct disk_devt *disk_devt)
-{
-	struct sd_devt *sd_devt = container_of(disk_devt, struct sd_devt,
-			disk_devt);
-
-	spin_lock(&sd_index_lock);
-	ida_remove(&sd_index_ida, sd_devt->idx);
-	spin_unlock(&sd_index_lock);
-
-	kfree(sd_devt);
-}
-
 /**
  *	sd_probe - called during driver initialization and whenever a
  *	new scsi device is attached to the system. It is called once
@@ -3113,7 +3096,6 @@ static void sd_devt_release(struct disk_devt *disk_devt)
 static int sd_probe(struct device *dev)
 {
 	struct scsi_device *sdp = to_scsi_device(dev);
-	struct sd_devt *sd_devt;
 	struct scsi_disk *sdkp;
 	struct gendisk *gd;
 	int index;
@@ -3139,13 +3121,9 @@ static int sd_probe(struct device *dev)
 	if (!sdkp)
 		goto out;
 
-	sd_devt = kzalloc(sizeof(*sd_devt), GFP_KERNEL);
-	if (!sd_devt)
-		goto out_free;
-
 	gd = alloc_disk(SD_MINORS);
 	if (!gd)
-		goto out_free_devt;
+		goto out_free;
 
 	do {
 		if (!ida_pre_get(&sd_index_ida, GFP_KERNEL))
@@ -3161,11 +3139,6 @@ static int sd_probe(struct device *dev)
 		goto out_put;
 	}
 
-	atomic_set(&sd_devt->disk_devt.count, 1);
-	sd_devt->disk_devt.release = sd_devt_release;
-	sd_devt->idx = index;
-	gd->disk_devt = &sd_devt->disk_devt;
-
 	error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN);
 	if (error) {
 		sdev_printk(KERN_WARNING, sdp, "SCSI disk (sd) name length exceeded.\n");
@@ -3205,12 +3178,11 @@ static int sd_probe(struct device *dev)
 	return 0;
 
  out_free_index:
-	put_disk_devt(&sd_devt->disk_devt);
-	sd_devt = NULL;
+	spin_lock(&sd_index_lock);
+	ida_remove(&sd_index_ida, index);
+	spin_unlock(&sd_index_lock);
  out_put:
 	put_disk(gd);
- out_free_devt:
-	kfree(sd_devt);
  out_free:
 	kfree(sdkp);
  out:
@@ -3271,7 +3243,10 @@ static void scsi_disk_release(struct device *dev)
 	struct scsi_disk *sdkp = to_scsi_disk(dev);
 	struct gendisk *disk = sdkp->disk;
 	
-	put_disk_devt(disk->disk_devt);
+	spin_lock(&sd_index_lock);
+	ida_remove(&sd_index_ida, sdkp->index);
+	spin_unlock(&sd_index_lock);
+
 	disk->private_data = NULL;
 	put_disk(disk);
 	put_device(&sdkp->device->sdev_gendev);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 796016e63c1d..5a7da607ca04 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -435,7 +435,6 @@ struct request_queue {
 	struct delayed_work	delay_work;
 
 	struct backing_dev_info	*backing_dev_info;
-	struct disk_devt	*disk_devt;
 
 	/*
 	 * The queue owner gets to use this for whatever they like.
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index a999d281a2f1..76f39754e7b0 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -167,13 +167,6 @@ struct blk_integrity {
 };
 
 #endif	/* CONFIG_BLK_DEV_INTEGRITY */
-struct disk_devt {
-	atomic_t count;
-	void (*release)(struct disk_devt *disk_devt);
-};
-
-void put_disk_devt(struct disk_devt *disk_devt);
-void get_disk_devt(struct disk_devt *disk_devt);
 
 struct gendisk {
 	/* major, first_minor and minors are input parameters only,
@@ -183,7 +176,6 @@ struct gendisk {
 	int first_minor;
 	int minors;                     /* maximum number of minors, =1 for
                                          * disks that can't be partitioned. */
-	struct disk_devt *disk_devt;
 
 	char disk_name[DISK_NAME_LEN];	/* name of major driver */
 	char *(*devnode)(struct gendisk *gd, umode_t *mode);
-- 
cgit v1.2.3


From 79bd99596b7305ab08109a8bf44a6a4511dbf1cd Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Wed, 8 Mar 2017 07:38:05 +1100
Subject: blk: improve order of bio handling in generic_make_request()

To avoid recursion on the kernel stack when stacked block devices
are in use, generic_make_request() will, when called recursively,
queue new requests for later handling.  They will be handled when the
make_request_fn for the current bio completes.

If any bios are submitted by a make_request_fn, these will ultimately
be handled seqeuntially.  If the handling of one of those generates
further requests, they will be added to the end of the queue.

This strict first-in-first-out behaviour can lead to deadlocks in
various ways, normally because a request might need to wait for a
previous request to the same device to complete.  This can happen when
they share a mempool, and can happen due to interdependencies
particular to the device.  Both md and dm have examples where this happens.

These deadlocks can be erradicated by more selective ordering of bios.
Specifically by handling them in depth-first order.  That is: when the
handling of one bio generates one or more further bios, they are
handled immediately after the parent, before any siblings of the
parent.  That way, when generic_make_request() calls make_request_fn
for some particular device, we can be certain that all previously
submited requests for that device have been completely handled and are
not waiting for anything in the queue of requests maintained in
generic_make_request().

An easy way to achieve this would be to use a last-in-first-out stack
instead of a queue.  However this will change the order of consecutive
bios submitted by a make_request_fn, which could have unexpected consequences.
Instead we take a slightly more complex approach.
A fresh queue is created for each call to a make_request_fn.  After it completes,
any bios for a different device are placed on the front of the main queue, followed
by any bios for the same device, followed by all bios that were already on
the queue before the make_request_fn was called.
This provides the depth-first approach without reordering bios on the same level.

This, by itself, it not enough to remove all deadlocks.  It just makes
it possible for drivers to take the extra step required themselves.

To avoid deadlocks, drivers must never risk waiting for a request
after submitting one to generic_make_request.  This includes never
allocing from a mempool twice in the one call to a make_request_fn.

A common pattern in drivers is to call bio_split() in a loop, handling
the first part and then looping around to possibly split the next part.
Instead, a driver that finds it needs to split a bio should queue
(with generic_make_request) the second part, handle the first part,
and then return.  The new code in generic_make_request will ensure the
requests to underlying bios are processed first, then the second bio
that was split off.  If it splits again, the same process happens.  In
each case one bio will be completely handled before the next one is attempted.

With this is place, it should be possible to disable the
punt_bios_to_recover() recovery thread for many block devices, and
eventually it may be possible to remove it completely.

Ref: http://www.spinics.net/lists/raid/msg54680.html
Tested-by: Jinpu Wang <jinpu.wang@profitbricks.com>
Inspired-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index a76895c9776d..0eeb99ef654f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2015,17 +2015,34 @@ blk_qc_t generic_make_request(struct bio *bio)
 		struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
 		if (likely(blk_queue_enter(q, false) == 0)) {
+			struct bio_list hold;
+			struct bio_list lower, same;
+
+			/* Create a fresh bio_list for all subordinate requests */
+			hold = bio_list_on_stack;
+			bio_list_init(&bio_list_on_stack);
 			ret = q->make_request_fn(q, bio);
 
 			blk_queue_exit(q);
 
-			bio = bio_list_pop(current->bio_list);
+			/* sort new bios into those for a lower level
+			 * and those for the same level
+			 */
+			bio_list_init(&lower);
+			bio_list_init(&same);
+			while ((bio = bio_list_pop(&bio_list_on_stack)) != NULL)
+				if (q == bdev_get_queue(bio->bi_bdev))
+					bio_list_add(&same, bio);
+				else
+					bio_list_add(&lower, bio);
+			/* now assemble so we handle the lowest level first */
+			bio_list_merge(&bio_list_on_stack, &lower);
+			bio_list_merge(&bio_list_on_stack, &same);
+			bio_list_merge(&bio_list_on_stack, &hold);
 		} else {
-			struct bio *bio_next = bio_list_pop(current->bio_list);
-
 			bio_io_error(bio);
-			bio = bio_next;
 		}
+		bio = bio_list_pop(current->bio_list);
 	} while (bio);
 	current->bio_list = NULL; /* deactivate */
 
-- 
cgit v1.2.3


From 672a2c87c83649fb0167202342ce85af9a3b4f1c Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 8 Mar 2017 14:56:05 +0100
Subject: axonram: Fix gendisk handling

It is invalid to call del_gendisk() when disk->queue is NULL. Fix error
handling in axon_ram_probe() to avoid doing that.

Also del_gendisk() does not drop a reference to gendisk allocated by
alloc_disk(). That has to be done by put_disk(). Add that call where
needed.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 arch/powerpc/sysdev/axonram.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index ada29eaed6e2..f523ac883150 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -274,7 +274,9 @@ failed:
 			if (bank->disk->major > 0)
 				unregister_blkdev(bank->disk->major,
 						bank->disk->disk_name);
-			del_gendisk(bank->disk);
+			if (bank->disk->flags & GENHD_FL_UP)
+				del_gendisk(bank->disk);
+			put_disk(bank->disk);
 		}
 		device->dev.platform_data = NULL;
 		if (bank->io_addr != 0)
@@ -299,6 +301,7 @@ axon_ram_remove(struct platform_device *device)
 	device_remove_file(&device->dev, &dev_attr_ecc);
 	free_irq(bank->irq_id, device);
 	del_gendisk(bank->disk);
+	put_disk(bank->disk);
 	iounmap((void __iomem *) bank->io_addr);
 	kfree(bank);
 
-- 
cgit v1.2.3


From f65e6fad293b3a5793b7fa2044800506490e7a2e Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 8 Mar 2017 09:58:08 -0800
Subject: xfs: use iomap new flag for newly allocated delalloc blocks

Commit fa7f138 ("xfs: clear delalloc and cache on buffered write
failure") fixed one regression in the iomap error handling code and
exposed another. The fundamental problem is that if a buffered write
is a rewrite of preexisting delalloc blocks and the write fails, the
failure handling code can punch out preexisting blocks with valid
file data.

This was reproduced directly by sub-block writes in the LTP
kernel/syscalls/write/write03 test. A first 100 byte write allocates
a single block in a file. A subsequent 100 byte write fails and
punches out the block, including the data successfully written by
the previous write.

To address this problem, update the ->iomap_begin() handler to
distinguish newly allocated delalloc blocks from preexisting
delalloc blocks via the IOMAP_F_NEW flag. Use this flag in the
->iomap_end() handler to decide when a failed or short write should
punch out delalloc blocks.

This introduces the subtle requirement that ->iomap_begin() should
never combine newly allocated delalloc blocks with existing blocks
in the resulting iomap descriptor. This can occur when a new
delalloc reservation merges with a neighboring extent that is part
of the current write, for example. Therefore, drop the
post-allocation extent lookup from xfs_bmapi_reserve_delalloc() and
just return the record inserted into the fork. This ensures only new
blocks are returned and thus that preexisting delalloc blocks are
always handled as "found" blocks and not punched out on a failed
rewrite.

Reported-by: Xiong Zhou <xzhou@redhat.com>
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 24 ++++++++++++++----------
 fs/xfs/xfs_iomap.c       | 25 ++++++++++++++++++-------
 2 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index a9c66d47757a..bfa59a1a2d09 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4150,6 +4150,19 @@ xfs_bmapi_read(
 	return 0;
 }
 
+/*
+ * Add a delayed allocation extent to an inode. Blocks are reserved from the
+ * global pool and the extent inserted into the inode in-core extent tree.
+ *
+ * On entry, got refers to the first extent beyond the offset of the extent to
+ * allocate or eof is specified if no such extent exists. On return, got refers
+ * to the extent record that was inserted to the inode fork.
+ *
+ * Note that the allocated extent may have been merged with contiguous extents
+ * during insertion into the inode fork. Thus, got does not reflect the current
+ * state of the inode fork on return. If necessary, the caller can use lastx to
+ * look up the updated record in the inode fork.
+ */
 int
 xfs_bmapi_reserve_delalloc(
 	struct xfs_inode	*ip,
@@ -4236,13 +4249,8 @@ xfs_bmapi_reserve_delalloc(
 	got->br_startblock = nullstartblock(indlen);
 	got->br_blockcount = alen;
 	got->br_state = XFS_EXT_NORM;
-	xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
 
-	/*
-	 * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay
-	 * might have merged it into one of the neighbouring ones.
-	 */
-	xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);
+	xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
 
 	/*
 	 * Tag the inode if blocks were preallocated. Note that COW fork
@@ -4254,10 +4262,6 @@ xfs_bmapi_reserve_delalloc(
 	if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
 		xfs_inode_set_cowblocks_tag(ip);
 
-	ASSERT(got->br_startoff <= aoff);
-	ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
-	ASSERT(isnullstartblock(got->br_startblock));
-	ASSERT(got->br_state == XFS_EXT_NORM);
 	return 0;
 
 out_unreserve_blocks:
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 41662fb14e87..288ee5b840d7 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -630,6 +630,11 @@ retry:
 		goto out_unlock;
 	}
 
+	/*
+	 * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
+	 * them out if the write happens to fail.
+	 */
+	iomap->flags = IOMAP_F_NEW;
 	trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
 done:
 	if (isnullstartblock(got.br_startblock))
@@ -1071,16 +1076,22 @@ xfs_file_iomap_end_delalloc(
 	struct xfs_inode	*ip,
 	loff_t			offset,
 	loff_t			length,
-	ssize_t			written)
+	ssize_t			written,
+	struct iomap		*iomap)
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	xfs_fileoff_t		start_fsb;
 	xfs_fileoff_t		end_fsb;
 	int			error = 0;
 
-	/* behave as if the write failed if drop writes is enabled */
-	if (xfs_mp_drop_writes(mp))
+	/*
+	 * Behave as if the write failed if drop writes is enabled. Set the NEW
+	 * flag to force delalloc cleanup.
+	 */
+	if (xfs_mp_drop_writes(mp)) {
+		iomap->flags |= IOMAP_F_NEW;
 		written = 0;
+	}
 
 	/*
 	 * start_fsb refers to the first unused block after a short write. If
@@ -1094,14 +1105,14 @@ xfs_file_iomap_end_delalloc(
 	end_fsb = XFS_B_TO_FSB(mp, offset + length);
 
 	/*
-	 * Trim back delalloc blocks if we didn't manage to write the whole
-	 * range reserved.
+	 * Trim delalloc blocks if they were allocated by this write and we
+	 * didn't manage to write the whole range.
 	 *
 	 * We don't need to care about racing delalloc as we hold i_mutex
 	 * across the reserve/allocate/unreserve calls. If there are delalloc
 	 * blocks in the range, they are ours.
 	 */
-	if (start_fsb < end_fsb) {
+	if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) {
 		truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb),
 					 XFS_FSB_TO_B(mp, end_fsb) - 1);
 
@@ -1131,7 +1142,7 @@ xfs_file_iomap_end(
 {
 	if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
 		return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
-				length, written);
+				length, written, iomap);
 	return 0;
 }
 
-- 
cgit v1.2.3


From bd0f9b356d00aa241ced36fb075a07041c28d3b8 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 7 Mar 2017 15:33:14 -0800
Subject: sched/headers: fix up header file dependency on
 <linux/sched/signal.h>

The scheduler header file split and cleanups ended up exposing a few
nasty header file dependencies, and in particular it showed how we in
<linux/wait.h> ended up depending on "signal_pending()", which now comes
from <linux/sched/signal.h>.

That's a very subtle and annoying dependency, which already caused a
semantic merge conflict (see commit e58bc927835a "Pull overlayfs updates
from Miklos Szeredi", which added that fixup in the merge commit).

It turns out that we can avoid this dependency _and_ improve code
generation by moving the guts of the fairly nasty helper #define
__wait_event_interruptible_locked() to out-of-line code.  The code that
includes the signal_pending() check is all in the slow-path where we
actually go to sleep waiting for the event anyway, so using a helper
function is the right thing to do.

Using a helper function is also what we already did for the non-locked
versions, see the "__wait_event*()" macros and the "prepare_to_wait*()"
set of helper functions.

We might want to try to unify all these macro games, we have a _lot_ of
subtly different wait-event loops.  But this is the minimal patch to fix
the annoying header dependency.

Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/wait.h | 31 ++++++++++---------------------
 kernel/sched/wait.c  | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 21 deletions(-)

diff --git a/include/linux/wait.h b/include/linux/wait.h
index aacb1282d19a..db076ca7f11d 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -620,30 +620,19 @@ do {									\
 	__ret;								\
 })
 
+extern int do_wait_intr(wait_queue_head_t *, wait_queue_t *);
+extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *);
 
-#define __wait_event_interruptible_locked(wq, condition, exclusive, irq) \
+#define __wait_event_interruptible_locked(wq, condition, exclusive, fn) \
 ({									\
-	int __ret = 0;							\
+	int __ret;							\
 	DEFINE_WAIT(__wait);						\
 	if (exclusive)							\
 		__wait.flags |= WQ_FLAG_EXCLUSIVE;			\
 	do {								\
-		if (likely(list_empty(&__wait.task_list)))		\
-			__add_wait_queue_tail(&(wq), &__wait);		\
-		set_current_state(TASK_INTERRUPTIBLE);			\
-		if (signal_pending(current)) {				\
-			__ret = -ERESTARTSYS;				\
+		__ret = fn(&(wq), &__wait);				\
+		if (__ret)						\
 			break;						\
-		}							\
-		if (irq)						\
-			spin_unlock_irq(&(wq).lock);			\
-		else							\
-			spin_unlock(&(wq).lock);			\
-		schedule();						\
-		if (irq)						\
-			spin_lock_irq(&(wq).lock);			\
-		else							\
-			spin_lock(&(wq).lock);				\
 	} while (!(condition));						\
 	__remove_wait_queue(&(wq), &__wait);				\
 	__set_current_state(TASK_RUNNING);				\
@@ -676,7 +665,7 @@ do {									\
  */
 #define wait_event_interruptible_locked(wq, condition)			\
 	((condition)							\
-	 ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 0))
+	 ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr))
 
 /**
  * wait_event_interruptible_locked_irq - sleep until a condition gets true
@@ -703,7 +692,7 @@ do {									\
  */
 #define wait_event_interruptible_locked_irq(wq, condition)		\
 	((condition)							\
-	 ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 1))
+	 ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr_irq))
 
 /**
  * wait_event_interruptible_exclusive_locked - sleep exclusively until a condition gets true
@@ -734,7 +723,7 @@ do {									\
  */
 #define wait_event_interruptible_exclusive_locked(wq, condition)	\
 	((condition)							\
-	 ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 0))
+	 ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr))
 
 /**
  * wait_event_interruptible_exclusive_locked_irq - sleep until a condition gets true
@@ -765,7 +754,7 @@ do {									\
  */
 #define wait_event_interruptible_exclusive_locked_irq(wq, condition)	\
 	((condition)							\
-	 ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 1))
+	 ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr_irq))
 
 
 #define __wait_event_killable(wq, condition)				\
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 4d2ea6f25568..b8c84c6dee64 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -242,6 +242,45 @@ long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
 }
 EXPORT_SYMBOL(prepare_to_wait_event);
 
+/*
+ * Note! These two wait functions are entered with the
+ * wait-queue lock held (and interrupts off in the _irq
+ * case), so there is no race with testing the wakeup
+ * condition in the caller before they add the wait
+ * entry to the wake queue.
+ */
+int do_wait_intr(wait_queue_head_t *wq, wait_queue_t *wait)
+{
+	if (likely(list_empty(&wait->task_list)))
+		__add_wait_queue_tail(wq, wait);
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	if (signal_pending(current))
+		return -ERESTARTSYS;
+
+	spin_unlock(&wq->lock);
+	schedule();
+	spin_lock(&wq->lock);
+	return 0;
+}
+EXPORT_SYMBOL(do_wait_intr);
+
+int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_t *wait)
+{
+	if (likely(list_empty(&wait->task_list)))
+		__add_wait_queue_tail(wq, wait);
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	if (signal_pending(current))
+		return -ERESTARTSYS;
+
+	spin_unlock_irq(&wq->lock);
+	schedule();
+	spin_lock_irq(&wq->lock);
+	return 0;
+}
+EXPORT_SYMBOL(do_wait_intr_irq);
+
 /**
  * finish_wait - clean up after waiting in a queue
  * @q: waitqueue waited on
-- 
cgit v1.2.3


From 2fcc319d2467a5f5b78f35f79fd6e22741a31b1e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 8 Mar 2017 10:38:53 -0800
Subject: xfs: try any AG when allocating the first btree block when reflinking

When a reflink operation causes the bmap code to allocate a btree block
we're currently doing single-AG allocations due to having ->firstblock
set and then try any higher AG due a little reflink quirk we've put in
when adding the reflink code.  But given that we do not have a minleft
reservation of any kind in this AG we can still not have any space in
the same or higher AG even if the file system has enough free space.
To fix this use a XFS_ALLOCTYPE_FIRST_AG allocation in this fall back
path instead.

[And yes, we need to redo this properly instead of piling hacks over
 hacks.  I'm working on that, but it's not going to be a small series.
 In the meantime this fixes the customer reported issue]

Also add a warning for failing allocations to make it easier to debug.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c       | 10 +++++++---
 fs/xfs/libxfs/xfs_bmap_btree.c |  6 +++---
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index bfa59a1a2d09..9bd104f32908 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -763,8 +763,8 @@ xfs_bmap_extents_to_btree(
 		args.type = XFS_ALLOCTYPE_START_BNO;
 		args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
 	} else if (dfops->dop_low) {
-try_another_ag:
 		args.type = XFS_ALLOCTYPE_START_BNO;
+try_another_ag:
 		args.fsbno = *firstblock;
 	} else {
 		args.type = XFS_ALLOCTYPE_NEAR_BNO;
@@ -790,13 +790,17 @@ try_another_ag:
 	if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
 	    args.fsbno == NULLFSBLOCK &&
 	    args.type == XFS_ALLOCTYPE_NEAR_BNO) {
-		dfops->dop_low = true;
+		args.type = XFS_ALLOCTYPE_FIRST_AG;
 		goto try_another_ag;
 	}
+	if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
+		xfs_iroot_realloc(ip, -1, whichfork);
+		xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+		return -ENOSPC;
+	}
 	/*
 	 * Allocation can't fail, the space was reserved.
 	 */
-	ASSERT(args.fsbno != NULLFSBLOCK);
 	ASSERT(*firstblock == NULLFSBLOCK ||
 	       args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock));
 	*firstblock = cur->bc_private.b.firstblock = args.fsbno;
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index f93072b58a58..fd55db479385 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -447,8 +447,8 @@ xfs_bmbt_alloc_block(
 
 	if (args.fsbno == NULLFSBLOCK) {
 		args.fsbno = be64_to_cpu(start->l);
-try_another_ag:
 		args.type = XFS_ALLOCTYPE_START_BNO;
+try_another_ag:
 		/*
 		 * Make sure there is sufficient room left in the AG to
 		 * complete a full tree split for an extent insert.  If
@@ -488,8 +488,8 @@ try_another_ag:
 	if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
 	    args.fsbno == NULLFSBLOCK &&
 	    args.type == XFS_ALLOCTYPE_NEAR_BNO) {
-		cur->bc_private.b.dfops->dop_low = true;
 		args.fsbno = cur->bc_private.b.firstblock;
+		args.type = XFS_ALLOCTYPE_FIRST_AG;
 		goto try_another_ag;
 	}
 
@@ -506,7 +506,7 @@ try_another_ag:
 			goto error0;
 		cur->bc_private.b.dfops->dop_low = true;
 	}
-	if (args.fsbno == NULLFSBLOCK) {
+	if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
 		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 		*stat = 0;
 		return 0;
-- 
cgit v1.2.3


From 04bb94b13c02e9dbc92d622cddf88937127bd7ed Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 8 Mar 2017 10:42:13 -0800
Subject: overlayfs: remove now unnecessary header file include

This removes the extra include header file that was added in commit
e58bc927835a "Pull overlayfs updates from Miklos Szeredi" now that it
is no longer needed.

There are probably other such includes that got added during the
scheduler header splitup series, but this is the one that annoyed me
personally and I know about.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/overlayfs/util.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 1953986ee6bc..6e610a205e15 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -12,7 +12,6 @@
 #include <linux/slab.h>
 #include <linux/cred.h>
 #include <linux/xattr.h>
-#include <linux/sched/signal.h>
 #include "overlayfs.h"
 #include "ovl_entry.h"
 
-- 
cgit v1.2.3


From b4fb8f66f1ae2e167d06c12d018025a8d4d3ba7e Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Wed, 8 Mar 2017 09:35:39 -0800
Subject: mm, page_alloc: Add missing check for memory holes

Commit 13ad59df67f1 ("mm, page_alloc: avoid page_to_pfn() when merging
buddies") moved the check for memory holes out of page_is_buddy() and
had the callers do the check.

But this wasn't done correctly in one place which caused ia64 to crash
very early in boot.

Update to fix that and make ia64 boot again.

[ v2: Vlastimil pointed out we don't need to call page_to_pfn()
      since we already have the result of that in "buddy_pfn" ]

Fixes: 13ad59df67f1 ("avoid page_to_pfn() when merging buddies")
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index eaa64d2ffdc5..6cbde310abed 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -873,7 +873,8 @@ done_merging:
 		higher_page = page + (combined_pfn - pfn);
 		buddy_pfn = __find_buddy_pfn(combined_pfn, order + 1);
 		higher_buddy = higher_page + (buddy_pfn - combined_pfn);
-		if (page_is_buddy(higher_page, higher_buddy, order + 1)) {
+		if (pfn_valid_within(buddy_pfn) &&
+		    page_is_buddy(higher_page, higher_buddy, order + 1)) {
 			list_add_tail(&page->lru,
 				&zone->free_area[order].free_list[migratetype]);
 			goto out;
-- 
cgit v1.2.3


From c085bd5119d5d0bdf3ef591a5563566be7dedced Mon Sep 17 00:00:00 2001
From: Jim Qu <Jim.Qu@amd.com>
Date: Wed, 1 Mar 2017 15:53:29 +0800
Subject: drm/amd/amdgpu: fix console deadlock if late init failed

Signed-off-by: Jim Qu <Jim.Qu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6abb238b25c9..4120b351a8e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2094,8 +2094,11 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 	}
 
 	r = amdgpu_late_init(adev);
-	if (r)
+	if (r) {
+		if (fbcon)
+			console_unlock();
 		return r;
+	}
 
 	/* pin cursors */
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
-- 
cgit v1.2.3


From 67b0503db9c29b04eadfeede6bebbfe5ddad94ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stefan=20Br=C3=BCns?= <stefan.bruens@rwth-aachen.de>
Date: Sun, 12 Feb 2017 13:02:13 -0200
Subject: [media] dvb-usb-firmware: don't do DMA on stack
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The buffer allocation for the firmware data was changed in
commit 43fab9793c1f ("[media] dvb-usb: don't use stack for firmware load")
but the same applies for the reset value.

Fixes: 43fab9793c1f ("[media] dvb-usb: don't use stack for firmware load")
Cc: stable@vger.kernel.org
Signed-off-by: Stefan Brüns <stefan.bruens@rwth-aachen.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 drivers/media/usb/dvb-usb/dvb-usb-firmware.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/media/usb/dvb-usb/dvb-usb-firmware.c b/drivers/media/usb/dvb-usb/dvb-usb-firmware.c
index ab9866024ec7..04033efe7ad5 100644
--- a/drivers/media/usb/dvb-usb/dvb-usb-firmware.c
+++ b/drivers/media/usb/dvb-usb/dvb-usb-firmware.c
@@ -36,16 +36,18 @@ static int usb_cypress_writemem(struct usb_device *udev,u16 addr,u8 *data, u8 le
 int usb_cypress_load_firmware(struct usb_device *udev, const struct firmware *fw, int type)
 {
 	struct hexline *hx;
-	u8 reset;
-	int ret,pos=0;
+	u8 *buf;
+	int ret, pos = 0;
+	u16 cpu_cs_register = cypress[type].cpu_cs_register;
 
-	hx = kmalloc(sizeof(*hx), GFP_KERNEL);
-	if (!hx)
+	buf = kmalloc(sizeof(*hx), GFP_KERNEL);
+	if (!buf)
 		return -ENOMEM;
+	hx = (struct hexline *)buf;
 
 	/* stop the CPU */
-	reset = 1;
-	if ((ret = usb_cypress_writemem(udev,cypress[type].cpu_cs_register,&reset,1)) != 1)
+	buf[0] = 1;
+	if (usb_cypress_writemem(udev, cpu_cs_register, buf, 1) != 1)
 		err("could not stop the USB controller CPU.");
 
 	while ((ret = dvb_usb_get_hexline(fw, hx, &pos)) > 0) {
@@ -61,21 +63,21 @@ int usb_cypress_load_firmware(struct usb_device *udev, const struct firmware *fw
 	}
 	if (ret < 0) {
 		err("firmware download failed at %d with %d",pos,ret);
-		kfree(hx);
+		kfree(buf);
 		return ret;
 	}
 
 	if (ret == 0) {
 		/* restart the CPU */
-		reset = 0;
-		if (ret || usb_cypress_writemem(udev,cypress[type].cpu_cs_register,&reset,1) != 1) {
+		buf[0] = 0;
+		if (usb_cypress_writemem(udev, cpu_cs_register, buf, 1) != 1) {
 			err("could not restart the USB controller CPU.");
 			ret = -EINVAL;
 		}
 	} else
 		ret = -EIO;
 
-	kfree(hx);
+	kfree(buf);
 
 	return ret;
 }
-- 
cgit v1.2.3


From f04d108029063a8a67528a88449c412aecf4d3d8 Mon Sep 17 00:00:00 2001
From: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Date: Mon, 20 Feb 2017 19:26:30 +0530
Subject: powerpc/perf: Fix perf_get_data_addr() for power9 DD1

Power9 DD1 do not support PMU_HAS_SIER flag and sdsync in
perf_get_data_addr() defaults to MMCRA_SDSYNC which is wrong. Since
power9 MMCRA does not support SDSYNC bit, patch includes PPMU_NO_SIAR
flag to the check and set the sdsync with MMCRA_SAMPLE_ENABLE;

Fixes: 27593d72c4ad ("powerpc/perf: Use MSR to report privilege level on P9 DD1")
Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/perf/core-book3s.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 595dd718ea87..2ff13249f87a 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -188,6 +188,8 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
 			sdsync = POWER7P_MMCRA_SDAR_VALID;
 		else if (ppmu->flags & PPMU_ALT_SIPR)
 			sdsync = POWER6_MMCRA_SDSYNC;
+		else if (ppmu->flags & PPMU_NO_SIAR)
+			sdsync = MMCRA_SAMPLE_ENABLE;
 		else
 			sdsync = MMCRA_SDSYNC;
 
-- 
cgit v1.2.3


From 78b4416aa249365dd3c1b64da4d3a232014320b0 Mon Sep 17 00:00:00 2001
From: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Date: Mon, 20 Feb 2017 19:29:03 +0530
Subject: powerpc/perf: Handle sdar_mode for marked event in power9

MMCRA[SDAR_MODE] specifices how the SDAR should be updated in
continous sampling mode. On P9 it must be set to 0b00 when
MMCRA[63] is set.

Fixes: c7c3f568beff2 ('powerpc/perf: macros for power9 format encoding')
Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/perf/isa207-common.c | 43 ++++++++++++++++++++++++++++++++-------
 arch/powerpc/perf/isa207-common.h |  1 +
 2 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index e79fb5fb817d..cd951fd231c4 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -65,12 +65,41 @@ static bool is_event_valid(u64 event)
 	return !(event & ~valid_mask);
 }
 
-static u64 mmcra_sdar_mode(u64 event)
+static inline bool is_event_marked(u64 event)
 {
-	if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1))
-		return p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
+	if (event & EVENT_IS_MARKED)
+		return true;
+
+	return false;
+}
 
-	return MMCRA_SDAR_MODE_TLB;
+static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
+{
+	/*
+	 * MMCRA[SDAR_MODE] specifices how the SDAR should be updated in
+	 * continous sampling mode.
+	 *
+	 * Incase of Power8:
+	 * MMCRA[SDAR_MODE] will be programmed as "0b01" for continous sampling
+	 * mode and will be un-changed when setting MMCRA[63] (Marked events).
+	 *
+	 * Incase of Power9:
+	 * Marked event: MMCRA[SDAR_MODE] will be set to 0b00 ('No Updates'),
+	 *               or if group already have any marked events.
+	 * Non-Marked events (for DD1):
+	 *	MMCRA[SDAR_MODE] will be set to 0b01
+	 * For rest
+	 *	MMCRA[SDAR_MODE] will be set from event code.
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE))
+			*mmcra &= MMCRA_SDAR_MODE_NO_UPDATES;
+		else if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
+			*mmcra |=  p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
+		else if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+			*mmcra |= MMCRA_SDAR_MODE_TLB;
+	} else
+		*mmcra |= MMCRA_SDAR_MODE_TLB;
 }
 
 static u64 thresh_cmp_val(u64 value)
@@ -180,7 +209,7 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
 		value |= CNST_L1_QUAL_VAL(cache);
 	}
 
-	if (event & EVENT_IS_MARKED) {
+	if (is_event_marked(event)) {
 		mask  |= CNST_SAMPLE_MASK;
 		value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT);
 	}
@@ -276,7 +305,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
 		}
 
 		/* In continuous sampling mode, update SDAR on TLB miss */
-		mmcra |= mmcra_sdar_mode(event[i]);
+		mmcra_sdar_mode(event[i], &mmcra);
 
 		if (event[i] & EVENT_IS_L1) {
 			cache = event[i] >> EVENT_CACHE_SEL_SHIFT;
@@ -285,7 +314,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
 			mmcr1 |= (cache & 1) << MMCR1_DC_QUAL_SHIFT;
 		}
 
-		if (event[i] & EVENT_IS_MARKED) {
+		if (is_event_marked(event[i])) {
 			mmcra |= MMCRA_SAMPLE_ENABLE;
 
 			val = (event[i] >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
index cf9bd8990159..899210f14ee4 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -246,6 +246,7 @@
 #define MMCRA_THR_CMP_SHIFT		32
 #define MMCRA_SDAR_MODE_SHIFT		42
 #define MMCRA_SDAR_MODE_TLB		(1ull << MMCRA_SDAR_MODE_SHIFT)
+#define MMCRA_SDAR_MODE_NO_UPDATES	~(0x3ull << MMCRA_SDAR_MODE_SHIFT)
 #define MMCRA_IFM_SHIFT			30
 
 /* MMCR1 Threshold Compare bit constant for power9 */
-- 
cgit v1.2.3


From 605df8d674ac65e044a0bf4998b28c2f350b7f9e Mon Sep 17 00:00:00 2001
From: Cyril Bur <cyrilbur@gmail.com>
Date: Tue, 7 Mar 2017 11:39:31 +1100
Subject: selftests/powerpc: Replace stxvx and lxvx with stxvd2x/lxvd2x

On POWER8 (ISA 2.07) lxvx and stxvx are defined to be extended mnemonics
of lxvd2x and stxvd2x. For POWER9 (ISA 3.0) the HW architects in their
infinite wisdom made lxvx and stxvx instructions in their own right.

POWER9 aware GCC will use the POWER9 instruction for lxvx and stxvx
causing these selftests to fail on POWER8. Further compounding the
issue, because of the way -mvsx works it will cause the power9
instructions to be used regardless of -mcpu=power8 to GCC or -mpower8 to
AS.

The safest way to address the problem for now is to not use the extended
mnemonic. We don't care how the CPU loads the values from memory since
the tests only performs register comparisons, so using stdvd2x/lxvd2x
does not impact the test.

Signed-off-by: Cyril Bur <cyrilbur@gmail.com>
Acked-by: Balbir Singh<bsingharora@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/include/vsx_asm.h | 48 +++++++++++------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/tools/testing/selftests/powerpc/include/vsx_asm.h b/tools/testing/selftests/powerpc/include/vsx_asm.h
index d828bfb6ef2d..54064ced9e95 100644
--- a/tools/testing/selftests/powerpc/include/vsx_asm.h
+++ b/tools/testing/selftests/powerpc/include/vsx_asm.h
@@ -16,56 +16,56 @@
  */
 FUNC_START(load_vsx)
 	li	r5,0
-	lxvx	vs20,r5,r3
+	lxvd2x	vs20,r5,r3
 	addi	r5,r5,16
-	lxvx	vs21,r5,r3
+	lxvd2x	vs21,r5,r3
 	addi	r5,r5,16
-	lxvx	vs22,r5,r3
+	lxvd2x	vs22,r5,r3
 	addi	r5,r5,16
-	lxvx	vs23,r5,r3
+	lxvd2x	vs23,r5,r3
 	addi	r5,r5,16
-	lxvx	vs24,r5,r3
+	lxvd2x	vs24,r5,r3
 	addi	r5,r5,16
-	lxvx	vs25,r5,r3
+	lxvd2x	vs25,r5,r3
 	addi	r5,r5,16
-	lxvx	vs26,r5,r3
+	lxvd2x	vs26,r5,r3
 	addi	r5,r5,16
-	lxvx	vs27,r5,r3
+	lxvd2x	vs27,r5,r3
 	addi	r5,r5,16
-	lxvx	vs28,r5,r3
+	lxvd2x	vs28,r5,r3
 	addi	r5,r5,16
-	lxvx	vs29,r5,r3
+	lxvd2x	vs29,r5,r3
 	addi	r5,r5,16
-	lxvx	vs30,r5,r3
+	lxvd2x	vs30,r5,r3
 	addi	r5,r5,16
-	lxvx	vs31,r5,r3
+	lxvd2x	vs31,r5,r3
 	blr
 FUNC_END(load_vsx)
 
 FUNC_START(store_vsx)
 	li	r5,0
-	stxvx	vs20,r5,r3
+	stxvd2x	vs20,r5,r3
 	addi	r5,r5,16
-	stxvx	vs21,r5,r3
+	stxvd2x	vs21,r5,r3
 	addi	r5,r5,16
-	stxvx	vs22,r5,r3
+	stxvd2x	vs22,r5,r3
 	addi	r5,r5,16
-	stxvx	vs23,r5,r3
+	stxvd2x	vs23,r5,r3
 	addi	r5,r5,16
-	stxvx	vs24,r5,r3
+	stxvd2x	vs24,r5,r3
 	addi	r5,r5,16
-	stxvx	vs25,r5,r3
+	stxvd2x	vs25,r5,r3
 	addi	r5,r5,16
-	stxvx	vs26,r5,r3
+	stxvd2x	vs26,r5,r3
 	addi	r5,r5,16
-	stxvx	vs27,r5,r3
+	stxvd2x	vs27,r5,r3
 	addi	r5,r5,16
-	stxvx	vs28,r5,r3
+	stxvd2x	vs28,r5,r3
 	addi	r5,r5,16
-	stxvx	vs29,r5,r3
+	stxvd2x	vs29,r5,r3
 	addi	r5,r5,16
-	stxvx	vs30,r5,r3
+	stxvd2x	vs30,r5,r3
 	addi	r5,r5,16
-	stxvx	vs31,r5,r3
+	stxvd2x	vs31,r5,r3
 	blr
 FUNC_END(store_vsx)
-- 
cgit v1.2.3


From b78125e00fce0a858c7827dd47ce500320d6d0f7 Mon Sep 17 00:00:00 2001
From: Philippe Reynes <tremyfr@gmail.com>
Date: Tue, 28 Feb 2017 23:49:38 +0100
Subject: net: smsc: smc911x: use new api ethtool_{get|set}_link_ksettings

The ethtool api {get|set}_settings is deprecated.
We move this driver to new api {get|set}_link_ksettings.

As I don't have the hardware, I'd be very pleased if
someone may test this patch.

Signed-off-by: Philippe Reynes <tremyfr@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/smsc/smc911x.c | 51 ++++++++++++++++++++-----------------
 1 file changed, 27 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c
index 4f19c6166182..36307d34f641 100644
--- a/drivers/net/ethernet/smsc/smc911x.c
+++ b/drivers/net/ethernet/smsc/smc911x.c
@@ -1446,40 +1446,40 @@ static int smc911x_close(struct net_device *dev)
  * Ethtool support
  */
 static int
-smc911x_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd)
+smc911x_ethtool_get_link_ksettings(struct net_device *dev,
+				   struct ethtool_link_ksettings *cmd)
 {
 	struct smc911x_local *lp = netdev_priv(dev);
 	int ret, status;
 	unsigned long flags;
+	u32 supported;
 
 	DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__);
-	cmd->maxtxpkt = 1;
-	cmd->maxrxpkt = 1;
 
 	if (lp->phy_type != 0) {
 		spin_lock_irqsave(&lp->lock, flags);
-		ret = mii_ethtool_gset(&lp->mii, cmd);
+		ret = mii_ethtool_get_link_ksettings(&lp->mii, cmd);
 		spin_unlock_irqrestore(&lp->lock, flags);
 	} else {
-		cmd->supported = SUPPORTED_10baseT_Half |
+		supported = SUPPORTED_10baseT_Half |
 				SUPPORTED_10baseT_Full |
 				SUPPORTED_TP | SUPPORTED_AUI;
 
 		if (lp->ctl_rspeed == 10)
-			ethtool_cmd_speed_set(cmd, SPEED_10);
+			cmd->base.speed = SPEED_10;
 		else if (lp->ctl_rspeed == 100)
-			ethtool_cmd_speed_set(cmd, SPEED_100);
-
-		cmd->autoneg = AUTONEG_DISABLE;
-		if (lp->mii.phy_id==1)
-			cmd->transceiver = XCVR_INTERNAL;
-		else
-			cmd->transceiver = XCVR_EXTERNAL;
-		cmd->port = 0;
+			cmd->base.speed = SPEED_100;
+
+		cmd->base.autoneg = AUTONEG_DISABLE;
+		cmd->base.port = 0;
 		SMC_GET_PHY_SPECIAL(lp, lp->mii.phy_id, status);
-		cmd->duplex =
+		cmd->base.duplex =
 			(status & (PHY_SPECIAL_SPD_10FULL_ | PHY_SPECIAL_SPD_100FULL_)) ?
 				DUPLEX_FULL : DUPLEX_HALF;
+
+		ethtool_convert_legacy_u32_to_link_mode(
+			cmd->link_modes.supported, supported);
+
 		ret = 0;
 	}
 
@@ -1487,7 +1487,8 @@ smc911x_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd)
 }
 
 static int
-smc911x_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd)
+smc911x_ethtool_set_link_ksettings(struct net_device *dev,
+				   const struct ethtool_link_ksettings *cmd)
 {
 	struct smc911x_local *lp = netdev_priv(dev);
 	int ret;
@@ -1495,16 +1496,18 @@ smc911x_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd)
 
 	if (lp->phy_type != 0) {
 		spin_lock_irqsave(&lp->lock, flags);
-		ret = mii_ethtool_sset(&lp->mii, cmd);
+		ret = mii_ethtool_set_link_ksettings(&lp->mii, cmd);
 		spin_unlock_irqrestore(&lp->lock, flags);
 	} else {
-		if (cmd->autoneg != AUTONEG_DISABLE ||
-			cmd->speed != SPEED_10 ||
-			(cmd->duplex != DUPLEX_HALF && cmd->duplex != DUPLEX_FULL) ||
-			(cmd->port != PORT_TP && cmd->port != PORT_AUI))
+		if (cmd->base.autoneg != AUTONEG_DISABLE ||
+		    cmd->base.speed != SPEED_10 ||
+		    (cmd->base.duplex != DUPLEX_HALF &&
+		     cmd->base.duplex != DUPLEX_FULL) ||
+		    (cmd->base.port != PORT_TP &&
+		     cmd->base.port != PORT_AUI))
 			return -EINVAL;
 
-		lp->ctl_rfduplx = cmd->duplex == DUPLEX_FULL;
+		lp->ctl_rfduplx = cmd->base.duplex == DUPLEX_FULL;
 
 		ret = 0;
 	}
@@ -1686,8 +1689,6 @@ static int smc911x_ethtool_geteeprom_len(struct net_device *dev)
 }
 
 static const struct ethtool_ops smc911x_ethtool_ops = {
-	.get_settings	 = smc911x_ethtool_getsettings,
-	.set_settings	 = smc911x_ethtool_setsettings,
 	.get_drvinfo	 = smc911x_ethtool_getdrvinfo,
 	.get_msglevel	 = smc911x_ethtool_getmsglevel,
 	.set_msglevel	 = smc911x_ethtool_setmsglevel,
@@ -1698,6 +1699,8 @@ static const struct ethtool_ops smc911x_ethtool_ops = {
 	.get_eeprom_len = smc911x_ethtool_geteeprom_len,
 	.get_eeprom = smc911x_ethtool_geteeprom,
 	.set_eeprom = smc911x_ethtool_seteeprom,
+	.get_link_ksettings	 = smc911x_ethtool_get_link_ksettings,
+	.set_link_ksettings	 = smc911x_ethtool_set_link_ksettings,
 };
 
 /*
-- 
cgit v1.2.3


From 7b022a1b706f7684341e285e627a74a98e730301 Mon Sep 17 00:00:00 2001
From: Philippe Reynes <tremyfr@gmail.com>
Date: Sat, 4 Mar 2017 12:42:39 +0100
Subject: net: smsc: smc91x: use new api ethtool_{get|set}_link_ksettings

The ethtool api {get|set}_settings is deprecated.
We move this driver to new api {get|set}_link_ksettings.

As I don't have the hardware, I'd be very pleased if
someone may test this patch.

Signed-off-by: Philippe Reynes <tremyfr@gmail.com>
Tested-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/smsc/smc91x.c | 47 ++++++++++++++++++++------------------
 1 file changed, 25 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
index 65077c77082a..91e9bd7159ab 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -1535,32 +1535,33 @@ static int smc_close(struct net_device *dev)
  * Ethtool support
  */
 static int
-smc_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd)
+smc_ethtool_get_link_ksettings(struct net_device *dev,
+			       struct ethtool_link_ksettings *cmd)
 {
 	struct smc_local *lp = netdev_priv(dev);
 	int ret;
 
-	cmd->maxtxpkt = 1;
-	cmd->maxrxpkt = 1;
-
 	if (lp->phy_type != 0) {
 		spin_lock_irq(&lp->lock);
-		ret = mii_ethtool_gset(&lp->mii, cmd);
+		ret = mii_ethtool_get_link_ksettings(&lp->mii, cmd);
 		spin_unlock_irq(&lp->lock);
 	} else {
-		cmd->supported = SUPPORTED_10baseT_Half |
+		u32 supported = SUPPORTED_10baseT_Half |
 				 SUPPORTED_10baseT_Full |
 				 SUPPORTED_TP | SUPPORTED_AUI;
 
 		if (lp->ctl_rspeed == 10)
-			ethtool_cmd_speed_set(cmd, SPEED_10);
+			cmd->base.speed = SPEED_10;
 		else if (lp->ctl_rspeed == 100)
-			ethtool_cmd_speed_set(cmd, SPEED_100);
+			cmd->base.speed = SPEED_100;
+
+		cmd->base.autoneg = AUTONEG_DISABLE;
+		cmd->base.port = 0;
+		cmd->base.duplex = lp->tcr_cur_mode & TCR_SWFDUP ?
+			DUPLEX_FULL : DUPLEX_HALF;
 
-		cmd->autoneg = AUTONEG_DISABLE;
-		cmd->transceiver = XCVR_INTERNAL;
-		cmd->port = 0;
-		cmd->duplex = lp->tcr_cur_mode & TCR_SWFDUP ? DUPLEX_FULL : DUPLEX_HALF;
+		ethtool_convert_legacy_u32_to_link_mode(
+			cmd->link_modes.supported, supported);
 
 		ret = 0;
 	}
@@ -1569,24 +1570,26 @@ smc_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd)
 }
 
 static int
-smc_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd)
+smc_ethtool_set_link_ksettings(struct net_device *dev,
+			       const struct ethtool_link_ksettings *cmd)
 {
 	struct smc_local *lp = netdev_priv(dev);
 	int ret;
 
 	if (lp->phy_type != 0) {
 		spin_lock_irq(&lp->lock);
-		ret = mii_ethtool_sset(&lp->mii, cmd);
+		ret = mii_ethtool_set_link_ksettings(&lp->mii, cmd);
 		spin_unlock_irq(&lp->lock);
 	} else {
-		if (cmd->autoneg != AUTONEG_DISABLE ||
-		    cmd->speed != SPEED_10 ||
-		    (cmd->duplex != DUPLEX_HALF && cmd->duplex != DUPLEX_FULL) ||
-		    (cmd->port != PORT_TP && cmd->port != PORT_AUI))
+		if (cmd->base.autoneg != AUTONEG_DISABLE ||
+		    cmd->base.speed != SPEED_10 ||
+		    (cmd->base.duplex != DUPLEX_HALF &&
+		     cmd->base.duplex != DUPLEX_FULL) ||
+		    (cmd->base.port != PORT_TP && cmd->base.port != PORT_AUI))
 			return -EINVAL;
 
-//		lp->port = cmd->port;
-		lp->ctl_rfduplx = cmd->duplex == DUPLEX_FULL;
+//		lp->port = cmd->base.port;
+		lp->ctl_rfduplx = cmd->base.duplex == DUPLEX_FULL;
 
 //		if (netif_running(dev))
 //			smc_set_port(dev);
@@ -1744,8 +1747,6 @@ static int smc_ethtool_seteeprom(struct net_device *dev,
 
 
 static const struct ethtool_ops smc_ethtool_ops = {
-	.get_settings	= smc_ethtool_getsettings,
-	.set_settings	= smc_ethtool_setsettings,
 	.get_drvinfo	= smc_ethtool_getdrvinfo,
 
 	.get_msglevel	= smc_ethtool_getmsglevel,
@@ -1755,6 +1756,8 @@ static const struct ethtool_ops smc_ethtool_ops = {
 	.get_eeprom_len = smc_ethtool_geteeprom_len,
 	.get_eeprom	= smc_ethtool_geteeprom,
 	.set_eeprom	= smc_ethtool_seteeprom,
+	.get_link_ksettings	= smc_ethtool_get_link_ksettings,
+	.set_link_ksettings	= smc_ethtool_set_link_ksettings,
 };
 
 static const struct net_device_ops smc_netdev_ops = {
-- 
cgit v1.2.3


From 15883a43af0bcd10b3f3173bca4a0e60518bc154 Mon Sep 17 00:00:00 2001
From: Philippe Reynes <tremyfr@gmail.com>
Date: Sat, 4 Mar 2017 16:16:12 +0100
Subject: net: sun: cassini: use new api ethtool_{get|set}_link_ksettings

The ethtool api {get|set}_settings is deprecated.
We move this driver to new api {get|set}_link_ksettings.

As I don't have the hardware, I'd be very pleased if
someone may test this patch.

Signed-off-by: Philippe Reynes <tremyfr@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sun/cassini.c | 98 ++++++++++++++++++++------------------
 1 file changed, 52 insertions(+), 46 deletions(-)

diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c
index 0e8e89f17dbb..382993c1561c 100644
--- a/drivers/net/ethernet/sun/cassini.c
+++ b/drivers/net/ethernet/sun/cassini.c
@@ -691,7 +691,8 @@ static void cas_mif_poll(struct cas *cp, const int enable)
 }
 
 /* Must be invoked under cp->lock */
-static void cas_begin_auto_negotiation(struct cas *cp, struct ethtool_cmd *ep)
+static void cas_begin_auto_negotiation(struct cas *cp,
+				       const struct ethtool_link_ksettings *ep)
 {
 	u16 ctl;
 #if 1
@@ -704,16 +705,16 @@ static void cas_begin_auto_negotiation(struct cas *cp, struct ethtool_cmd *ep)
 	if (!ep)
 		goto start_aneg;
 	lcntl = cp->link_cntl;
-	if (ep->autoneg == AUTONEG_ENABLE)
+	if (ep->base.autoneg == AUTONEG_ENABLE) {
 		cp->link_cntl = BMCR_ANENABLE;
-	else {
-		u32 speed = ethtool_cmd_speed(ep);
+	} else {
+		u32 speed = ep->base.speed;
 		cp->link_cntl = 0;
 		if (speed == SPEED_100)
 			cp->link_cntl |= BMCR_SPEED100;
 		else if (speed == SPEED_1000)
 			cp->link_cntl |= CAS_BMCR_SPEED1000;
-		if (ep->duplex == DUPLEX_FULL)
+		if (ep->base.duplex == DUPLEX_FULL)
 			cp->link_cntl |= BMCR_FULLDPLX;
 	}
 #if 1
@@ -4528,19 +4529,21 @@ static void cas_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info
 	strlcpy(info->bus_info, pci_name(cp->pdev), sizeof(info->bus_info));
 }
 
-static int cas_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int cas_get_link_ksettings(struct net_device *dev,
+				  struct ethtool_link_ksettings *cmd)
 {
 	struct cas *cp = netdev_priv(dev);
 	u16 bmcr;
 	int full_duplex, speed, pause;
 	unsigned long flags;
 	enum link_state linkstate = link_up;
+	u32 supported, advertising;
 
-	cmd->advertising = 0;
-	cmd->supported = SUPPORTED_Autoneg;
+	advertising = 0;
+	supported = SUPPORTED_Autoneg;
 	if (cp->cas_flags & CAS_FLAG_1000MB_CAP) {
-		cmd->supported |= SUPPORTED_1000baseT_Full;
-		cmd->advertising |= ADVERTISED_1000baseT_Full;
+		supported |= SUPPORTED_1000baseT_Full;
+		advertising |= ADVERTISED_1000baseT_Full;
 	}
 
 	/* Record PHY settings if HW is on. */
@@ -4548,17 +4551,15 @@ static int cas_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 	bmcr = 0;
 	linkstate = cp->lstate;
 	if (CAS_PHY_MII(cp->phy_type)) {
-		cmd->port = PORT_MII;
-		cmd->transceiver = (cp->cas_flags & CAS_FLAG_SATURN) ?
-			XCVR_INTERNAL : XCVR_EXTERNAL;
-		cmd->phy_address = cp->phy_addr;
-		cmd->advertising |= ADVERTISED_TP | ADVERTISED_MII |
+		cmd->base.port = PORT_MII;
+		cmd->base.phy_address = cp->phy_addr;
+		advertising |= ADVERTISED_TP | ADVERTISED_MII |
 			ADVERTISED_10baseT_Half |
 			ADVERTISED_10baseT_Full |
 			ADVERTISED_100baseT_Half |
 			ADVERTISED_100baseT_Full;
 
-		cmd->supported |=
+		supported |=
 			(SUPPORTED_10baseT_Half |
 			 SUPPORTED_10baseT_Full |
 			 SUPPORTED_100baseT_Half |
@@ -4574,11 +4575,10 @@ static int cas_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 		}
 
 	} else {
-		cmd->port = PORT_FIBRE;
-		cmd->transceiver = XCVR_INTERNAL;
-		cmd->phy_address = 0;
-		cmd->supported   |= SUPPORTED_FIBRE;
-		cmd->advertising |= ADVERTISED_FIBRE;
+		cmd->base.port = PORT_FIBRE;
+		cmd->base.phy_address = 0;
+		supported   |= SUPPORTED_FIBRE;
+		advertising |= ADVERTISED_FIBRE;
 
 		if (cp->hw_running) {
 			/* pcs uses the same bits as mii */
@@ -4590,21 +4590,20 @@ static int cas_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 	spin_unlock_irqrestore(&cp->lock, flags);
 
 	if (bmcr & BMCR_ANENABLE) {
-		cmd->advertising |= ADVERTISED_Autoneg;
-		cmd->autoneg = AUTONEG_ENABLE;
-		ethtool_cmd_speed_set(cmd, ((speed == 10) ?
+		advertising |= ADVERTISED_Autoneg;
+		cmd->base.autoneg = AUTONEG_ENABLE;
+		cmd->base.speed =  ((speed == 10) ?
 					    SPEED_10 :
 					    ((speed == 1000) ?
-					     SPEED_1000 : SPEED_100)));
-		cmd->duplex = full_duplex ? DUPLEX_FULL : DUPLEX_HALF;
+					     SPEED_1000 : SPEED_100));
+		cmd->base.duplex = full_duplex ? DUPLEX_FULL : DUPLEX_HALF;
 	} else {
-		cmd->autoneg = AUTONEG_DISABLE;
-		ethtool_cmd_speed_set(cmd, ((bmcr & CAS_BMCR_SPEED1000) ?
+		cmd->base.autoneg = AUTONEG_DISABLE;
+		cmd->base.speed = ((bmcr & CAS_BMCR_SPEED1000) ?
 					    SPEED_1000 :
 					    ((bmcr & BMCR_SPEED100) ?
-					     SPEED_100 : SPEED_10)));
-		cmd->duplex =
-			(bmcr & BMCR_FULLDPLX) ?
+					     SPEED_100 : SPEED_10));
+		cmd->base.duplex = (bmcr & BMCR_FULLDPLX) ?
 			DUPLEX_FULL : DUPLEX_HALF;
 	}
 	if (linkstate != link_up) {
@@ -4619,39 +4618,46 @@ static int cas_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 		 * settings that we configured.
 		 */
 		if (cp->link_cntl & BMCR_ANENABLE) {
-			ethtool_cmd_speed_set(cmd, 0);
-			cmd->duplex = 0xff;
+			cmd->base.speed = 0;
+			cmd->base.duplex = 0xff;
 		} else {
-			ethtool_cmd_speed_set(cmd, SPEED_10);
+			cmd->base.speed = SPEED_10;
 			if (cp->link_cntl & BMCR_SPEED100) {
-				ethtool_cmd_speed_set(cmd, SPEED_100);
+				cmd->base.speed = SPEED_100;
 			} else if (cp->link_cntl & CAS_BMCR_SPEED1000) {
-				ethtool_cmd_speed_set(cmd, SPEED_1000);
+				cmd->base.speed = SPEED_1000;
 			}
-			cmd->duplex = (cp->link_cntl & BMCR_FULLDPLX)?
+			cmd->base.duplex = (cp->link_cntl & BMCR_FULLDPLX) ?
 				DUPLEX_FULL : DUPLEX_HALF;
 		}
 	}
+
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						supported);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						advertising);
+
 	return 0;
 }
 
-static int cas_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int cas_set_link_ksettings(struct net_device *dev,
+				  const struct ethtool_link_ksettings *cmd)
 {
 	struct cas *cp = netdev_priv(dev);
 	unsigned long flags;
-	u32 speed = ethtool_cmd_speed(cmd);
+	u32 speed = cmd->base.speed;
 
 	/* Verify the settings we care about. */
-	if (cmd->autoneg != AUTONEG_ENABLE &&
-	    cmd->autoneg != AUTONEG_DISABLE)
+	if (cmd->base.autoneg != AUTONEG_ENABLE &&
+	    cmd->base.autoneg != AUTONEG_DISABLE)
 		return -EINVAL;
 
-	if (cmd->autoneg == AUTONEG_DISABLE &&
+	if (cmd->base.autoneg == AUTONEG_DISABLE &&
 	    ((speed != SPEED_1000 &&
 	      speed != SPEED_100 &&
 	      speed != SPEED_10) ||
-	     (cmd->duplex != DUPLEX_HALF &&
-	      cmd->duplex != DUPLEX_FULL)))
+	     (cmd->base.duplex != DUPLEX_HALF &&
+	      cmd->base.duplex != DUPLEX_FULL)))
 		return -EINVAL;
 
 	/* Apply settings and restart link process. */
@@ -4753,8 +4759,6 @@ static void cas_get_ethtool_stats(struct net_device *dev,
 
 static const struct ethtool_ops cas_ethtool_ops = {
 	.get_drvinfo		= cas_get_drvinfo,
-	.get_settings		= cas_get_settings,
-	.set_settings		= cas_set_settings,
 	.nway_reset		= cas_nway_reset,
 	.get_link		= cas_get_link,
 	.get_msglevel		= cas_get_msglevel,
@@ -4764,6 +4768,8 @@ static const struct ethtool_ops cas_ethtool_ops = {
 	.get_sset_count		= cas_get_sset_count,
 	.get_strings		= cas_get_strings,
 	.get_ethtool_stats	= cas_get_ethtool_stats,
+	.get_link_ksettings	= cas_get_link_ksettings,
+	.set_link_ksettings	= cas_set_link_ksettings,
 };
 
 static int cas_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
-- 
cgit v1.2.3


From 56c07e9501e875883584c00fe6977c3addb1f873 Mon Sep 17 00:00:00 2001
From: Philippe Reynes <tremyfr@gmail.com>
Date: Sat, 4 Mar 2017 17:50:06 +0100
Subject: net: sun: niu: use new api ethtool_{get|set}_link_ksettings

The ethtool api {get|set}_settings is deprecated.
We move this driver to new api {get|set}_link_ksettings.

As I don't have the hardware, I'd be very pleased if
someone may test this patch.

Signed-off-by: Philippe Reynes <tremyfr@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sun/niu.c | 37 ++++++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 57978056b336..2dcca249eb9c 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -6813,7 +6813,8 @@ static void niu_get_drvinfo(struct net_device *dev,
 			sizeof(info->bus_info));
 }
 
-static int niu_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int niu_get_link_ksettings(struct net_device *dev,
+				  struct ethtool_link_ksettings *cmd)
 {
 	struct niu *np = netdev_priv(dev);
 	struct niu_link_config *lp;
@@ -6821,28 +6822,30 @@ static int niu_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 	lp = &np->link_config;
 
 	memset(cmd, 0, sizeof(*cmd));
-	cmd->phy_address = np->phy_addr;
-	cmd->supported = lp->supported;
-	cmd->advertising = lp->active_advertising;
-	cmd->autoneg = lp->active_autoneg;
-	ethtool_cmd_speed_set(cmd, lp->active_speed);
-	cmd->duplex = lp->active_duplex;
-	cmd->port = (np->flags & NIU_FLAGS_FIBER) ? PORT_FIBRE : PORT_TP;
-	cmd->transceiver = (np->flags & NIU_FLAGS_XCVR_SERDES) ?
-		XCVR_EXTERNAL : XCVR_INTERNAL;
+	cmd->base.phy_address = np->phy_addr;
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						lp->supported);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						lp->active_advertising);
+	cmd->base.autoneg = lp->active_autoneg;
+	cmd->base.speed = lp->active_speed;
+	cmd->base.duplex = lp->active_duplex;
+	cmd->base.port = (np->flags & NIU_FLAGS_FIBER) ? PORT_FIBRE : PORT_TP;
 
 	return 0;
 }
 
-static int niu_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int niu_set_link_ksettings(struct net_device *dev,
+				  const struct ethtool_link_ksettings *cmd)
 {
 	struct niu *np = netdev_priv(dev);
 	struct niu_link_config *lp = &np->link_config;
 
-	lp->advertising = cmd->advertising;
-	lp->speed = ethtool_cmd_speed(cmd);
-	lp->duplex = cmd->duplex;
-	lp->autoneg = cmd->autoneg;
+	ethtool_convert_link_mode_to_legacy_u32(&lp->advertising,
+						cmd->link_modes.advertising);
+	lp->speed = cmd->base.speed;
+	lp->duplex = cmd->base.duplex;
+	lp->autoneg = cmd->base.autoneg;
 	return niu_init_link(np);
 }
 
@@ -7902,14 +7905,14 @@ static const struct ethtool_ops niu_ethtool_ops = {
 	.nway_reset		= niu_nway_reset,
 	.get_eeprom_len		= niu_get_eeprom_len,
 	.get_eeprom		= niu_get_eeprom,
-	.get_settings		= niu_get_settings,
-	.set_settings		= niu_set_settings,
 	.get_strings		= niu_get_strings,
 	.get_sset_count		= niu_get_sset_count,
 	.get_ethtool_stats	= niu_get_ethtool_stats,
 	.set_phys_id		= niu_set_phys_id,
 	.get_rxnfc		= niu_get_nfc,
 	.set_rxnfc		= niu_set_nfc,
+	.get_link_ksettings	= niu_get_link_ksettings,
+	.set_link_ksettings	= niu_set_link_ksettings,
 };
 
 static int niu_ldg_assign_ldn(struct niu *np, struct niu_parent *parent,
-- 
cgit v1.2.3


From 9dff2defef3ce1933a44d59ec139987e19645841 Mon Sep 17 00:00:00 2001
From: Philippe Reynes <tremyfr@gmail.com>
Date: Sun, 5 Mar 2017 00:04:18 +0100
Subject: net: sun: sungem: use new api ethtool_{get|set}_link_ksettings

The ethtool api {get|set}_settings is deprecated.
We move this driver to new api {get|set}_link_ksettings.

As I don't have the hardware, I'd be very pleased if
someone may test this patch.

Signed-off-by: Philippe Reynes <tremyfr@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sun/sungem.c | 98 +++++++++++++++++++++++----------------
 1 file changed, 57 insertions(+), 41 deletions(-)

diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c
index 5c5952e782cd..dbfca0466760 100644
--- a/drivers/net/ethernet/sun/sungem.c
+++ b/drivers/net/ethernet/sun/sungem.c
@@ -1250,12 +1250,17 @@ static void gem_stop_dma(struct gem *gp)
 
 
 // XXX dbl check what that function should do when called on PCS PHY
-static void gem_begin_auto_negotiation(struct gem *gp, struct ethtool_cmd *ep)
+static void gem_begin_auto_negotiation(struct gem *gp,
+				       const struct ethtool_link_ksettings *ep)
 {
 	u32 advertise, features;
 	int autoneg;
 	int speed;
 	int duplex;
+	u32 advertising;
+
+	ethtool_convert_link_mode_to_legacy_u32(&advertising,
+						ep->link_modes.advertising);
 
 	if (gp->phy_type != phy_mii_mdio0 &&
      	    gp->phy_type != phy_mii_mdio1)
@@ -1278,13 +1283,13 @@ static void gem_begin_auto_negotiation(struct gem *gp, struct ethtool_cmd *ep)
 	/* Setup link parameters */
 	if (!ep)
 		goto start_aneg;
-	if (ep->autoneg == AUTONEG_ENABLE) {
-		advertise = ep->advertising;
+	if (ep->base.autoneg == AUTONEG_ENABLE) {
+		advertise = advertising;
 		autoneg = 1;
 	} else {
 		autoneg = 0;
-		speed = ethtool_cmd_speed(ep);
-		duplex = ep->duplex;
+		speed = ep->base.speed;
+		duplex = ep->base.duplex;
 	}
 
 start_aneg:
@@ -2515,85 +2520,96 @@ static void gem_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info
 	strlcpy(info->bus_info, pci_name(gp->pdev), sizeof(info->bus_info));
 }
 
-static int gem_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int gem_get_link_ksettings(struct net_device *dev,
+				  struct ethtool_link_ksettings *cmd)
 {
 	struct gem *gp = netdev_priv(dev);
+	u32 supported, advertising;
 
 	if (gp->phy_type == phy_mii_mdio0 ||
 	    gp->phy_type == phy_mii_mdio1) {
 		if (gp->phy_mii.def)
-			cmd->supported = gp->phy_mii.def->features;
+			supported = gp->phy_mii.def->features;
 		else
-			cmd->supported = (SUPPORTED_10baseT_Half |
+			supported = (SUPPORTED_10baseT_Half |
 					  SUPPORTED_10baseT_Full);
 
 		/* XXX hardcoded stuff for now */
-		cmd->port = PORT_MII;
-		cmd->transceiver = XCVR_EXTERNAL;
-		cmd->phy_address = 0; /* XXX fixed PHYAD */
+		cmd->base.port = PORT_MII;
+		cmd->base.phy_address = 0; /* XXX fixed PHYAD */
 
 		/* Return current PHY settings */
-		cmd->autoneg = gp->want_autoneg;
-		ethtool_cmd_speed_set(cmd, gp->phy_mii.speed);
-		cmd->duplex = gp->phy_mii.duplex;
-		cmd->advertising = gp->phy_mii.advertising;
+		cmd->base.autoneg = gp->want_autoneg;
+		cmd->base.speed = gp->phy_mii.speed;
+		cmd->base.duplex = gp->phy_mii.duplex;
+		advertising = gp->phy_mii.advertising;
 
 		/* If we started with a forced mode, we don't have a default
 		 * advertise set, we need to return something sensible so
 		 * userland can re-enable autoneg properly.
 		 */
-		if (cmd->advertising == 0)
-			cmd->advertising = cmd->supported;
+		if (advertising == 0)
+			advertising = supported;
 	} else { // XXX PCS ?
-		cmd->supported =
+		supported =
 			(SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
 			 SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
 			 SUPPORTED_Autoneg);
-		cmd->advertising = cmd->supported;
-		ethtool_cmd_speed_set(cmd, 0);
-		cmd->duplex = cmd->port = cmd->phy_address =
-			cmd->transceiver = cmd->autoneg = 0;
+		advertising = supported;
+		cmd->base.speed = 0;
+		cmd->base.duplex = 0;
+		cmd->base.port = 0;
+		cmd->base.phy_address = 0;
+		cmd->base.autoneg = 0;
 
 		/* serdes means usually a Fibre connector, with most fixed */
 		if (gp->phy_type == phy_serdes) {
-			cmd->port = PORT_FIBRE;
-			cmd->supported = (SUPPORTED_1000baseT_Half |
+			cmd->base.port = PORT_FIBRE;
+			supported = (SUPPORTED_1000baseT_Half |
 				SUPPORTED_1000baseT_Full |
 				SUPPORTED_FIBRE | SUPPORTED_Autoneg |
 				SUPPORTED_Pause | SUPPORTED_Asym_Pause);
-			cmd->advertising = cmd->supported;
-			cmd->transceiver = XCVR_INTERNAL;
+			advertising = supported;
 			if (gp->lstate == link_up)
-				ethtool_cmd_speed_set(cmd, SPEED_1000);
-			cmd->duplex = DUPLEX_FULL;
-			cmd->autoneg = 1;
+				cmd->base.speed = SPEED_1000;
+			cmd->base.duplex = DUPLEX_FULL;
+			cmd->base.autoneg = 1;
 		}
 	}
-	cmd->maxtxpkt = cmd->maxrxpkt = 0;
+
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						supported);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						advertising);
 
 	return 0;
 }
 
-static int gem_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int gem_set_link_ksettings(struct net_device *dev,
+				  const struct ethtool_link_ksettings *cmd)
 {
 	struct gem *gp = netdev_priv(dev);
-	u32 speed = ethtool_cmd_speed(cmd);
+	u32 speed = cmd->base.speed;
+	u32 advertising;
+
+	ethtool_convert_link_mode_to_legacy_u32(&advertising,
+						cmd->link_modes.advertising);
 
 	/* Verify the settings we care about. */
-	if (cmd->autoneg != AUTONEG_ENABLE &&
-	    cmd->autoneg != AUTONEG_DISABLE)
+	if (cmd->base.autoneg != AUTONEG_ENABLE &&
+	    cmd->base.autoneg != AUTONEG_DISABLE)
 		return -EINVAL;
 
-	if (cmd->autoneg == AUTONEG_ENABLE &&
-	    cmd->advertising == 0)
+	if (cmd->base.autoneg == AUTONEG_ENABLE &&
+	    advertising == 0)
 		return -EINVAL;
 
-	if (cmd->autoneg == AUTONEG_DISABLE &&
+	if (cmd->base.autoneg == AUTONEG_DISABLE &&
 	    ((speed != SPEED_1000 &&
 	      speed != SPEED_100 &&
 	      speed != SPEED_10) ||
-	     (cmd->duplex != DUPLEX_HALF &&
-	      cmd->duplex != DUPLEX_FULL)))
+	     (cmd->base.duplex != DUPLEX_HALF &&
+	      cmd->base.duplex != DUPLEX_FULL)))
 		return -EINVAL;
 
 	/* Apply settings and restart link process. */
@@ -2666,13 +2682,13 @@ static int gem_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 static const struct ethtool_ops gem_ethtool_ops = {
 	.get_drvinfo		= gem_get_drvinfo,
 	.get_link		= ethtool_op_get_link,
-	.get_settings		= gem_get_settings,
-	.set_settings		= gem_set_settings,
 	.nway_reset		= gem_nway_reset,
 	.get_msglevel		= gem_get_msglevel,
 	.set_msglevel		= gem_set_msglevel,
 	.get_wol		= gem_get_wol,
 	.set_wol		= gem_set_wol,
+	.get_link_ksettings	= gem_get_link_ksettings,
+	.set_link_ksettings	= gem_set_link_ksettings,
 };
 
 static int gem_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
-- 
cgit v1.2.3


From 8103015df56d97cc3a4167d2e070ee14cc022bae Mon Sep 17 00:00:00 2001
From: Philippe Reynes <tremyfr@gmail.com>
Date: Sun, 5 Mar 2017 22:25:39 +0100
Subject: net: sun: sunhme: use new api ethtool_{get|set}_link_ksettings

The ethtool api {get|set}_settings is deprecated.
We move this driver to new api {get|set}_link_ksettings.

As I don't have the hardware, I'd be very pleased if
someone may test this patch.

Signed-off-by: Philippe Reynes <tremyfr@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sun/sunhme.c | 62 +++++++++++++++++++++------------------
 1 file changed, 34 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c
index 72ff05cd3ed8..53ff66ef53ac 100644
--- a/drivers/net/ethernet/sun/sunhme.c
+++ b/drivers/net/ethernet/sun/sunhme.c
@@ -1294,9 +1294,10 @@ static void happy_meal_init_rings(struct happy_meal *hp)
 }
 
 /* hp->happy_lock must be held */
-static void happy_meal_begin_auto_negotiation(struct happy_meal *hp,
-					      void __iomem *tregs,
-					      struct ethtool_cmd *ep)
+static void
+happy_meal_begin_auto_negotiation(struct happy_meal *hp,
+				  void __iomem *tregs,
+				  const struct ethtool_link_ksettings *ep)
 {
 	int timeout;
 
@@ -1309,7 +1310,7 @@ static void happy_meal_begin_auto_negotiation(struct happy_meal *hp,
 	/* XXX Check BMSR_ANEGCAPABLE, should not be necessary though. */
 
 	hp->sw_advertise = happy_meal_tcvr_read(hp, tregs, MII_ADVERTISE);
-	if (ep == NULL || ep->autoneg == AUTONEG_ENABLE) {
+	if (!ep || ep->base.autoneg == AUTONEG_ENABLE) {
 		/* Advertise everything we can support. */
 		if (hp->sw_bmsr & BMSR_10HALF)
 			hp->sw_advertise |= (ADVERTISE_10HALF);
@@ -1384,14 +1385,14 @@ force_link:
 		/* Disable auto-negotiation in BMCR, enable the duplex and
 		 * speed setting, init the timer state machine, and fire it off.
 		 */
-		if (ep == NULL || ep->autoneg == AUTONEG_ENABLE) {
+		if (!ep || ep->base.autoneg == AUTONEG_ENABLE) {
 			hp->sw_bmcr = BMCR_SPEED100;
 		} else {
-			if (ethtool_cmd_speed(ep) == SPEED_100)
+			if (ep->base.speed == SPEED_100)
 				hp->sw_bmcr = BMCR_SPEED100;
 			else
 				hp->sw_bmcr = 0;
-			if (ep->duplex == DUPLEX_FULL)
+			if (ep->base.duplex == DUPLEX_FULL)
 				hp->sw_bmcr |= BMCR_FULLDPLX;
 		}
 		happy_meal_tcvr_write(hp, tregs, MII_BMCR, hp->sw_bmcr);
@@ -2434,20 +2435,21 @@ static void happy_meal_set_multicast(struct net_device *dev)
 }
 
 /* Ethtool support... */
-static int hme_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int hme_get_link_ksettings(struct net_device *dev,
+				  struct ethtool_link_ksettings *cmd)
 {
 	struct happy_meal *hp = netdev_priv(dev);
 	u32 speed;
+	u32 supported;
 
-	cmd->supported =
+	supported =
 		(SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
 		 SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
 		 SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII);
 
 	/* XXX hardcoded stuff for now */
-	cmd->port = PORT_TP; /* XXX no MII support */
-	cmd->transceiver = XCVR_INTERNAL; /* XXX no external xcvr support */
-	cmd->phy_address = 0; /* XXX fixed PHYAD */
+	cmd->base.port = PORT_TP; /* XXX no MII support */
+	cmd->base.phy_address = 0; /* XXX fixed PHYAD */
 
 	/* Record PHY settings. */
 	spin_lock_irq(&hp->happy_lock);
@@ -2456,41 +2458,45 @@ static int hme_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 	spin_unlock_irq(&hp->happy_lock);
 
 	if (hp->sw_bmcr & BMCR_ANENABLE) {
-		cmd->autoneg = AUTONEG_ENABLE;
+		cmd->base.autoneg = AUTONEG_ENABLE;
 		speed = ((hp->sw_lpa & (LPA_100HALF | LPA_100FULL)) ?
 			 SPEED_100 : SPEED_10);
 		if (speed == SPEED_100)
-			cmd->duplex =
+			cmd->base.duplex =
 				(hp->sw_lpa & (LPA_100FULL)) ?
 				DUPLEX_FULL : DUPLEX_HALF;
 		else
-			cmd->duplex =
+			cmd->base.duplex =
 				(hp->sw_lpa & (LPA_10FULL)) ?
 				DUPLEX_FULL : DUPLEX_HALF;
 	} else {
-		cmd->autoneg = AUTONEG_DISABLE;
+		cmd->base.autoneg = AUTONEG_DISABLE;
 		speed = (hp->sw_bmcr & BMCR_SPEED100) ? SPEED_100 : SPEED_10;
-		cmd->duplex =
+		cmd->base.duplex =
 			(hp->sw_bmcr & BMCR_FULLDPLX) ?
 			DUPLEX_FULL : DUPLEX_HALF;
 	}
-	ethtool_cmd_speed_set(cmd, speed);
+	cmd->base.speed = speed;
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						supported);
+
 	return 0;
 }
 
-static int hme_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int hme_set_link_ksettings(struct net_device *dev,
+				  const struct ethtool_link_ksettings *cmd)
 {
 	struct happy_meal *hp = netdev_priv(dev);
 
 	/* Verify the settings we care about. */
-	if (cmd->autoneg != AUTONEG_ENABLE &&
-	    cmd->autoneg != AUTONEG_DISABLE)
+	if (cmd->base.autoneg != AUTONEG_ENABLE &&
+	    cmd->base.autoneg != AUTONEG_DISABLE)
 		return -EINVAL;
-	if (cmd->autoneg == AUTONEG_DISABLE &&
-	    ((ethtool_cmd_speed(cmd) != SPEED_100 &&
-	      ethtool_cmd_speed(cmd) != SPEED_10) ||
-	     (cmd->duplex != DUPLEX_HALF &&
-	      cmd->duplex != DUPLEX_FULL)))
+	if (cmd->base.autoneg == AUTONEG_DISABLE &&
+	    ((cmd->base.speed != SPEED_100 &&
+	      cmd->base.speed != SPEED_10) ||
+	     (cmd->base.duplex != DUPLEX_HALF &&
+	      cmd->base.duplex != DUPLEX_FULL)))
 		return -EINVAL;
 
 	/* Ok, do it to it. */
@@ -2537,10 +2543,10 @@ static u32 hme_get_link(struct net_device *dev)
 }
 
 static const struct ethtool_ops hme_ethtool_ops = {
-	.get_settings		= hme_get_settings,
-	.set_settings		= hme_set_settings,
 	.get_drvinfo		= hme_get_drvinfo,
 	.get_link		= hme_get_link,
+	.get_link_ksettings	= hme_get_link_ksettings,
+	.set_link_ksettings	= hme_set_link_ksettings,
 };
 
 static int hme_version_printed;
-- 
cgit v1.2.3


From f441df6b9ff41e1af7289b69d5ce379053f900a5 Mon Sep 17 00:00:00 2001
From: Philippe Reynes <tremyfr@gmail.com>
Date: Sun, 5 Mar 2017 23:21:06 +0100
Subject: net: toshiba: ps3_genic_net: use new api
 ethtool_{get|set}_link_ksettings

The ethtool api {get|set}_settings is deprecated.
We move this driver to new api {get|set}_link_ksettings.

As I don't have the hardware, I'd be very pleased if
someone may test this patch.

Signed-off-by: Philippe Reynes <tremyfr@gmail.com>
Tested-by: Geoff Levand <geoff@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/toshiba/ps3_gelic_net.c | 51 ++++++++++++++++------------
 1 file changed, 29 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
index 72013314bba8..fa6a06571187 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
@@ -1206,61 +1206,68 @@ void gelic_net_get_drvinfo(struct net_device *netdev,
 	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
 }
 
-static int gelic_ether_get_settings(struct net_device *netdev,
-				    struct ethtool_cmd *cmd)
+static int gelic_ether_get_link_ksettings(struct net_device *netdev,
+					  struct ethtool_link_ksettings *cmd)
 {
 	struct gelic_card *card = netdev_card(netdev);
+	u32 supported, advertising;
 
 	gelic_card_get_ether_port_status(card, 0);
 
 	if (card->ether_port_status & GELIC_LV1_ETHER_FULL_DUPLEX)
-		cmd->duplex = DUPLEX_FULL;
+		cmd->base.duplex = DUPLEX_FULL;
 	else
-		cmd->duplex = DUPLEX_HALF;
+		cmd->base.duplex = DUPLEX_HALF;
 
 	switch (card->ether_port_status & GELIC_LV1_ETHER_SPEED_MASK) {
 	case GELIC_LV1_ETHER_SPEED_10:
-		ethtool_cmd_speed_set(cmd, SPEED_10);
+		cmd->base.speed = SPEED_10;
 		break;
 	case GELIC_LV1_ETHER_SPEED_100:
-		ethtool_cmd_speed_set(cmd, SPEED_100);
+		cmd->base.speed = SPEED_100;
 		break;
 	case GELIC_LV1_ETHER_SPEED_1000:
-		ethtool_cmd_speed_set(cmd, SPEED_1000);
+		cmd->base.speed = SPEED_1000;
 		break;
 	default:
 		pr_info("%s: speed unknown\n", __func__);
-		ethtool_cmd_speed_set(cmd, SPEED_10);
+		cmd->base.speed = SPEED_10;
 		break;
 	}
 
-	cmd->supported = SUPPORTED_TP | SUPPORTED_Autoneg |
+	supported = SUPPORTED_TP | SUPPORTED_Autoneg |
 			SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
 			SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
 			SUPPORTED_1000baseT_Full;
-	cmd->advertising = cmd->supported;
+	advertising = supported;
 	if (card->link_mode & GELIC_LV1_ETHER_AUTO_NEG) {
-		cmd->autoneg = AUTONEG_ENABLE;
+		cmd->base.autoneg = AUTONEG_ENABLE;
 	} else {
-		cmd->autoneg = AUTONEG_DISABLE;
-		cmd->advertising &= ~ADVERTISED_Autoneg;
+		cmd->base.autoneg = AUTONEG_DISABLE;
+		advertising &= ~ADVERTISED_Autoneg;
 	}
-	cmd->port = PORT_TP;
+	cmd->base.port = PORT_TP;
+
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						supported);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						advertising);
 
 	return 0;
 }
 
-static int gelic_ether_set_settings(struct net_device *netdev,
-				    struct ethtool_cmd *cmd)
+static int
+gelic_ether_set_link_ksettings(struct net_device *netdev,
+			       const struct ethtool_link_ksettings *cmd)
 {
 	struct gelic_card *card = netdev_card(netdev);
 	u64 mode;
 	int ret;
 
-	if (cmd->autoneg == AUTONEG_ENABLE) {
+	if (cmd->base.autoneg == AUTONEG_ENABLE) {
 		mode = GELIC_LV1_ETHER_AUTO_NEG;
 	} else {
-		switch (cmd->speed) {
+		switch (cmd->base.speed) {
 		case SPEED_10:
 			mode = GELIC_LV1_ETHER_SPEED_10;
 			break;
@@ -1273,9 +1280,9 @@ static int gelic_ether_set_settings(struct net_device *netdev,
 		default:
 			return -EINVAL;
 		}
-		if (cmd->duplex == DUPLEX_FULL)
+		if (cmd->base.duplex == DUPLEX_FULL) {
 			mode |= GELIC_LV1_ETHER_FULL_DUPLEX;
-		else if (cmd->speed == SPEED_1000) {
+		} else if (cmd->base.speed == SPEED_1000) {
 			pr_info("1000 half duplex is not supported.\n");
 			return -EINVAL;
 		}
@@ -1370,11 +1377,11 @@ done:
 
 static const struct ethtool_ops gelic_ether_ethtool_ops = {
 	.get_drvinfo	= gelic_net_get_drvinfo,
-	.get_settings	= gelic_ether_get_settings,
-	.set_settings	= gelic_ether_set_settings,
 	.get_link	= ethtool_op_get_link,
 	.get_wol	= gelic_net_get_wol,
 	.set_wol	= gelic_net_set_wol,
+	.get_link_ksettings = gelic_ether_get_link_ksettings,
+	.set_link_ksettings = gelic_ether_set_link_ksettings,
 };
 
 /**
-- 
cgit v1.2.3


From 50ad480e4d64fde941fcac39db1ab29a83d86703 Mon Sep 17 00:00:00 2001
From: Philippe Reynes <tremyfr@gmail.com>
Date: Sun, 5 Mar 2017 23:46:00 +0100
Subject: net: toshiba: spider_net: use new api
 ethtool_{get|set}_link_ksettings

The ethtool api {get|set}_settings is deprecated.
We move this driver to new api {get|set}_link_ksettings.

As I don't have the hardware, I'd be very pleased if
someone may test this patch.

Signed-off-by: Philippe Reynes <tremyfr@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/toshiba/spider_net_ethtool.c | 24 +++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/toshiba/spider_net_ethtool.c b/drivers/net/ethernet/toshiba/spider_net_ethtool.c
index ffe519382e11..16bd036d0682 100644
--- a/drivers/net/ethernet/toshiba/spider_net_ethtool.c
+++ b/drivers/net/ethernet/toshiba/spider_net_ethtool.c
@@ -47,19 +47,23 @@ static struct {
 };
 
 static int
-spider_net_ethtool_get_settings(struct net_device *netdev,
-			       struct ethtool_cmd *cmd)
+spider_net_ethtool_get_link_ksettings(struct net_device *netdev,
+				      struct ethtool_link_ksettings *cmd)
 {
 	struct spider_net_card *card;
 	card = netdev_priv(netdev);
 
-	cmd->supported   = (SUPPORTED_1000baseT_Full |
-			     SUPPORTED_FIBRE);
-	cmd->advertising = (ADVERTISED_1000baseT_Full |
-			     ADVERTISED_FIBRE);
-	cmd->port = PORT_FIBRE;
-	ethtool_cmd_speed_set(cmd, card->phy.speed);
-	cmd->duplex = DUPLEX_FULL;
+	ethtool_link_ksettings_zero_link_mode(cmd, supported);
+	ethtool_link_ksettings_add_link_mode(cmd, supported, 1000baseT_Full);
+	ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
+
+	ethtool_link_ksettings_zero_link_mode(cmd, advertising);
+	ethtool_link_ksettings_add_link_mode(cmd, advertising, 1000baseT_Full);
+	ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE);
+
+	cmd->base.port = PORT_FIBRE;
+	cmd->base.speed = card->phy.speed;
+	cmd->base.duplex = DUPLEX_FULL;
 
 	return 0;
 }
@@ -166,7 +170,6 @@ static void spider_net_get_strings(struct net_device *netdev, u32 stringset,
 }
 
 const struct ethtool_ops spider_net_ethtool_ops = {
-	.get_settings		= spider_net_ethtool_get_settings,
 	.get_drvinfo		= spider_net_ethtool_get_drvinfo,
 	.get_wol		= spider_net_ethtool_get_wol,
 	.get_msglevel		= spider_net_ethtool_get_msglevel,
@@ -177,5 +180,6 @@ const struct ethtool_ops spider_net_ethtool_ops = {
 	.get_strings		= spider_net_get_strings,
 	.get_sset_count		= spider_net_get_sset_count,
 	.get_ethtool_stats	= spider_net_get_ethtool_stats,
+	.get_link_ksettings	= spider_net_ethtool_get_link_ksettings,
 };
 
-- 
cgit v1.2.3


From b793f081674e363f71699b0b32fc9a1890e52db2 Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey@googlemail.com>
Date: Mon, 6 Mar 2017 14:34:27 +0100
Subject: net: ibm: emac: fix regression caused by emac_dt_phy_probe()

Julian Margetson reported a panic on his SAM460EX with Kernel 4.11-rc1:
| Unable to handle kernel paging request for data at address 0x00000014
| Oops: Kernel access of bad area, sig: 11 [#1]
| PREEMPT
| Canyonlands
| Modules linked in:
| CPU: 0 PID: 1 Comm: swapper Not tainted [...]
| task: ea838000 task.stack: ea836000
| NIP: c0599f5c LR: c0599dd8 CTR: 00000000
| REGS: ea837c80 TRAP: 0300   Not tainted [...]
| MSR: 00029000 <CE,EE,ME>
|  CR: 24371242  XER: 20000000
| DEAR: 00000014 ESR: 00000000
| GPR00: c0599ce8 ea837d30 ea838000 c0e52dcc c0d56ffb [...]
| NIP [c0599f5c] emac_probe+0xfb4/0x1304
| LR [c0599dd8] emac_probe+0xe30/0x1304
| Call Trace:
| [ea837d30] [c0599ce8] emac_probe+0xd40/0x1304 (unreliable)
| [ea837d80] [c0533504] platform_drv_probe+0x48/0x90
| [ea837da0] [c0531c14] driver_probe_device+0x15c/0x2c4
| [ea837dd0] [c0531e04] __driver_attach+0x88/0xb0
| ---[ end trace ... ]---

The problem is caused by emac_dt_phy_probe() returing success (0)
for existing device-trees configurations that do not specify a
"phy-handle" property. This caused the code to skip the existing
phy probe and setup. Which led to essential phy related
data-structures being uninitialized.

This patch also removes the unused variable in emac_dt_phy_connect().

Fixes: a577ca6badb5261d ("net: emac: add support for device-tree based PHY discovery and setup")
Reported-by: Julian Margetson <runaway@candw.ms>
Signed-off-by: Christian Lamparter <chunkeey@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/emac/core.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index 275c2e2349ad..c44036d5761a 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -2589,8 +2589,6 @@ static int emac_dt_mdio_probe(struct emac_instance *dev)
 static int emac_dt_phy_connect(struct emac_instance *dev,
 			       struct device_node *phy_handle)
 {
-	int res;
-
 	dev->phy.def = devm_kzalloc(&dev->ofdev->dev, sizeof(*dev->phy.def),
 				    GFP_KERNEL);
 	if (!dev->phy.def)
@@ -2617,7 +2615,7 @@ static int emac_dt_phy_probe(struct emac_instance *dev)
 {
 	struct device_node *np = dev->ofdev->dev.of_node;
 	struct device_node *phy_handle;
-	int res = 0;
+	int res = 1;
 
 	phy_handle = of_parse_phandle(np, "phy-handle", 0);
 
@@ -2714,13 +2712,24 @@ static int emac_init_phy(struct emac_instance *dev)
 	if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) {
 		int res = emac_dt_phy_probe(dev);
 
-		mutex_unlock(&emac_phy_map_lock);
-		if (!res)
+		switch (res) {
+		case 1:
+			/* No phy-handle property configured.
+			 * Continue with the existing phy probe
+			 * and setup code.
+			 */
+			break;
+
+		case 0:
+			mutex_unlock(&emac_phy_map_lock);
 			goto init_phy;
 
-		dev_err(&dev->ofdev->dev, "failed to attach dt phy (%d).\n",
-			res);
-		return res;
+		default:
+			mutex_unlock(&emac_phy_map_lock);
+			dev_err(&dev->ofdev->dev, "failed to attach dt phy (%d).\n",
+				res);
+			return res;
+		}
 	}
 
 	if (dev->phy_address != 0xffffffff)
-- 
cgit v1.2.3


From aac1561ad98e74f6ea43337f9b9714ab0b1f493e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 8 Mar 2017 22:17:10 -0800
Subject: net: Revert ksettings conversions.

Those were supposed to go into the net-next tree not
the net tree.  Oops...

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/smsc/smc911x.c               | 51 ++++++------
 drivers/net/ethernet/sun/cassini.c                | 98 +++++++++++------------
 drivers/net/ethernet/sun/niu.c                    | 37 ++++-----
 drivers/net/ethernet/sun/sungem.c                 | 98 ++++++++++-------------
 drivers/net/ethernet/sun/sunhme.c                 | 62 +++++++-------
 drivers/net/ethernet/toshiba/ps3_gelic_net.c      | 51 +++++-------
 drivers/net/ethernet/toshiba/spider_net_ethtool.c | 24 +++---
 7 files changed, 188 insertions(+), 233 deletions(-)

diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c
index 36307d34f641..4f19c6166182 100644
--- a/drivers/net/ethernet/smsc/smc911x.c
+++ b/drivers/net/ethernet/smsc/smc911x.c
@@ -1446,40 +1446,40 @@ static int smc911x_close(struct net_device *dev)
  * Ethtool support
  */
 static int
-smc911x_ethtool_get_link_ksettings(struct net_device *dev,
-				   struct ethtool_link_ksettings *cmd)
+smc911x_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
 	struct smc911x_local *lp = netdev_priv(dev);
 	int ret, status;
 	unsigned long flags;
-	u32 supported;
 
 	DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__);
+	cmd->maxtxpkt = 1;
+	cmd->maxrxpkt = 1;
 
 	if (lp->phy_type != 0) {
 		spin_lock_irqsave(&lp->lock, flags);
-		ret = mii_ethtool_get_link_ksettings(&lp->mii, cmd);
+		ret = mii_ethtool_gset(&lp->mii, cmd);
 		spin_unlock_irqrestore(&lp->lock, flags);
 	} else {
-		supported = SUPPORTED_10baseT_Half |
+		cmd->supported = SUPPORTED_10baseT_Half |
 				SUPPORTED_10baseT_Full |
 				SUPPORTED_TP | SUPPORTED_AUI;
 
 		if (lp->ctl_rspeed == 10)
-			cmd->base.speed = SPEED_10;
+			ethtool_cmd_speed_set(cmd, SPEED_10);
 		else if (lp->ctl_rspeed == 100)
-			cmd->base.speed = SPEED_100;
-
-		cmd->base.autoneg = AUTONEG_DISABLE;
-		cmd->base.port = 0;
+			ethtool_cmd_speed_set(cmd, SPEED_100);
+
+		cmd->autoneg = AUTONEG_DISABLE;
+		if (lp->mii.phy_id==1)
+			cmd->transceiver = XCVR_INTERNAL;
+		else
+			cmd->transceiver = XCVR_EXTERNAL;
+		cmd->port = 0;
 		SMC_GET_PHY_SPECIAL(lp, lp->mii.phy_id, status);
-		cmd->base.duplex =
+		cmd->duplex =
 			(status & (PHY_SPECIAL_SPD_10FULL_ | PHY_SPECIAL_SPD_100FULL_)) ?
 				DUPLEX_FULL : DUPLEX_HALF;
-
-		ethtool_convert_legacy_u32_to_link_mode(
-			cmd->link_modes.supported, supported);
-
 		ret = 0;
 	}
 
@@ -1487,8 +1487,7 @@ smc911x_ethtool_get_link_ksettings(struct net_device *dev,
 }
 
 static int
-smc911x_ethtool_set_link_ksettings(struct net_device *dev,
-				   const struct ethtool_link_ksettings *cmd)
+smc911x_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
 	struct smc911x_local *lp = netdev_priv(dev);
 	int ret;
@@ -1496,18 +1495,16 @@ smc911x_ethtool_set_link_ksettings(struct net_device *dev,
 
 	if (lp->phy_type != 0) {
 		spin_lock_irqsave(&lp->lock, flags);
-		ret = mii_ethtool_set_link_ksettings(&lp->mii, cmd);
+		ret = mii_ethtool_sset(&lp->mii, cmd);
 		spin_unlock_irqrestore(&lp->lock, flags);
 	} else {
-		if (cmd->base.autoneg != AUTONEG_DISABLE ||
-		    cmd->base.speed != SPEED_10 ||
-		    (cmd->base.duplex != DUPLEX_HALF &&
-		     cmd->base.duplex != DUPLEX_FULL) ||
-		    (cmd->base.port != PORT_TP &&
-		     cmd->base.port != PORT_AUI))
+		if (cmd->autoneg != AUTONEG_DISABLE ||
+			cmd->speed != SPEED_10 ||
+			(cmd->duplex != DUPLEX_HALF && cmd->duplex != DUPLEX_FULL) ||
+			(cmd->port != PORT_TP && cmd->port != PORT_AUI))
 			return -EINVAL;
 
-		lp->ctl_rfduplx = cmd->base.duplex == DUPLEX_FULL;
+		lp->ctl_rfduplx = cmd->duplex == DUPLEX_FULL;
 
 		ret = 0;
 	}
@@ -1689,6 +1686,8 @@ static int smc911x_ethtool_geteeprom_len(struct net_device *dev)
 }
 
 static const struct ethtool_ops smc911x_ethtool_ops = {
+	.get_settings	 = smc911x_ethtool_getsettings,
+	.set_settings	 = smc911x_ethtool_setsettings,
 	.get_drvinfo	 = smc911x_ethtool_getdrvinfo,
 	.get_msglevel	 = smc911x_ethtool_getmsglevel,
 	.set_msglevel	 = smc911x_ethtool_setmsglevel,
@@ -1699,8 +1698,6 @@ static const struct ethtool_ops smc911x_ethtool_ops = {
 	.get_eeprom_len = smc911x_ethtool_geteeprom_len,
 	.get_eeprom = smc911x_ethtool_geteeprom,
 	.set_eeprom = smc911x_ethtool_seteeprom,
-	.get_link_ksettings	 = smc911x_ethtool_get_link_ksettings,
-	.set_link_ksettings	 = smc911x_ethtool_set_link_ksettings,
 };
 
 /*
diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c
index 382993c1561c..0e8e89f17dbb 100644
--- a/drivers/net/ethernet/sun/cassini.c
+++ b/drivers/net/ethernet/sun/cassini.c
@@ -691,8 +691,7 @@ static void cas_mif_poll(struct cas *cp, const int enable)
 }
 
 /* Must be invoked under cp->lock */
-static void cas_begin_auto_negotiation(struct cas *cp,
-				       const struct ethtool_link_ksettings *ep)
+static void cas_begin_auto_negotiation(struct cas *cp, struct ethtool_cmd *ep)
 {
 	u16 ctl;
 #if 1
@@ -705,16 +704,16 @@ static void cas_begin_auto_negotiation(struct cas *cp,
 	if (!ep)
 		goto start_aneg;
 	lcntl = cp->link_cntl;
-	if (ep->base.autoneg == AUTONEG_ENABLE) {
+	if (ep->autoneg == AUTONEG_ENABLE)
 		cp->link_cntl = BMCR_ANENABLE;
-	} else {
-		u32 speed = ep->base.speed;
+	else {
+		u32 speed = ethtool_cmd_speed(ep);
 		cp->link_cntl = 0;
 		if (speed == SPEED_100)
 			cp->link_cntl |= BMCR_SPEED100;
 		else if (speed == SPEED_1000)
 			cp->link_cntl |= CAS_BMCR_SPEED1000;
-		if (ep->base.duplex == DUPLEX_FULL)
+		if (ep->duplex == DUPLEX_FULL)
 			cp->link_cntl |= BMCR_FULLDPLX;
 	}
 #if 1
@@ -4529,21 +4528,19 @@ static void cas_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info
 	strlcpy(info->bus_info, pci_name(cp->pdev), sizeof(info->bus_info));
 }
 
-static int cas_get_link_ksettings(struct net_device *dev,
-				  struct ethtool_link_ksettings *cmd)
+static int cas_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
 	struct cas *cp = netdev_priv(dev);
 	u16 bmcr;
 	int full_duplex, speed, pause;
 	unsigned long flags;
 	enum link_state linkstate = link_up;
-	u32 supported, advertising;
 
-	advertising = 0;
-	supported = SUPPORTED_Autoneg;
+	cmd->advertising = 0;
+	cmd->supported = SUPPORTED_Autoneg;
 	if (cp->cas_flags & CAS_FLAG_1000MB_CAP) {
-		supported |= SUPPORTED_1000baseT_Full;
-		advertising |= ADVERTISED_1000baseT_Full;
+		cmd->supported |= SUPPORTED_1000baseT_Full;
+		cmd->advertising |= ADVERTISED_1000baseT_Full;
 	}
 
 	/* Record PHY settings if HW is on. */
@@ -4551,15 +4548,17 @@ static int cas_get_link_ksettings(struct net_device *dev,
 	bmcr = 0;
 	linkstate = cp->lstate;
 	if (CAS_PHY_MII(cp->phy_type)) {
-		cmd->base.port = PORT_MII;
-		cmd->base.phy_address = cp->phy_addr;
-		advertising |= ADVERTISED_TP | ADVERTISED_MII |
+		cmd->port = PORT_MII;
+		cmd->transceiver = (cp->cas_flags & CAS_FLAG_SATURN) ?
+			XCVR_INTERNAL : XCVR_EXTERNAL;
+		cmd->phy_address = cp->phy_addr;
+		cmd->advertising |= ADVERTISED_TP | ADVERTISED_MII |
 			ADVERTISED_10baseT_Half |
 			ADVERTISED_10baseT_Full |
 			ADVERTISED_100baseT_Half |
 			ADVERTISED_100baseT_Full;
 
-		supported |=
+		cmd->supported |=
 			(SUPPORTED_10baseT_Half |
 			 SUPPORTED_10baseT_Full |
 			 SUPPORTED_100baseT_Half |
@@ -4575,10 +4574,11 @@ static int cas_get_link_ksettings(struct net_device *dev,
 		}
 
 	} else {
-		cmd->base.port = PORT_FIBRE;
-		cmd->base.phy_address = 0;
-		supported   |= SUPPORTED_FIBRE;
-		advertising |= ADVERTISED_FIBRE;
+		cmd->port = PORT_FIBRE;
+		cmd->transceiver = XCVR_INTERNAL;
+		cmd->phy_address = 0;
+		cmd->supported   |= SUPPORTED_FIBRE;
+		cmd->advertising |= ADVERTISED_FIBRE;
 
 		if (cp->hw_running) {
 			/* pcs uses the same bits as mii */
@@ -4590,20 +4590,21 @@ static int cas_get_link_ksettings(struct net_device *dev,
 	spin_unlock_irqrestore(&cp->lock, flags);
 
 	if (bmcr & BMCR_ANENABLE) {
-		advertising |= ADVERTISED_Autoneg;
-		cmd->base.autoneg = AUTONEG_ENABLE;
-		cmd->base.speed =  ((speed == 10) ?
+		cmd->advertising |= ADVERTISED_Autoneg;
+		cmd->autoneg = AUTONEG_ENABLE;
+		ethtool_cmd_speed_set(cmd, ((speed == 10) ?
 					    SPEED_10 :
 					    ((speed == 1000) ?
-					     SPEED_1000 : SPEED_100));
-		cmd->base.duplex = full_duplex ? DUPLEX_FULL : DUPLEX_HALF;
+					     SPEED_1000 : SPEED_100)));
+		cmd->duplex = full_duplex ? DUPLEX_FULL : DUPLEX_HALF;
 	} else {
-		cmd->base.autoneg = AUTONEG_DISABLE;
-		cmd->base.speed = ((bmcr & CAS_BMCR_SPEED1000) ?
+		cmd->autoneg = AUTONEG_DISABLE;
+		ethtool_cmd_speed_set(cmd, ((bmcr & CAS_BMCR_SPEED1000) ?
 					    SPEED_1000 :
 					    ((bmcr & BMCR_SPEED100) ?
-					     SPEED_100 : SPEED_10));
-		cmd->base.duplex = (bmcr & BMCR_FULLDPLX) ?
+					     SPEED_100 : SPEED_10)));
+		cmd->duplex =
+			(bmcr & BMCR_FULLDPLX) ?
 			DUPLEX_FULL : DUPLEX_HALF;
 	}
 	if (linkstate != link_up) {
@@ -4618,46 +4619,39 @@ static int cas_get_link_ksettings(struct net_device *dev,
 		 * settings that we configured.
 		 */
 		if (cp->link_cntl & BMCR_ANENABLE) {
-			cmd->base.speed = 0;
-			cmd->base.duplex = 0xff;
+			ethtool_cmd_speed_set(cmd, 0);
+			cmd->duplex = 0xff;
 		} else {
-			cmd->base.speed = SPEED_10;
+			ethtool_cmd_speed_set(cmd, SPEED_10);
 			if (cp->link_cntl & BMCR_SPEED100) {
-				cmd->base.speed = SPEED_100;
+				ethtool_cmd_speed_set(cmd, SPEED_100);
 			} else if (cp->link_cntl & CAS_BMCR_SPEED1000) {
-				cmd->base.speed = SPEED_1000;
+				ethtool_cmd_speed_set(cmd, SPEED_1000);
 			}
-			cmd->base.duplex = (cp->link_cntl & BMCR_FULLDPLX) ?
+			cmd->duplex = (cp->link_cntl & BMCR_FULLDPLX)?
 				DUPLEX_FULL : DUPLEX_HALF;
 		}
 	}
-
-	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
-						supported);
-	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
-						advertising);
-
 	return 0;
 }
 
-static int cas_set_link_ksettings(struct net_device *dev,
-				  const struct ethtool_link_ksettings *cmd)
+static int cas_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
 	struct cas *cp = netdev_priv(dev);
 	unsigned long flags;
-	u32 speed = cmd->base.speed;
+	u32 speed = ethtool_cmd_speed(cmd);
 
 	/* Verify the settings we care about. */
-	if (cmd->base.autoneg != AUTONEG_ENABLE &&
-	    cmd->base.autoneg != AUTONEG_DISABLE)
+	if (cmd->autoneg != AUTONEG_ENABLE &&
+	    cmd->autoneg != AUTONEG_DISABLE)
 		return -EINVAL;
 
-	if (cmd->base.autoneg == AUTONEG_DISABLE &&
+	if (cmd->autoneg == AUTONEG_DISABLE &&
 	    ((speed != SPEED_1000 &&
 	      speed != SPEED_100 &&
 	      speed != SPEED_10) ||
-	     (cmd->base.duplex != DUPLEX_HALF &&
-	      cmd->base.duplex != DUPLEX_FULL)))
+	     (cmd->duplex != DUPLEX_HALF &&
+	      cmd->duplex != DUPLEX_FULL)))
 		return -EINVAL;
 
 	/* Apply settings and restart link process. */
@@ -4759,6 +4753,8 @@ static void cas_get_ethtool_stats(struct net_device *dev,
 
 static const struct ethtool_ops cas_ethtool_ops = {
 	.get_drvinfo		= cas_get_drvinfo,
+	.get_settings		= cas_get_settings,
+	.set_settings		= cas_set_settings,
 	.nway_reset		= cas_nway_reset,
 	.get_link		= cas_get_link,
 	.get_msglevel		= cas_get_msglevel,
@@ -4768,8 +4764,6 @@ static const struct ethtool_ops cas_ethtool_ops = {
 	.get_sset_count		= cas_get_sset_count,
 	.get_strings		= cas_get_strings,
 	.get_ethtool_stats	= cas_get_ethtool_stats,
-	.get_link_ksettings	= cas_get_link_ksettings,
-	.set_link_ksettings	= cas_set_link_ksettings,
 };
 
 static int cas_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 2dcca249eb9c..57978056b336 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -6813,8 +6813,7 @@ static void niu_get_drvinfo(struct net_device *dev,
 			sizeof(info->bus_info));
 }
 
-static int niu_get_link_ksettings(struct net_device *dev,
-				  struct ethtool_link_ksettings *cmd)
+static int niu_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
 	struct niu *np = netdev_priv(dev);
 	struct niu_link_config *lp;
@@ -6822,30 +6821,28 @@ static int niu_get_link_ksettings(struct net_device *dev,
 	lp = &np->link_config;
 
 	memset(cmd, 0, sizeof(*cmd));
-	cmd->base.phy_address = np->phy_addr;
-	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
-						lp->supported);
-	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
-						lp->active_advertising);
-	cmd->base.autoneg = lp->active_autoneg;
-	cmd->base.speed = lp->active_speed;
-	cmd->base.duplex = lp->active_duplex;
-	cmd->base.port = (np->flags & NIU_FLAGS_FIBER) ? PORT_FIBRE : PORT_TP;
+	cmd->phy_address = np->phy_addr;
+	cmd->supported = lp->supported;
+	cmd->advertising = lp->active_advertising;
+	cmd->autoneg = lp->active_autoneg;
+	ethtool_cmd_speed_set(cmd, lp->active_speed);
+	cmd->duplex = lp->active_duplex;
+	cmd->port = (np->flags & NIU_FLAGS_FIBER) ? PORT_FIBRE : PORT_TP;
+	cmd->transceiver = (np->flags & NIU_FLAGS_XCVR_SERDES) ?
+		XCVR_EXTERNAL : XCVR_INTERNAL;
 
 	return 0;
 }
 
-static int niu_set_link_ksettings(struct net_device *dev,
-				  const struct ethtool_link_ksettings *cmd)
+static int niu_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
 	struct niu *np = netdev_priv(dev);
 	struct niu_link_config *lp = &np->link_config;
 
-	ethtool_convert_link_mode_to_legacy_u32(&lp->advertising,
-						cmd->link_modes.advertising);
-	lp->speed = cmd->base.speed;
-	lp->duplex = cmd->base.duplex;
-	lp->autoneg = cmd->base.autoneg;
+	lp->advertising = cmd->advertising;
+	lp->speed = ethtool_cmd_speed(cmd);
+	lp->duplex = cmd->duplex;
+	lp->autoneg = cmd->autoneg;
 	return niu_init_link(np);
 }
 
@@ -7905,14 +7902,14 @@ static const struct ethtool_ops niu_ethtool_ops = {
 	.nway_reset		= niu_nway_reset,
 	.get_eeprom_len		= niu_get_eeprom_len,
 	.get_eeprom		= niu_get_eeprom,
+	.get_settings		= niu_get_settings,
+	.set_settings		= niu_set_settings,
 	.get_strings		= niu_get_strings,
 	.get_sset_count		= niu_get_sset_count,
 	.get_ethtool_stats	= niu_get_ethtool_stats,
 	.set_phys_id		= niu_set_phys_id,
 	.get_rxnfc		= niu_get_nfc,
 	.set_rxnfc		= niu_set_nfc,
-	.get_link_ksettings	= niu_get_link_ksettings,
-	.set_link_ksettings	= niu_set_link_ksettings,
 };
 
 static int niu_ldg_assign_ldn(struct niu *np, struct niu_parent *parent,
diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c
index dbfca0466760..5c5952e782cd 100644
--- a/drivers/net/ethernet/sun/sungem.c
+++ b/drivers/net/ethernet/sun/sungem.c
@@ -1250,17 +1250,12 @@ static void gem_stop_dma(struct gem *gp)
 
 
 // XXX dbl check what that function should do when called on PCS PHY
-static void gem_begin_auto_negotiation(struct gem *gp,
-				       const struct ethtool_link_ksettings *ep)
+static void gem_begin_auto_negotiation(struct gem *gp, struct ethtool_cmd *ep)
 {
 	u32 advertise, features;
 	int autoneg;
 	int speed;
 	int duplex;
-	u32 advertising;
-
-	ethtool_convert_link_mode_to_legacy_u32(&advertising,
-						ep->link_modes.advertising);
 
 	if (gp->phy_type != phy_mii_mdio0 &&
      	    gp->phy_type != phy_mii_mdio1)
@@ -1283,13 +1278,13 @@ static void gem_begin_auto_negotiation(struct gem *gp,
 	/* Setup link parameters */
 	if (!ep)
 		goto start_aneg;
-	if (ep->base.autoneg == AUTONEG_ENABLE) {
-		advertise = advertising;
+	if (ep->autoneg == AUTONEG_ENABLE) {
+		advertise = ep->advertising;
 		autoneg = 1;
 	} else {
 		autoneg = 0;
-		speed = ep->base.speed;
-		duplex = ep->base.duplex;
+		speed = ethtool_cmd_speed(ep);
+		duplex = ep->duplex;
 	}
 
 start_aneg:
@@ -2520,96 +2515,85 @@ static void gem_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info
 	strlcpy(info->bus_info, pci_name(gp->pdev), sizeof(info->bus_info));
 }
 
-static int gem_get_link_ksettings(struct net_device *dev,
-				  struct ethtool_link_ksettings *cmd)
+static int gem_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
 	struct gem *gp = netdev_priv(dev);
-	u32 supported, advertising;
 
 	if (gp->phy_type == phy_mii_mdio0 ||
 	    gp->phy_type == phy_mii_mdio1) {
 		if (gp->phy_mii.def)
-			supported = gp->phy_mii.def->features;
+			cmd->supported = gp->phy_mii.def->features;
 		else
-			supported = (SUPPORTED_10baseT_Half |
+			cmd->supported = (SUPPORTED_10baseT_Half |
 					  SUPPORTED_10baseT_Full);
 
 		/* XXX hardcoded stuff for now */
-		cmd->base.port = PORT_MII;
-		cmd->base.phy_address = 0; /* XXX fixed PHYAD */
+		cmd->port = PORT_MII;
+		cmd->transceiver = XCVR_EXTERNAL;
+		cmd->phy_address = 0; /* XXX fixed PHYAD */
 
 		/* Return current PHY settings */
-		cmd->base.autoneg = gp->want_autoneg;
-		cmd->base.speed = gp->phy_mii.speed;
-		cmd->base.duplex = gp->phy_mii.duplex;
-		advertising = gp->phy_mii.advertising;
+		cmd->autoneg = gp->want_autoneg;
+		ethtool_cmd_speed_set(cmd, gp->phy_mii.speed);
+		cmd->duplex = gp->phy_mii.duplex;
+		cmd->advertising = gp->phy_mii.advertising;
 
 		/* If we started with a forced mode, we don't have a default
 		 * advertise set, we need to return something sensible so
 		 * userland can re-enable autoneg properly.
 		 */
-		if (advertising == 0)
-			advertising = supported;
+		if (cmd->advertising == 0)
+			cmd->advertising = cmd->supported;
 	} else { // XXX PCS ?
-		supported =
+		cmd->supported =
 			(SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
 			 SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
 			 SUPPORTED_Autoneg);
-		advertising = supported;
-		cmd->base.speed = 0;
-		cmd->base.duplex = 0;
-		cmd->base.port = 0;
-		cmd->base.phy_address = 0;
-		cmd->base.autoneg = 0;
+		cmd->advertising = cmd->supported;
+		ethtool_cmd_speed_set(cmd, 0);
+		cmd->duplex = cmd->port = cmd->phy_address =
+			cmd->transceiver = cmd->autoneg = 0;
 
 		/* serdes means usually a Fibre connector, with most fixed */
 		if (gp->phy_type == phy_serdes) {
-			cmd->base.port = PORT_FIBRE;
-			supported = (SUPPORTED_1000baseT_Half |
+			cmd->port = PORT_FIBRE;
+			cmd->supported = (SUPPORTED_1000baseT_Half |
 				SUPPORTED_1000baseT_Full |
 				SUPPORTED_FIBRE | SUPPORTED_Autoneg |
 				SUPPORTED_Pause | SUPPORTED_Asym_Pause);
-			advertising = supported;
+			cmd->advertising = cmd->supported;
+			cmd->transceiver = XCVR_INTERNAL;
 			if (gp->lstate == link_up)
-				cmd->base.speed = SPEED_1000;
-			cmd->base.duplex = DUPLEX_FULL;
-			cmd->base.autoneg = 1;
+				ethtool_cmd_speed_set(cmd, SPEED_1000);
+			cmd->duplex = DUPLEX_FULL;
+			cmd->autoneg = 1;
 		}
 	}
-
-	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
-						supported);
-	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
-						advertising);
+	cmd->maxtxpkt = cmd->maxrxpkt = 0;
 
 	return 0;
 }
 
-static int gem_set_link_ksettings(struct net_device *dev,
-				  const struct ethtool_link_ksettings *cmd)
+static int gem_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
 	struct gem *gp = netdev_priv(dev);
-	u32 speed = cmd->base.speed;
-	u32 advertising;
-
-	ethtool_convert_link_mode_to_legacy_u32(&advertising,
-						cmd->link_modes.advertising);
+	u32 speed = ethtool_cmd_speed(cmd);
 
 	/* Verify the settings we care about. */
-	if (cmd->base.autoneg != AUTONEG_ENABLE &&
-	    cmd->base.autoneg != AUTONEG_DISABLE)
+	if (cmd->autoneg != AUTONEG_ENABLE &&
+	    cmd->autoneg != AUTONEG_DISABLE)
 		return -EINVAL;
 
-	if (cmd->base.autoneg == AUTONEG_ENABLE &&
-	    advertising == 0)
+	if (cmd->autoneg == AUTONEG_ENABLE &&
+	    cmd->advertising == 0)
 		return -EINVAL;
 
-	if (cmd->base.autoneg == AUTONEG_DISABLE &&
+	if (cmd->autoneg == AUTONEG_DISABLE &&
 	    ((speed != SPEED_1000 &&
 	      speed != SPEED_100 &&
 	      speed != SPEED_10) ||
-	     (cmd->base.duplex != DUPLEX_HALF &&
-	      cmd->base.duplex != DUPLEX_FULL)))
+	     (cmd->duplex != DUPLEX_HALF &&
+	      cmd->duplex != DUPLEX_FULL)))
 		return -EINVAL;
 
 	/* Apply settings and restart link process. */
@@ -2682,13 +2666,13 @@ static int gem_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 static const struct ethtool_ops gem_ethtool_ops = {
 	.get_drvinfo		= gem_get_drvinfo,
 	.get_link		= ethtool_op_get_link,
+	.get_settings		= gem_get_settings,
+	.set_settings		= gem_set_settings,
 	.nway_reset		= gem_nway_reset,
 	.get_msglevel		= gem_get_msglevel,
 	.set_msglevel		= gem_set_msglevel,
 	.get_wol		= gem_get_wol,
 	.set_wol		= gem_set_wol,
-	.get_link_ksettings	= gem_get_link_ksettings,
-	.set_link_ksettings	= gem_set_link_ksettings,
 };
 
 static int gem_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c
index 53ff66ef53ac..72ff05cd3ed8 100644
--- a/drivers/net/ethernet/sun/sunhme.c
+++ b/drivers/net/ethernet/sun/sunhme.c
@@ -1294,10 +1294,9 @@ static void happy_meal_init_rings(struct happy_meal *hp)
 }
 
 /* hp->happy_lock must be held */
-static void
-happy_meal_begin_auto_negotiation(struct happy_meal *hp,
-				  void __iomem *tregs,
-				  const struct ethtool_link_ksettings *ep)
+static void happy_meal_begin_auto_negotiation(struct happy_meal *hp,
+					      void __iomem *tregs,
+					      struct ethtool_cmd *ep)
 {
 	int timeout;
 
@@ -1310,7 +1309,7 @@ happy_meal_begin_auto_negotiation(struct happy_meal *hp,
 	/* XXX Check BMSR_ANEGCAPABLE, should not be necessary though. */
 
 	hp->sw_advertise = happy_meal_tcvr_read(hp, tregs, MII_ADVERTISE);
-	if (!ep || ep->base.autoneg == AUTONEG_ENABLE) {
+	if (ep == NULL || ep->autoneg == AUTONEG_ENABLE) {
 		/* Advertise everything we can support. */
 		if (hp->sw_bmsr & BMSR_10HALF)
 			hp->sw_advertise |= (ADVERTISE_10HALF);
@@ -1385,14 +1384,14 @@ force_link:
 		/* Disable auto-negotiation in BMCR, enable the duplex and
 		 * speed setting, init the timer state machine, and fire it off.
 		 */
-		if (!ep || ep->base.autoneg == AUTONEG_ENABLE) {
+		if (ep == NULL || ep->autoneg == AUTONEG_ENABLE) {
 			hp->sw_bmcr = BMCR_SPEED100;
 		} else {
-			if (ep->base.speed == SPEED_100)
+			if (ethtool_cmd_speed(ep) == SPEED_100)
 				hp->sw_bmcr = BMCR_SPEED100;
 			else
 				hp->sw_bmcr = 0;
-			if (ep->base.duplex == DUPLEX_FULL)
+			if (ep->duplex == DUPLEX_FULL)
 				hp->sw_bmcr |= BMCR_FULLDPLX;
 		}
 		happy_meal_tcvr_write(hp, tregs, MII_BMCR, hp->sw_bmcr);
@@ -2435,21 +2434,20 @@ static void happy_meal_set_multicast(struct net_device *dev)
 }
 
 /* Ethtool support... */
-static int hme_get_link_ksettings(struct net_device *dev,
-				  struct ethtool_link_ksettings *cmd)
+static int hme_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
 	struct happy_meal *hp = netdev_priv(dev);
 	u32 speed;
-	u32 supported;
 
-	supported =
+	cmd->supported =
 		(SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
 		 SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
 		 SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII);
 
 	/* XXX hardcoded stuff for now */
-	cmd->base.port = PORT_TP; /* XXX no MII support */
-	cmd->base.phy_address = 0; /* XXX fixed PHYAD */
+	cmd->port = PORT_TP; /* XXX no MII support */
+	cmd->transceiver = XCVR_INTERNAL; /* XXX no external xcvr support */
+	cmd->phy_address = 0; /* XXX fixed PHYAD */
 
 	/* Record PHY settings. */
 	spin_lock_irq(&hp->happy_lock);
@@ -2458,45 +2456,41 @@ static int hme_get_link_ksettings(struct net_device *dev,
 	spin_unlock_irq(&hp->happy_lock);
 
 	if (hp->sw_bmcr & BMCR_ANENABLE) {
-		cmd->base.autoneg = AUTONEG_ENABLE;
+		cmd->autoneg = AUTONEG_ENABLE;
 		speed = ((hp->sw_lpa & (LPA_100HALF | LPA_100FULL)) ?
 			 SPEED_100 : SPEED_10);
 		if (speed == SPEED_100)
-			cmd->base.duplex =
+			cmd->duplex =
 				(hp->sw_lpa & (LPA_100FULL)) ?
 				DUPLEX_FULL : DUPLEX_HALF;
 		else
-			cmd->base.duplex =
+			cmd->duplex =
 				(hp->sw_lpa & (LPA_10FULL)) ?
 				DUPLEX_FULL : DUPLEX_HALF;
 	} else {
-		cmd->base.autoneg = AUTONEG_DISABLE;
+		cmd->autoneg = AUTONEG_DISABLE;
 		speed = (hp->sw_bmcr & BMCR_SPEED100) ? SPEED_100 : SPEED_10;
-		cmd->base.duplex =
+		cmd->duplex =
 			(hp->sw_bmcr & BMCR_FULLDPLX) ?
 			DUPLEX_FULL : DUPLEX_HALF;
 	}
-	cmd->base.speed = speed;
-	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
-						supported);
-
+	ethtool_cmd_speed_set(cmd, speed);
 	return 0;
 }
 
-static int hme_set_link_ksettings(struct net_device *dev,
-				  const struct ethtool_link_ksettings *cmd)
+static int hme_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
 	struct happy_meal *hp = netdev_priv(dev);
 
 	/* Verify the settings we care about. */
-	if (cmd->base.autoneg != AUTONEG_ENABLE &&
-	    cmd->base.autoneg != AUTONEG_DISABLE)
+	if (cmd->autoneg != AUTONEG_ENABLE &&
+	    cmd->autoneg != AUTONEG_DISABLE)
 		return -EINVAL;
-	if (cmd->base.autoneg == AUTONEG_DISABLE &&
-	    ((cmd->base.speed != SPEED_100 &&
-	      cmd->base.speed != SPEED_10) ||
-	     (cmd->base.duplex != DUPLEX_HALF &&
-	      cmd->base.duplex != DUPLEX_FULL)))
+	if (cmd->autoneg == AUTONEG_DISABLE &&
+	    ((ethtool_cmd_speed(cmd) != SPEED_100 &&
+	      ethtool_cmd_speed(cmd) != SPEED_10) ||
+	     (cmd->duplex != DUPLEX_HALF &&
+	      cmd->duplex != DUPLEX_FULL)))
 		return -EINVAL;
 
 	/* Ok, do it to it. */
@@ -2543,10 +2537,10 @@ static u32 hme_get_link(struct net_device *dev)
 }
 
 static const struct ethtool_ops hme_ethtool_ops = {
+	.get_settings		= hme_get_settings,
+	.set_settings		= hme_set_settings,
 	.get_drvinfo		= hme_get_drvinfo,
 	.get_link		= hme_get_link,
-	.get_link_ksettings	= hme_get_link_ksettings,
-	.set_link_ksettings	= hme_set_link_ksettings,
 };
 
 static int hme_version_printed;
diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
index fa6a06571187..72013314bba8 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
@@ -1206,68 +1206,61 @@ void gelic_net_get_drvinfo(struct net_device *netdev,
 	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
 }
 
-static int gelic_ether_get_link_ksettings(struct net_device *netdev,
-					  struct ethtool_link_ksettings *cmd)
+static int gelic_ether_get_settings(struct net_device *netdev,
+				    struct ethtool_cmd *cmd)
 {
 	struct gelic_card *card = netdev_card(netdev);
-	u32 supported, advertising;
 
 	gelic_card_get_ether_port_status(card, 0);
 
 	if (card->ether_port_status & GELIC_LV1_ETHER_FULL_DUPLEX)
-		cmd->base.duplex = DUPLEX_FULL;
+		cmd->duplex = DUPLEX_FULL;
 	else
-		cmd->base.duplex = DUPLEX_HALF;
+		cmd->duplex = DUPLEX_HALF;
 
 	switch (card->ether_port_status & GELIC_LV1_ETHER_SPEED_MASK) {
 	case GELIC_LV1_ETHER_SPEED_10:
-		cmd->base.speed = SPEED_10;
+		ethtool_cmd_speed_set(cmd, SPEED_10);
 		break;
 	case GELIC_LV1_ETHER_SPEED_100:
-		cmd->base.speed = SPEED_100;
+		ethtool_cmd_speed_set(cmd, SPEED_100);
 		break;
 	case GELIC_LV1_ETHER_SPEED_1000:
-		cmd->base.speed = SPEED_1000;
+		ethtool_cmd_speed_set(cmd, SPEED_1000);
 		break;
 	default:
 		pr_info("%s: speed unknown\n", __func__);
-		cmd->base.speed = SPEED_10;
+		ethtool_cmd_speed_set(cmd, SPEED_10);
 		break;
 	}
 
-	supported = SUPPORTED_TP | SUPPORTED_Autoneg |
+	cmd->supported = SUPPORTED_TP | SUPPORTED_Autoneg |
 			SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
 			SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
 			SUPPORTED_1000baseT_Full;
-	advertising = supported;
+	cmd->advertising = cmd->supported;
 	if (card->link_mode & GELIC_LV1_ETHER_AUTO_NEG) {
-		cmd->base.autoneg = AUTONEG_ENABLE;
+		cmd->autoneg = AUTONEG_ENABLE;
 	} else {
-		cmd->base.autoneg = AUTONEG_DISABLE;
-		advertising &= ~ADVERTISED_Autoneg;
+		cmd->autoneg = AUTONEG_DISABLE;
+		cmd->advertising &= ~ADVERTISED_Autoneg;
 	}
-	cmd->base.port = PORT_TP;
-
-	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
-						supported);
-	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
-						advertising);
+	cmd->port = PORT_TP;
 
 	return 0;
 }
 
-static int
-gelic_ether_set_link_ksettings(struct net_device *netdev,
-			       const struct ethtool_link_ksettings *cmd)
+static int gelic_ether_set_settings(struct net_device *netdev,
+				    struct ethtool_cmd *cmd)
 {
 	struct gelic_card *card = netdev_card(netdev);
 	u64 mode;
 	int ret;
 
-	if (cmd->base.autoneg == AUTONEG_ENABLE) {
+	if (cmd->autoneg == AUTONEG_ENABLE) {
 		mode = GELIC_LV1_ETHER_AUTO_NEG;
 	} else {
-		switch (cmd->base.speed) {
+		switch (cmd->speed) {
 		case SPEED_10:
 			mode = GELIC_LV1_ETHER_SPEED_10;
 			break;
@@ -1280,9 +1273,9 @@ gelic_ether_set_link_ksettings(struct net_device *netdev,
 		default:
 			return -EINVAL;
 		}
-		if (cmd->base.duplex == DUPLEX_FULL) {
+		if (cmd->duplex == DUPLEX_FULL)
 			mode |= GELIC_LV1_ETHER_FULL_DUPLEX;
-		} else if (cmd->base.speed == SPEED_1000) {
+		else if (cmd->speed == SPEED_1000) {
 			pr_info("1000 half duplex is not supported.\n");
 			return -EINVAL;
 		}
@@ -1377,11 +1370,11 @@ done:
 
 static const struct ethtool_ops gelic_ether_ethtool_ops = {
 	.get_drvinfo	= gelic_net_get_drvinfo,
+	.get_settings	= gelic_ether_get_settings,
+	.set_settings	= gelic_ether_set_settings,
 	.get_link	= ethtool_op_get_link,
 	.get_wol	= gelic_net_get_wol,
 	.set_wol	= gelic_net_set_wol,
-	.get_link_ksettings = gelic_ether_get_link_ksettings,
-	.set_link_ksettings = gelic_ether_set_link_ksettings,
 };
 
 /**
diff --git a/drivers/net/ethernet/toshiba/spider_net_ethtool.c b/drivers/net/ethernet/toshiba/spider_net_ethtool.c
index 16bd036d0682..ffe519382e11 100644
--- a/drivers/net/ethernet/toshiba/spider_net_ethtool.c
+++ b/drivers/net/ethernet/toshiba/spider_net_ethtool.c
@@ -47,23 +47,19 @@ static struct {
 };
 
 static int
-spider_net_ethtool_get_link_ksettings(struct net_device *netdev,
-				      struct ethtool_link_ksettings *cmd)
+spider_net_ethtool_get_settings(struct net_device *netdev,
+			       struct ethtool_cmd *cmd)
 {
 	struct spider_net_card *card;
 	card = netdev_priv(netdev);
 
-	ethtool_link_ksettings_zero_link_mode(cmd, supported);
-	ethtool_link_ksettings_add_link_mode(cmd, supported, 1000baseT_Full);
-	ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
-
-	ethtool_link_ksettings_zero_link_mode(cmd, advertising);
-	ethtool_link_ksettings_add_link_mode(cmd, advertising, 1000baseT_Full);
-	ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE);
-
-	cmd->base.port = PORT_FIBRE;
-	cmd->base.speed = card->phy.speed;
-	cmd->base.duplex = DUPLEX_FULL;
+	cmd->supported   = (SUPPORTED_1000baseT_Full |
+			     SUPPORTED_FIBRE);
+	cmd->advertising = (ADVERTISED_1000baseT_Full |
+			     ADVERTISED_FIBRE);
+	cmd->port = PORT_FIBRE;
+	ethtool_cmd_speed_set(cmd, card->phy.speed);
+	cmd->duplex = DUPLEX_FULL;
 
 	return 0;
 }
@@ -170,6 +166,7 @@ static void spider_net_get_strings(struct net_device *netdev, u32 stringset,
 }
 
 const struct ethtool_ops spider_net_ethtool_ops = {
+	.get_settings		= spider_net_ethtool_get_settings,
 	.get_drvinfo		= spider_net_ethtool_get_drvinfo,
 	.get_wol		= spider_net_ethtool_get_wol,
 	.get_msglevel		= spider_net_ethtool_get_msglevel,
@@ -180,6 +177,5 @@ const struct ethtool_ops spider_net_ethtool_ops = {
 	.get_strings		= spider_net_get_strings,
 	.get_sset_count		= spider_net_get_sset_count,
 	.get_ethtool_stats	= spider_net_get_ethtool_stats,
-	.get_link_ksettings	= spider_net_ethtool_get_link_ksettings,
 };
 
-- 
cgit v1.2.3


From 3331aa378e9bcbd0d16de9034b0c20f4050e26b4 Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Mon, 6 Mar 2017 08:48:58 -0500
Subject: team: use ETH_MAX_MTU as max mtu

This restores the ability to set a team device's mtu to anything higher
than 1500. Similar to the reported issue with bonding, the team driver
calls ether_setup(), which sets an initial max_mtu of 1500, while the
underlying hardware can handle something much larger. Just set it to
ETH_MAX_MTU to support all possible values, and the limitations of the
underlying devices will prevent setting anything too large.

Fixes: 91572088e3fd ("net: use core MTU range checking in core net infra")
CC: Cong Wang <xiyou.wangcong@gmail.com>
CC: Jiri Pirko <jiri@resnulli.us>
CC: netdev@vger.kernel.org
Signed-off-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 4a24b5d15f5a..1b52520715ae 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2072,6 +2072,7 @@ static int team_dev_type_check_change(struct net_device *dev,
 static void team_setup(struct net_device *dev)
 {
 	ether_setup(dev);
+	dev->max_mtu = ETH_MAX_MTU;
 
 	dev->netdev_ops = &team_netdev_ops;
 	dev->ethtool_ops = &team_ethtool_ops;
-- 
cgit v1.2.3


From f7887d40e541f74402df0684a1463c0a0bb68c68 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Mon, 6 Mar 2017 08:53:04 -0800
Subject: vrf: Fix use-after-free in vrf_xmit

KASAN detected a use-after-free:

[  269.467067] BUG: KASAN: use-after-free in vrf_xmit+0x7f1/0x827 [vrf] at addr ffff8800350a21c0
[  269.467067] Read of size 4 by task ssh/1879
[  269.467067] CPU: 1 PID: 1879 Comm: ssh Not tainted 4.10.0+ #249
[  269.467067] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014
[  269.467067] Call Trace:
[  269.467067]  dump_stack+0x81/0xb6
[  269.467067]  kasan_object_err+0x21/0x78
[  269.467067]  kasan_report+0x2f7/0x450
[  269.467067]  ? vrf_xmit+0x7f1/0x827 [vrf]
[  269.467067]  ? ip_output+0xa4/0xdb
[  269.467067]  __asan_load4+0x6b/0x6d
[  269.467067]  vrf_xmit+0x7f1/0x827 [vrf]
...

Which corresponds to the skb access after xmit handling. Fix by saving
skb->len and using the saved value to update stats.

Fixes: 193125dbd8eb2 ("net: Introduce VRF device driver")
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vrf.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 22379da63400..fea687f35b5a 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -340,6 +340,7 @@ static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev)
 
 static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
 {
+	int len = skb->len;
 	netdev_tx_t ret = is_ip_tx_frame(skb, dev);
 
 	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
@@ -347,7 +348,7 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		u64_stats_update_begin(&dstats->syncp);
 		dstats->tx_pkts++;
-		dstats->tx_bytes += skb->len;
+		dstats->tx_bytes += len;
 		u64_stats_update_end(&dstats->syncp);
 	} else {
 		this_cpu_inc(dev->dstats->tx_drps);
-- 
cgit v1.2.3


From 713c43b315fc160dc4ff3da0020ebe09b8a65587 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 6 Mar 2017 21:22:09 +0100
Subject: mlxsw: spectrum_flower: Remove bogus warns in mlxsw_sp_flower_destroy

This warnings may be hit even in case they should not - in case user
puts a TC-flower rule which failed to be offloaded. So just remove them.

Reported-by: Petr Machata <petrm@mellanox.com>
Reported-by: Ido Schimmel <idosch@mellanox.com>
Fixes: commit 7aa0f5aa9030 ("mlxsw: spectrum: Implement TC flower offload")
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 22ab42925377..ae6cccc666e4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -303,11 +303,11 @@ void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
 	ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, mlxsw_sp_port->dev,
 					   ingress,
 					   MLXSW_SP_ACL_PROFILE_FLOWER);
-	if (WARN_ON(IS_ERR(ruleset)))
+	if (IS_ERR(ruleset))
 		return;
 
 	rule = mlxsw_sp_acl_rule_lookup(mlxsw_sp, ruleset, f->cookie);
-	if (!WARN_ON(!rule)) {
+	if (rule) {
 		mlxsw_sp_acl_rule_del(mlxsw_sp, rule);
 		mlxsw_sp_acl_rule_destroy(mlxsw_sp, rule);
 	}
-- 
cgit v1.2.3


From 7aafac11e308d37ed3c509829bb43d80c1811ac3 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Wed, 22 Feb 2017 15:43:59 +1100
Subject: powerpc/powernv/ioda2: Gracefully fail if too many TCE levels
 requested

The IODA2 specification says that a 64 DMA address cannot use top 4 bits
(3 are reserved and one is a "TVE select"); bottom page_shift bits
cannot be used for multilevel table addressing either.

The existing IODA2 table allocation code aligns the minimum TCE table
size to PAGE_SIZE so in the case of 64K system pages and 4K IOMMU pages,
we have 64-4-12=48 bits. Since 64K page stores 8192 TCEs, i.e. needs
13 bits, the maximum number of levels is 48/13 = 3 so we physically
cannot address more and EEH happens on DMA accesses.

This adds a check that too many levels were requested.

It is still possible to have 5 levels in the case of 4K system page size.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Acked-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 6901a06da2f9..957a57a6c812 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2624,6 +2624,9 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
 	level_shift = entries_shift + 3;
 	level_shift = max_t(unsigned, level_shift, PAGE_SHIFT);
 
+	if ((level_shift - 3) * levels + page_shift >= 60)
+		return -EINVAL;
+
 	/* Allocate TCE table */
 	addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
 			levels, tce_table_size, &offset, &total_allocated);
-- 
cgit v1.2.3


From 89cf83d4e065ff9fbd2ddc674489c8058eeca758 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 16 Feb 2017 12:54:41 +0000
Subject: drm/i915: Squelch any ktime/jiffie rounding errors for wait-ioctl

We wait upon jiffies, but report the time elapsed using a
high-resolution timer. This discrepancy can lead to us timing out the
wait prior to us reporting the elapsed time as complete.

This restores the squelching lost in commit e95433c73a11 ("drm/i915:
Rearrange i915_wait_request() accounting with callers").

Fixes: e95433c73a11 ("drm/i915: Rearrange i915_wait_request() accounting with callers")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.william.auld@gmail.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: <drm-intel-fixes@lists.freedesktop.org> # v4.10-rc1+
Cc: stable@vger.kernel.org
Link: http://patchwork.freedesktop.org/patch/msgid/20170216125441.30923-1-chris@chris-wilson.co.uk
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
(cherry picked from commit c1d2061b28c2aa25ec39b60d9c248e6beebd7315)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6908123162d1..c45af09555dc 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3029,6 +3029,16 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
 		if (args->timeout_ns < 0)
 			args->timeout_ns = 0;
+
+		/*
+		 * Apparently ktime isn't accurate enough and occasionally has a
+		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
+		 * things up to make the test happy. We allow up to 1 jiffy.
+		 *
+		 * This is a regression from the timespec->ktime conversion.
+		 */
+		if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
+			args->timeout_ns = 0;
 	}
 
 	i915_gem_object_put(obj);
-- 
cgit v1.2.3


From 1d972d6021a1388021df51a58248e68372ce2b5d Mon Sep 17 00:00:00 2001
From: Ander Conselvan de Oliveira <ander.conselvan.de.oliveira@intel.com>
Date: Thu, 23 Feb 2017 09:15:57 +0200
Subject: drm/i915/glk: Fix watermark computations for third sprite plane
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Geminilake has a third sprite plane (or fourth universal plane) that is
independent from the cursor. Make sure that for_each_plane_id_on_crtc()
is aware of that extra plane so that the watermark code takes it into
account.

Fixes: e9c9882556fc ("drm/i915/glk: Configure number of sprite planes properly")
Cc: Ander Conselvan de Oliveira <ander.conselvan.de.oliveira@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Daniel Vetter <daniel.vetter@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: intel-gfx@lists.freedesktop.org
Cc: <drm-intel-fixes@lists.freedesktop.org>
Signed-off-by: Ander Conselvan de Oliveira <ander.conselvan.de.oliveira@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170223071600.14356-2-ander.conselvan.de.oliveira@intel.com
(cherry picked from commit 19c3164db457e0fc65d4501fd354506228576241)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0a4b42d31391..7febe6eecf72 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -293,6 +293,7 @@ enum plane_id {
 	PLANE_PRIMARY,
 	PLANE_SPRITE0,
 	PLANE_SPRITE1,
+	PLANE_SPRITE2,
 	PLANE_CURSOR,
 	I915_MAX_PLANES,
 };
-- 
cgit v1.2.3


From b717a0392530ae8da0da041abe5c3a6098b55660 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 24 Feb 2017 11:43:06 +0000
Subject: drm/i915/fbdev: Stop repeating tile configuration on stagnation

If we cease making progress in finding matching outputs for a tiled
configuration, stop looping over the remaining unconfigured outputs.

v2: Use conn_seq (instead of pass) to only apply tile configuration on
first pass.

Fixes: b0ee9e7fa5b4 ("drm/fb: add support for tiled monitor configurations. (v2)")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tomasz Lis <tomasz.lis@intel.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Daniel Vetter <daniel.vetter@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Sean Paul <seanpaul@chromium.org>
Cc: <stable@vger.kernel.org> # v3.19+
Reviewed-by: Tomasz Lis <tomasz.lis@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170224114306.4400-1-chris@chris-wilson.co.uk
(cherry picked from commit 754a76591b12c88f57ad8b4ca533a5c9566a1922)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_fbdev.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 1b8ba2e77539..2d449fb5d1d2 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -357,14 +357,13 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper,
 				    bool *enabled, int width, int height)
 {
 	struct drm_i915_private *dev_priv = to_i915(fb_helper->dev);
-	unsigned long conn_configured, mask;
+	unsigned long conn_configured, conn_seq, mask;
 	unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG);
 	int i, j;
 	bool *save_enabled;
 	bool fallback = true;
 	int num_connectors_enabled = 0;
 	int num_connectors_detected = 0;
-	int pass = 0;
 
 	save_enabled = kcalloc(count, sizeof(bool), GFP_KERNEL);
 	if (!save_enabled)
@@ -374,6 +373,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper,
 	mask = BIT(count) - 1;
 	conn_configured = 0;
 retry:
+	conn_seq = conn_configured;
 	for (i = 0; i < count; i++) {
 		struct drm_fb_helper_connector *fb_conn;
 		struct drm_connector *connector;
@@ -387,7 +387,7 @@ retry:
 		if (conn_configured & BIT(i))
 			continue;
 
-		if (pass == 0 && !connector->has_tile)
+		if (conn_seq == 0 && !connector->has_tile)
 			continue;
 
 		if (connector->status == connector_status_connected)
@@ -498,10 +498,8 @@ retry:
 		conn_configured |= BIT(i);
 	}
 
-	if ((conn_configured & mask) != mask) {
-		pass++;
+	if ((conn_configured & mask) != mask && conn_configured != conn_seq)
 		goto retry;
-	}
 
 	/*
 	 * If the BIOS didn't enable everything it could, fall back to have the
-- 
cgit v1.2.3


From 8c9923707f30ff56d9fd242053594b18f38d8036 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 27 Feb 2017 12:26:54 +0000
Subject: drm/i915: Remove the vma from the drm_mm if binding fails

As we track whether a vma has been inserted into the drm_mm using the
vma->flags, if we fail to bind the vma into the GTT we do not update
those bits and will attempt to reinsert the vma into the drm_mm on
future passes. To prevent that, we want to unwind i915_vma_insert() if
we fail in our attempt to bind.

Fixes: 59bfa1248e22 ("drm/i915: Start passing around i915_vma from execbuffer")
Testcase: igt/drv_selftest/live_gtt
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.william.auld@gmail.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: <stable@vger.kernel.org> # v4.9+
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170227122654.27651-3-chris@chris-wilson.co.uk
(cherry picked from commit 31c7effa39f21f0fea1b3250ae9ff32b9c7e1ae5)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_vma.c | 57 ++++++++++++++++++++++++++---------------
 1 file changed, 37 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 155906e84812..df20e9bc1c0f 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -512,10 +512,36 @@ err_unpin:
 	return ret;
 }
 
+static void
+i915_vma_remove(struct i915_vma *vma)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+
+	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+	GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
+
+	drm_mm_remove_node(&vma->node);
+	list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
+
+	/* Since the unbound list is global, only move to that list if
+	 * no more VMAs exist.
+	 */
+	if (--obj->bind_count == 0)
+		list_move_tail(&obj->global_link,
+			       &to_i915(obj->base.dev)->mm.unbound_list);
+
+	/* And finally now the object is completely decoupled from this vma,
+	 * we can drop its hold on the backing storage and allow it to be
+	 * reaped by the shrinker.
+	 */
+	i915_gem_object_unpin_pages(obj);
+	GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count);
+}
+
 int __i915_vma_do_pin(struct i915_vma *vma,
 		      u64 size, u64 alignment, u64 flags)
 {
-	unsigned int bound = vma->flags;
+	const unsigned int bound = vma->flags;
 	int ret;
 
 	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
@@ -524,18 +550,18 @@ int __i915_vma_do_pin(struct i915_vma *vma,
 
 	if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
 		ret = -EBUSY;
-		goto err;
+		goto err_unpin;
 	}
 
 	if ((bound & I915_VMA_BIND_MASK) == 0) {
 		ret = i915_vma_insert(vma, size, alignment, flags);
 		if (ret)
-			goto err;
+			goto err_unpin;
 	}
 
 	ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
 	if (ret)
-		goto err;
+		goto err_remove;
 
 	if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND)
 		__i915_vma_set_map_and_fenceable(vma);
@@ -544,7 +570,12 @@ int __i915_vma_do_pin(struct i915_vma *vma,
 	GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
 	return 0;
 
-err:
+err_remove:
+	if ((bound & I915_VMA_BIND_MASK) == 0) {
+		GEM_BUG_ON(vma->pages);
+		i915_vma_remove(vma);
+	}
+err_unpin:
 	__i915_vma_unpin(vma);
 	return ret;
 }
@@ -657,9 +688,6 @@ int i915_vma_unbind(struct i915_vma *vma)
 	}
 	vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
 
-	drm_mm_remove_node(&vma->node);
-	list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
-
 	if (vma->pages != obj->mm.pages) {
 		GEM_BUG_ON(!vma->pages);
 		sg_free_table(vma->pages);
@@ -667,18 +695,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 	}
 	vma->pages = NULL;
 
-	/* Since the unbound list is global, only move to that list if
-	 * no more VMAs exist. */
-	if (--obj->bind_count == 0)
-		list_move_tail(&obj->global_link,
-			       &to_i915(obj->base.dev)->mm.unbound_list);
-
-	/* And finally now the object is completely decoupled from this vma,
-	 * we can drop its hold on the backing storage and allow it to be
-	 * reaped by the shrinker.
-	 */
-	i915_gem_object_unpin_pages(obj);
-	GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count);
+	i915_vma_remove(vma);
 
 destroy:
 	if (unlikely(i915_vma_is_closed(vma)))
-- 
cgit v1.2.3


From 34dc8993eef63681b062871413a9484008a2a78f Mon Sep 17 00:00:00 2001
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Date: Wed, 15 Feb 2017 15:52:59 +0200
Subject: drm/i915: Avoid tweaking evaluation thresholds on Baytrail v3
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Certain Baytrails, namely the 4 cpu core variants, have been
plaqued by spurious system hangs, mostly occurring with light loads.

Multiple bisects by various people point to a commit which changes the
reclocking strategy for Baytrail to follow its bigger brethen:
commit 8fb55197e64d ("drm/i915: Agressive downclocking on Baytrail")

There is also a review comment attached to this commit from Deepak S
on avoiding punit access on Cherryview and thus it was excluded on
common reclocking path. By taking the same approach and omitting
the punit access by not tweaking the thresholds when the hardware
has been asked to move into different frequency, considerable gains
in stability have been observed.

With J1900 box, light render/video load would end up in system hang
in usually less than 12 hours. With this patch applied, the cumulative
uptime has now been 34 days without issues. To provoke system hang,
light loads on both render and bsd engines in parallel have been used:
glxgears >/dev/null 2>/dev/null &
mpv --vo=vaapi --hwdec=vaapi --loop=inf vid.mp4

So far, author has not witnessed system hang with above load
and this patch applied. Reports from the tenacious people at
kernel bugzilla are also promising.

Considering that the punit access frequency with this patch is
considerably less, there is a possibility that this will push
the, still unknown, root cause past the triggering point on most loads.

But as we now can reliably reproduce the hang independently,
we can reduce the pain that users are having and use a
static thresholds until a root cause is found.

v3: don't break debugfs and simplification (Chris Wilson)

References: https://bugzilla.kernel.org/show_bug.cgi?id=109051
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: fritsch@xbmc.org
Cc: miku@iki.fi
Cc: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
CC: Michal Feix <michal@feix.cz>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: Deepak S <deepak.s@linux.intel.com>
Cc: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Cc: <stable@vger.kernel.org> # v4.2+
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1487166779-26945-1-git-send-email-mika.kuoppala@intel.com
(cherry picked from commit 6067a27d1f0184596d51decbac1c1fdc4acb012f)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_pm.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 249623d45be0..65cd4c56c9dd 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4891,6 +4891,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
 		break;
 	}
 
+	/* When byt can survive without system hang with dynamic
+	 * sw freq adjustments, this restriction can be lifted.
+	 */
+	if (IS_VALLEYVIEW(dev_priv))
+		goto skip_hw_write;
+
 	I915_WRITE(GEN6_RP_UP_EI,
 		   GT_INTERVAL_FROM_US(dev_priv, ei_up));
 	I915_WRITE(GEN6_RP_UP_THRESHOLD,
@@ -4911,6 +4917,7 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
 		   GEN6_RP_UP_BUSY_AVG |
 		   GEN6_RP_DOWN_IDLE_AVG);
 
+skip_hw_write:
 	dev_priv->rps.power = new_power;
 	dev_priv->rps.up_threshold = threshold_up;
 	dev_priv->rps.down_threshold = threshold_down;
-- 
cgit v1.2.3


From d253371c4c2f5fc2d884ef25f64decd7549aff5a Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Fri, 24 Feb 2017 16:32:10 +0200
Subject: drm/i915/gen9: Increase PCODE request timeout to 50ms
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After
commit 2c7d0602c815277f7cb7c932b091288710d8aba7
Author: Imre Deak <imre.deak@intel.com>
Date:   Mon Dec 5 18:27:37 2016 +0200

    drm/i915/gen9: Fix PCODE polling during CDCLK change notification

there is still one report of the CDCLK-change request timing out on a
KBL machine, see the Reference link. On that machine the maximum time
the request took to succeed was 34ms, so increase the timeout to 50ms.

v2:
- Change timeout from 100 to 50 ms to maintain the current 50 ms limit
  for atomic waits in the driver. (Chris, Tvrtko)

Reference: https://bugs.freedesktop.org/show_bug.cgi?id=99345
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/1487946730-17162-1-git-send-email-imre.deak@intel.com
(cherry picked from commit 0129936ddda26afd5d9d207c4e86b2425952579f)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_pm.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 65cd4c56c9dd..940bab22d464 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -7923,10 +7923,10 @@ static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
  * @timeout_base_ms: timeout for polling with preemption enabled
  *
  * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
- * reports an error or an overall timeout of @timeout_base_ms+10 ms expires.
+ * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
  * The request is acknowledged once the PCODE reply dword equals @reply after
  * applying @reply_mask. Polling is first attempted with preemption enabled
- * for @timeout_base_ms and if this times out for another 10 ms with
+ * for @timeout_base_ms and if this times out for another 50 ms with
  * preemption disabled.
  *
  * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
@@ -7962,14 +7962,15 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
 	 * worst case) _and_ PCODE was busy for some reason even after a
 	 * (queued) request and @timeout_base_ms delay. As a workaround retry
 	 * the poll with preemption disabled to maximize the number of
-	 * requests. Increase the timeout from @timeout_base_ms to 10ms to
+	 * requests. Increase the timeout from @timeout_base_ms to 50ms to
 	 * account for interrupts that could reduce the number of these
-	 * requests.
+	 * requests, and for any quirks of the PCODE firmware that delays
+	 * the request completion.
 	 */
 	DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n");
 	WARN_ON_ONCE(timeout_base_ms > 3);
 	preempt_disable();
-	ret = wait_for_atomic(COND, 10);
+	ret = wait_for_atomic(COND, 50);
 	preempt_enable();
 
 out:
-- 
cgit v1.2.3


From 38230243ef316ac696956d75dc78a22e3aa789b9 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Tue, 28 Feb 2017 15:28:47 +0100
Subject: drm/i915: Move updating color management to before vblank evasion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This cannot be done reliably during vblank evasasion
since the color management registers are not double buffered.

The original commit that moved it always during vblank evasion was
wrong, so revert it to before vblank evasion again.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Fixes: 20a34e78f0d7 ("drm/i915: Update color management during vblank evasion.")
Cc: stable@vger.kernel.org # v4.7+
Link: http://patchwork.freedesktop.org/patch/msgid/1488292128-14540-1-git-send-email-maarten.lankhorst@linux.intel.com
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
(cherry picked from commit 567f0792a6ad11c0c2620944b8eeb777359fb85a)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_display.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 01341670738f..9a8b6a13233d 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -14946,17 +14946,19 @@ static void intel_begin_crtc_commit(struct drm_crtc *crtc,
 		to_intel_atomic_state(old_crtc_state->state);
 	bool modeset = needs_modeset(crtc->state);
 
+	if (!modeset &&
+	    (intel_cstate->base.color_mgmt_changed ||
+	     intel_cstate->update_pipe)) {
+		intel_color_set_csc(crtc->state);
+		intel_color_load_luts(crtc->state);
+	}
+
 	/* Perform vblank evasion around commit operation */
 	intel_pipe_update_start(intel_crtc);
 
 	if (modeset)
 		goto out;
 
-	if (crtc->state->color_mgmt_changed || to_intel_crtc_state(crtc->state)->update_pipe) {
-		intel_color_set_csc(crtc->state);
-		intel_color_load_luts(crtc->state);
-	}
-
 	if (intel_cstate->update_pipe)
 		intel_update_pipe_config(intel_crtc, old_intel_cstate);
 	else if (INTEL_GEN(dev_priv) >= 9)
-- 
cgit v1.2.3


From 0d9dc306e15b59bf50db87ebcb1e2248586d4733 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 7 Mar 2017 13:20:31 +0000
Subject: drm/i915: Store a permanent error in obj->mm.pages

Once the object has been truncated, it is unrecoverable. To facilitate
detection of this state store the error in obj->mm.pages.

This is required for the next patch which should be applied to v4.10
(via stable), so we also need to mark this patch for backporting. In
that regard, let's consider this to be a fix/improvement too.

v2: Avoid dereferencing the ERR_PTR when freeing the object.

Fixes: 1233e2db199d ("drm/i915: Move object backing storage manipulation to its own locking")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: <stable@vger.kernel.org> # v4.10+
Link: http://patchwork.freedesktop.org/patch/msgid/20170307132031.32461-1-chris@chris-wilson.co.uk
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
(cherry picked from commit 4e5462ee843c883790e9609cf560d88960ea4227)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c45af09555dc..3591e8656ff9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2119,6 +2119,7 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj)
 	 */
 	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
 	obj->mm.madv = __I915_MADV_PURGED;
+	obj->mm.pages = ERR_PTR(-EFAULT);
 }
 
 /* Try to discard unwanted pages */
@@ -2218,7 +2219,9 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
 
 	__i915_gem_object_reset_page_iter(obj);
 
-	obj->ops->put_pages(obj, pages);
+	if (!IS_ERR(pages))
+		obj->ops->put_pages(obj, pages);
+
 unlock:
 	mutex_unlock(&obj->mm.lock);
 }
@@ -2437,7 +2440,7 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 	if (err)
 		return err;
 
-	if (unlikely(!obj->mm.pages)) {
+	if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) {
 		err = ____i915_gem_object_get_pages(obj);
 		if (err)
 			goto unlock;
@@ -2515,7 +2518,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 
 	pinned = true;
 	if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
-		if (unlikely(!obj->mm.pages)) {
+		if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) {
 			ret = ____i915_gem_object_get_pages(obj);
 			if (ret)
 				goto err_unlock;
-- 
cgit v1.2.3


From 4e6fdafa7ac395ad47a80a0e7b4fd1e11550f862 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 7 Mar 2017 12:03:38 +0000
Subject: drm/i915: Use pagecache write to prepopulate shmemfs from
 pwrite-ioctl

Before we instantiate/pin the backing store for our use, we
can prepopulate the shmemfs filp efficiently using a write into the
pagecache. We avoid the penalty of instantiating all the pages, important
if the user is just writing to a few and never uses the object on the GPU,
and using a direct write into shmemfs allows it to avoid the cost of
retrieving a page (mostly the clear-before-use, but in theory we could
curtail swapin) before it is overwritten.

This can be extended later to provide additional specialisation for
other backends (other than shmemfs). For now it provides a defense
against very large write-only allocations from exhausting all of system
memory.

v2: Smelling fixes.

Fixes: fe115628d567 ("drm/i915: Implement pwrite without struct-mutex")
References: https://bugs.freedesktop.org/show_bug.cgi?id=99107
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.william.auld@gmail.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: <stable@vger.kernel.org> # v4.10+
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170307120338.7277-2-chris@chris-wilson.co.uk
(cherry picked from commit 7c55e2c5772dcf3cbacd0fa2bcfeefae416b73f7)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c        | 78 ++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_object.h |  3 ++
 2 files changed, 81 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3591e8656ff9..10777da73039 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1434,6 +1434,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 
 	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
 
+	ret = -ENODEV;
+	if (obj->ops->pwrite)
+		ret = obj->ops->pwrite(obj, args);
+	if (ret != -ENODEV)
+		goto err;
+
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
 				   I915_WAIT_ALL,
@@ -2566,6 +2572,75 @@ err_unlock:
 	goto out_unlock;
 }
 
+static int
+i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
+			   const struct drm_i915_gem_pwrite *arg)
+{
+	struct address_space *mapping = obj->base.filp->f_mapping;
+	char __user *user_data = u64_to_user_ptr(arg->data_ptr);
+	u64 remain, offset;
+	unsigned int pg;
+
+	/* Before we instantiate/pin the backing store for our use, we
+	 * can prepopulate the shmemfs filp efficiently using a write into
+	 * the pagecache. We avoid the penalty of instantiating all the
+	 * pages, important if the user is just writing to a few and never
+	 * uses the object on the GPU, and using a direct write into shmemfs
+	 * allows it to avoid the cost of retrieving a page (either swapin
+	 * or clearing-before-use) before it is overwritten.
+	 */
+	if (READ_ONCE(obj->mm.pages))
+		return -ENODEV;
+
+	/* Before the pages are instantiated the object is treated as being
+	 * in the CPU domain. The pages will be clflushed as required before
+	 * use, and we can freely write into the pages directly. If userspace
+	 * races pwrite with any other operation; corruption will ensue -
+	 * that is userspace's prerogative!
+	 */
+
+	remain = arg->size;
+	offset = arg->offset;
+	pg = offset_in_page(offset);
+
+	do {
+		unsigned int len, unwritten;
+		struct page *page;
+		void *data, *vaddr;
+		int err;
+
+		len = PAGE_SIZE - pg;
+		if (len > remain)
+			len = remain;
+
+		err = pagecache_write_begin(obj->base.filp, mapping,
+					    offset, len, 0,
+					    &page, &data);
+		if (err < 0)
+			return err;
+
+		vaddr = kmap(page);
+		unwritten = copy_from_user(vaddr + pg, user_data, len);
+		kunmap(page);
+
+		err = pagecache_write_end(obj->base.filp, mapping,
+					  offset, len, len - unwritten,
+					  page, data);
+		if (err < 0)
+			return err;
+
+		if (unwritten)
+			return -EFAULT;
+
+		remain -= len;
+		user_data += len;
+		offset += len;
+		pg = 0;
+	} while (remain);
+
+	return 0;
+}
+
 static bool ban_context(const struct i915_gem_context *ctx)
 {
 	return (i915_gem_context_is_bannable(ctx) &&
@@ -3987,8 +4062,11 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
 	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
 		 I915_GEM_OBJECT_IS_SHRINKABLE,
+
 	.get_pages = i915_gem_object_get_pages_gtt,
 	.put_pages = i915_gem_object_put_pages_gtt,
+
+	.pwrite = i915_gem_object_pwrite_gtt,
 };
 
 struct drm_i915_gem_object *
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index bf90b07163d1..76b80a0be797 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -54,6 +54,9 @@ struct drm_i915_gem_object_ops {
 	struct sg_table *(*get_pages)(struct drm_i915_gem_object *);
 	void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
 
+	int (*pwrite)(struct drm_i915_gem_object *,
+		      const struct drm_i915_gem_pwrite *);
+
 	int (*dmabuf_export)(struct drm_i915_gem_object *);
 	void (*release)(struct drm_i915_gem_object *);
 };
-- 
cgit v1.2.3


From edd06b8353772dca7afcd4640dafa83b521edd55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 7 Mar 2017 22:54:19 +0200
Subject: drm/i915: Nuke debug messages from the pipe update critical section
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

printks are slow so we should not be doing them from the vblank evade
critical section. These could explain why we sometimes seem to
blow past our 100 usec deadline.

The problem has been there ever since commit bfd16b2a23dc ("drm/i915:
Make updating pipe without modeset atomic.") but it may not have
been readily visible until commit e1edbd44e23b ("drm/i915: Complain
if we take too long under vblank evasion.") increased our chances
of noticing it.

Cc: stable@vger.kernel.org
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Fixes: bfd16b2a23dc ("drm/i915: Make updating pipe without modeset atomic.")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170307205419.19447-1-ville.syrjala@linux.intel.com
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
(cherry picked from commit c3f8ad57a01a31397e5a0349a226a32f35ddc19c)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_display.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 9a8b6a13233d..b3e0cd133b49 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3669,10 +3669,6 @@ static void intel_update_pipe_config(struct intel_crtc *crtc,
 	/* drm_atomic_helper_update_legacy_modeset_state might not be called. */
 	crtc->base.mode = crtc->base.state->mode;
 
-	DRM_DEBUG_KMS("Updating pipe size %ix%i -> %ix%i\n",
-		      old_crtc_state->pipe_src_w, old_crtc_state->pipe_src_h,
-		      pipe_config->pipe_src_w, pipe_config->pipe_src_h);
-
 	/*
 	 * Update pipe size and adjust fitter if needed: the reason for this is
 	 * that in compute_mode_changes we check the native mode (not the pfit
@@ -4796,23 +4792,17 @@ static void skylake_pfit_enable(struct intel_crtc *crtc)
 	struct intel_crtc_scaler_state *scaler_state =
 		&crtc->config->scaler_state;
 
-	DRM_DEBUG_KMS("for crtc_state = %p\n", crtc->config);
-
 	if (crtc->config->pch_pfit.enabled) {
 		int id;
 
-		if (WARN_ON(crtc->config->scaler_state.scaler_id < 0)) {
-			DRM_ERROR("Requesting pfit without getting a scaler first\n");
+		if (WARN_ON(crtc->config->scaler_state.scaler_id < 0))
 			return;
-		}
 
 		id = scaler_state->scaler_id;
 		I915_WRITE(SKL_PS_CTRL(pipe, id), PS_SCALER_EN |
 			PS_FILTER_MEDIUM | scaler_state->scalers[id].mode);
 		I915_WRITE(SKL_PS_WIN_POS(pipe, id), crtc->config->pch_pfit.pos);
 		I915_WRITE(SKL_PS_WIN_SZ(pipe, id), crtc->config->pch_pfit.size);
-
-		DRM_DEBUG_KMS("for crtc_state = %p scaler_id = %d\n", crtc->config, id);
 	}
 }
 
-- 
cgit v1.2.3


From 5a8cf90d743f2d05433c6109f6c1b9b904b0cdb7 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 2 Feb 2017 20:47:41 +0000
Subject: drm/i915: Drain the freed state from the tail of the next commit

If we have any residual freed atomic state from earlier commits, flush
the freed list after performing the current modeset. This prevents the
freed list from ever-growing if userspace manages to starve the kernel
threads (i.e. we are never able to run our free state worker and
eventually the system may even oom).

Fixes: 6f0f02dc56f1 ("drm/i915: Move atomic state free from out of fence release")
Testcase: igt/kms_cursor/legacy/all-pipes-single-bo
Reported-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20170202204741.18231-1-chris@chris-wilson.co.uk
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
(cherry picked from commit ba318c61a9719577b6f451c055f364e4116874b2)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_display.c | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index b3e0cd133b49..3282b0f4b134 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -14369,6 +14369,24 @@ static void skl_update_crtcs(struct drm_atomic_state *state,
 	} while (progress);
 }
 
+static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv)
+{
+	struct intel_atomic_state *state, *next;
+	struct llist_node *freed;
+
+	freed = llist_del_all(&dev_priv->atomic_helper.free_list);
+	llist_for_each_entry_safe(state, next, freed, freed)
+		drm_atomic_state_put(&state->base);
+}
+
+static void intel_atomic_helper_free_state_worker(struct work_struct *work)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(work, typeof(*dev_priv), atomic_helper.free_work);
+
+	intel_atomic_helper_free_state(dev_priv);
+}
+
 static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 {
 	struct drm_device *dev = state->dev;
@@ -14535,6 +14553,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 	 * can happen also when the device is completely off.
 	 */
 	intel_uncore_arm_unclaimed_mmio_detection(dev_priv);
+
+	intel_atomic_helper_free_state(dev_priv);
 }
 
 static void intel_atomic_commit_work(struct work_struct *work)
@@ -16591,18 +16611,6 @@ fail:
 	drm_modeset_acquire_fini(&ctx);
 }
 
-static void intel_atomic_helper_free_state(struct work_struct *work)
-{
-	struct drm_i915_private *dev_priv =
-		container_of(work, typeof(*dev_priv), atomic_helper.free_work);
-	struct intel_atomic_state *state, *next;
-	struct llist_node *freed;
-
-	freed = llist_del_all(&dev_priv->atomic_helper.free_list);
-	llist_for_each_entry_safe(state, next, freed, freed)
-		drm_atomic_state_put(&state->base);
-}
-
 int intel_modeset_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -16623,7 +16631,7 @@ int intel_modeset_init(struct drm_device *dev)
 	dev->mode_config.funcs = &intel_mode_funcs;
 
 	INIT_WORK(&dev_priv->atomic_helper.free_work,
-		  intel_atomic_helper_free_state);
+		  intel_atomic_helper_free_state_worker);
 
 	intel_init_quirks(dev);
 
-- 
cgit v1.2.3


From a677e7046ab5edb33d051bda60cb3be0d60a48cc Mon Sep 17 00:00:00 2001
From: Linu Cherian <linu.cherian@cavium.com>
Date: Wed, 8 Mar 2017 11:38:32 +0530
Subject: KVM: Add documentation for KVM_CAP_NR_MEMSLOTS

Add documentation for KVM_CAP_NR_MEMSLOTS capability.

Reviewed-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Linu Cherian <linu.cherian@cavium.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 Documentation/virtual/kvm/api.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 069450938b79..3c248f772ae6 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -951,6 +951,10 @@ This ioctl allows the user to create or modify a guest physical memory
 slot.  When changing an existing slot, it may be moved in the guest
 physical memory space, or its flags may be modified.  It may not be
 resized.  Slots may not overlap in guest physical address space.
+Bits 0-15 of "slot" specifies the slot id and this value should be
+less than the maximum number of user memory slots supported per VM.
+The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS,
+if this capability is supported by the architecture.
 
 If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of "slot"
 specifies the address space which is being modified.  They must be
-- 
cgit v1.2.3


From 7af4df85796589e60a2dfc0f821eca0c4bbce4d2 Mon Sep 17 00:00:00 2001
From: Linu Cherian <linu.cherian@cavium.com>
Date: Wed, 8 Mar 2017 11:38:33 +0530
Subject: KVM: arm/arm64: Enable KVM_CAP_NR_MEMSLOTS on arm/arm64

Return KVM_USER_MEM_SLOTS for userspace capability query on
NR_MEMSLOTS.

Reviewed-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Linu Cherian <linu.cherian@cavium.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/kvm/arm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index c9a2103faeb9..96dba7cd8be7 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -221,6 +221,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_MAX_VCPUS:
 		r = KVM_MAX_VCPUS;
 		break;
+	case KVM_CAP_NR_MEMSLOTS:
+		r = KVM_USER_MEM_SLOTS;
+		break;
 	case KVM_CAP_MSI_DEVID:
 		if (!kvm)
 			r = -EINVAL;
-- 
cgit v1.2.3


From 3e92f94a3b8e925a6dd7ec88a5794b2084b5fb65 Mon Sep 17 00:00:00 2001
From: Linu Cherian <linu.cherian@cavium.com>
Date: Wed, 8 Mar 2017 11:38:34 +0530
Subject: KVM: arm/arm64: Remove KVM_PRIVATE_MEM_SLOTS definition that are
 unused

arm/arm64 architecture doesnt use private memslots, hence removing
KVM_PRIVATE_MEM_SLOTS macro definition.

Reviewed-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Linu Cherian <linu.cherian@cavium.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/include/asm/kvm_host.h   | 1 -
 arch/arm64/include/asm/kvm_host.h | 1 -
 2 files changed, 2 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index cc495d799c67..31ee468ce667 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -30,7 +30,6 @@
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED
 
 #define KVM_USER_MEM_SLOTS 32
-#define KVM_PRIVATE_MEM_SLOTS 4
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_HAVE_ONE_REG
 #define KVM_HALT_POLL_NS_DEFAULT 500000
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index f21fd3894370..6ac17ee887c9 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -31,7 +31,6 @@
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED
 
 #define KVM_USER_MEM_SLOTS 32
-#define KVM_PRIVATE_MEM_SLOTS 4
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_HALT_POLL_NS_DEFAULT 500000
 
-- 
cgit v1.2.3


From 955a3fc6d2a1c11d6d00bce4f3816100ce0530cf Mon Sep 17 00:00:00 2001
From: Linu Cherian <linu.cherian@cavium.com>
Date: Wed, 8 Mar 2017 11:38:35 +0530
Subject: KVM: arm64: Increase number of user memslots to 512

Having only 32 memslots is a real constraint for the maximum
number of PCI devices that can be assigned to a single guest.
Assuming each PCI device/virtual function having two memory BAR
regions, we could assign only 15 devices/virtual functions to a
guest.

Hence increase KVM_USER_MEM_SLOTS to 512 as done in other archs like
powerpc.

Reviewed-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Linu Cherian <linu.cherian@cavium.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/include/asm/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 6ac17ee887c9..e7705e7bb07b 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -30,7 +30,7 @@
 
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED
 
-#define KVM_USER_MEM_SLOTS 32
+#define KVM_USER_MEM_SLOTS 512
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_HALT_POLL_NS_DEFAULT 500000
 
-- 
cgit v1.2.3


From db08e1d53034a54fe177ced70476fda73954b9e9 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Tue, 21 Feb 2017 13:41:31 +1100
Subject: powerpc/powernv/ioda2: Update iommu table base on ownership change

On POWERNV platform, in order to do DMA via IOMMU (i.e. 32bit DMA in
our case), a device needs an iommu_table pointer set via
set_iommu_table_base().

The codeflow is:
- pnv_pci_ioda2_setup_dma_pe()
	- pnv_pci_ioda2_setup_default_config()
	- pnv_ioda_setup_bus_dma() [1]

pnv_pci_ioda2_setup_dma_pe() creates IOMMU groups,
pnv_pci_ioda2_setup_default_config() does default DMA setup,
pnv_ioda_setup_bus_dma() takes a bus PE (on IODA2, all physical function
PEs as bus PEs except NPU), walks through all underlying buses and
devices, adds all devices to an IOMMU group and sets iommu_table.

On IODA2, when VFIO is used, it takes ownership over a PE which means it
removes all tables and creates new ones (with a possibility of sharing
them among PEs). So when the ownership is returned from VFIO to
the kernel, the iommu_table pointer written to a device at [1] is
stale and needs an update.

This adds an "add_to_group" parameter to pnv_ioda_setup_bus_dma()
(in fact re-adds as it used to be there a while ago for different
reasons) to tell the helper if a device needs to be added to
an IOMMU group with an iommu_table update or just the latter.

This calls pnv_ioda_setup_bus_dma(..., false) from
pnv_ioda2_release_ownership() so when the ownership is restored,
32bit DMA can work again for a device. This does the same thing
on obtaining ownership as the iommu_table point is stale at this point
anyway and it is safer to have NULL there.

We did not hit this earlier as all tested devices in recent years were
only using 64bit DMA; the rare exception for this is MPT3 SAS adapter
which uses both 32bit and 64bit DMA access and it has not been tested
with VFIO much.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Acked-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 957a57a6c812..e36738291c32 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1775,17 +1775,20 @@ static u64 pnv_pci_ioda_dma_get_required_mask(struct pci_dev *pdev)
 }
 
 static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
-				   struct pci_bus *bus)
+				   struct pci_bus *bus,
+				   bool add_to_group)
 {
 	struct pci_dev *dev;
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		set_iommu_table_base(&dev->dev, pe->table_group.tables[0]);
 		set_dma_offset(&dev->dev, pe->tce_bypass_base);
-		iommu_add_device(&dev->dev);
+		if (add_to_group)
+			iommu_add_device(&dev->dev);
 
 		if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
-			pnv_ioda_setup_bus_dma(pe, dev->subordinate);
+			pnv_ioda_setup_bus_dma(pe, dev->subordinate,
+					add_to_group);
 	}
 }
 
@@ -2191,7 +2194,7 @@ found:
 		set_iommu_table_base(&pe->pdev->dev, tbl);
 		iommu_add_device(&pe->pdev->dev);
 	} else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
-		pnv_ioda_setup_bus_dma(pe, pe->pbus);
+		pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
 
 	return;
  fail:
@@ -2426,6 +2429,8 @@ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
 
 	pnv_pci_ioda2_set_bypass(pe, false);
 	pnv_pci_ioda2_unset_window(&pe->table_group, 0);
+	if (pe->pbus)
+		pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
 	pnv_ioda2_table_free(tbl);
 }
 
@@ -2435,6 +2440,8 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
 						table_group);
 
 	pnv_pci_ioda2_setup_default_config(pe);
+	if (pe->pbus)
+		pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
 }
 
 static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
@@ -2731,7 +2738,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 	if (pe->flags & PNV_IODA_PE_DEV)
 		iommu_add_device(&pe->pdev->dev);
 	else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
-		pnv_ioda_setup_bus_dma(pe, pe->pbus);
+		pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
 }
 
 #ifdef CONFIG_PCI_MSI
-- 
cgit v1.2.3


From 85550f9148a852ed363a386577ad31b97b95dfb8 Mon Sep 17 00:00:00 2001
From: Jelle Martijn Kok <jmkok@youcom.nl>
Date: Tue, 21 Feb 2017 12:48:18 +0100
Subject: usb: ohci-at91: Do not drop unhandled USB suspend control requests

In patch 2e2aa1bc7eff90ecm, USB suspend and wakeup control requests are
passed to SFR_OHCIICR register. If a processor does not have such a
register, this hub control request will be dropped.

If no such a SFR register is available, all USB suspend control requests
will now be processed using ohci_hub_control()
(like before patch 2e2aa1bc7eff90ecm.)

Tested on an Atmel AT91SAM9G20 with an on-board TI TUSB2046B hub chip
If the last USB device is unplugged from the USB hub, the hub goes into
sleep and will not wakeup when an USB devices is inserted.

Fixes: 2e2aa1bc7eff90ec ("usb: ohci-at91: Forcibly suspend ports while USB suspend")
Signed-off-by: Jelle Martijn Kok <jmkok@youcom.nl>
Tested-by: Wenyou Yang <wenyou.yang@atmel.com>
Cc: Wenyou Yang <wenyou.yang@atmel.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: stable <stable@vger.kernel.org>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Reviewed-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/ohci-at91.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c
index 414e3c376dbb..5302f988e7e6 100644
--- a/drivers/usb/host/ohci-at91.c
+++ b/drivers/usb/host/ohci-at91.c
@@ -350,7 +350,7 @@ static int ohci_at91_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
 
 		case USB_PORT_FEAT_SUSPEND:
 			dev_dbg(hcd->self.controller, "SetPortFeat: SUSPEND\n");
-			if (valid_port(wIndex)) {
+			if (valid_port(wIndex) && ohci_at91->sfr_regmap) {
 				ohci_at91_port_suspend(ohci_at91->sfr_regmap,
 						       1);
 				return 0;
@@ -393,7 +393,7 @@ static int ohci_at91_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
 
 		case USB_PORT_FEAT_SUSPEND:
 			dev_dbg(hcd->self.controller, "ClearPortFeature: SUSPEND\n");
-			if (valid_port(wIndex)) {
+			if (valid_port(wIndex) && ohci_at91->sfr_regmap) {
 				ohci_at91_port_suspend(ohci_at91->sfr_regmap,
 						       0);
 				return 0;
-- 
cgit v1.2.3


From fd567653bdb908009b650f079bfd4b63169e2ac4 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier@osg.samsung.com>
Date: Wed, 22 Feb 2017 15:23:22 -0300
Subject: usb: phy: isp1301: Add OF device ID table

The driver doesn't have a struct of_device_id table but supported devices
are registered via Device Trees. This is working on the assumption that a
I2C device registered via OF will always match a legacy I2C device ID and
that the MODALIAS reported will always be of the form i2c:<device>.

But this could change in the future so the correct approach is to have an
OF device ID table if the devices are registered via OF.

Signed-off-by: Javier Martinez Canillas <javier@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/phy/phy-isp1301.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/usb/phy/phy-isp1301.c b/drivers/usb/phy/phy-isp1301.c
index db68156568e6..b3b33cf7ddf6 100644
--- a/drivers/usb/phy/phy-isp1301.c
+++ b/drivers/usb/phy/phy-isp1301.c
@@ -33,6 +33,12 @@ static const struct i2c_device_id isp1301_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, isp1301_id);
 
+static const struct of_device_id isp1301_of_match[] = {
+	{.compatible = "nxp,isp1301" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, isp1301_of_match);
+
 static struct i2c_client *isp1301_i2c_client;
 
 static int __isp1301_write(struct isp1301 *isp, u8 reg, u8 value, u8 clear)
@@ -130,6 +136,7 @@ static int isp1301_remove(struct i2c_client *client)
 static struct i2c_driver isp1301_driver = {
 	.driver = {
 		.name = DRV_NAME,
+		.of_match_table = of_match_ptr(isp1301_of_match),
 	},
 	.probe = isp1301_probe,
 	.remove = isp1301_remove,
-- 
cgit v1.2.3


From b7321e81fc369abe353cf094d4f0dc2fe11ab95f Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Tue, 7 Mar 2017 16:11:03 +0100
Subject: USB: iowarrior: fix NULL-deref at probe

Make sure to check for the required interrupt-in endpoint to avoid
dereferencing a NULL-pointer should a malicious device lack such an
endpoint.

Note that a fairly recent change purported to fix this issue, but added
an insufficient test on the number of endpoints only, a test which can
now be removed.

Fixes: 4ec0ef3a8212 ("USB: iowarrior: fix oops with malicious USB descriptors")
Fixes: 946b960d13c1 ("USB: add driver for iowarrior devices.")
Cc: stable <stable@vger.kernel.org>	# 2.6.21
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/iowarrior.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c
index 095778ff984d..3ad058cbe6ca 100644
--- a/drivers/usb/misc/iowarrior.c
+++ b/drivers/usb/misc/iowarrior.c
@@ -781,12 +781,6 @@ static int iowarrior_probe(struct usb_interface *interface,
 	iface_desc = interface->cur_altsetting;
 	dev->product_id = le16_to_cpu(udev->descriptor.idProduct);
 
-	if (iface_desc->desc.bNumEndpoints < 1) {
-		dev_err(&interface->dev, "Invalid number of endpoints\n");
-		retval = -EINVAL;
-		goto error;
-	}
-
 	/* set up the endpoint information */
 	for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) {
 		endpoint = &iface_desc->endpoint[i].desc;
@@ -797,6 +791,13 @@ static int iowarrior_probe(struct usb_interface *interface,
 			/* this one will match for the IOWarrior56 only */
 			dev->int_out_endpoint = endpoint;
 	}
+
+	if (!dev->int_in_endpoint) {
+		dev_err(&interface->dev, "no interrupt-in endpoint found\n");
+		retval = -ENODEV;
+		goto error;
+	}
+
 	/* we have to check the report_size often, so remember it in the endianness suitable for our machine */
 	dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint);
 	if ((dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) &&
-- 
cgit v1.2.3


From de46e56653de7b3b54baa625bd582635008b8d05 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Tue, 7 Mar 2017 16:11:04 +0100
Subject: USB: iowarrior: fix NULL-deref in write

Make sure to verify that we have the required interrupt-out endpoint for
IOWarrior56 devices to avoid dereferencing a NULL-pointer in write
should a malicious device lack such an endpoint.

Fixes: 946b960d13c1 ("USB: add driver for iowarrior devices.")
Cc: stable <stable@vger.kernel.org>     # 2.6.21
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/iowarrior.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c
index 3ad058cbe6ca..37c63cb39714 100644
--- a/drivers/usb/misc/iowarrior.c
+++ b/drivers/usb/misc/iowarrior.c
@@ -798,6 +798,14 @@ static int iowarrior_probe(struct usb_interface *interface,
 		goto error;
 	}
 
+	if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) {
+		if (!dev->int_out_endpoint) {
+			dev_err(&interface->dev, "no interrupt-out endpoint found\n");
+			retval = -ENODEV;
+			goto error;
+		}
+	}
+
 	/* we have to check the report_size often, so remember it in the endianness suitable for our machine */
 	dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint);
 	if ((dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) &&
-- 
cgit v1.2.3


From d595259fbb7a7afed241b1afb2c4fe4b47de47fa Mon Sep 17 00:00:00 2001
From: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
Date: Tue, 28 Feb 2017 00:46:58 +0100
Subject: usb-storage: Add ignore-residue quirk for Initio INIC-3619

This USB-SATA bridge chip is used in a StarTech enclosure for
optical drives.

Without the quirk MakeMKV fails during the key exchange with an
installed BluRay drive:
> Error 'Scsi error - ILLEGAL REQUEST:COPY PROTECTION KEY EXCHANGE FAILURE - KEY NOT ESTABLISHED'
> occurred while issuing SCSI command AD010..080002400 to device 'SG:dev_11:2'

Signed-off-by: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/storage/unusual_devs.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index 16cc18369111..9129f6cb8230 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -2071,6 +2071,20 @@ UNUSUAL_DEV(  0x1370, 0x6828, 0x0110, 0x0110,
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
 		US_FL_IGNORE_RESIDUE ),
 
+/*
+ * Reported by Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
+ * The INIC-3619 bridge is used in the StarTech SLSODDU33B
+ * SATA-USB enclosure for slimline optical drives.
+ *
+ * The quirk enables MakeMKV to properly exchange keys with
+ * an installed BD drive.
+ */
+UNUSUAL_DEV(  0x13fd, 0x3609, 0x0209, 0x0209,
+		"Initio Corporation",
+		"INIC-3619",
+		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+		US_FL_IGNORE_RESIDUE ),
+
 /* Reported by Qinglin Ye <yestyle@gmail.com> */
 UNUSUAL_DEV(  0x13fe, 0x3600, 0x0100, 0x0100,
 		"Kingston",
-- 
cgit v1.2.3


From cfa47afe77b393e2c24a57e7e9611857a0b064f1 Mon Sep 17 00:00:00 2001
From: Richard Leitner <richard.leitner@skidata.com>
Date: Mon, 6 Mar 2017 09:24:20 +0100
Subject: usb: usb251xb: remove max_{power,current}_{sp,bp} properties

Remove the max_{power,current}_{sp,bp} properties of the usb251xb driver
from devicetree. This is done to simplify the dt bindings as requested
by Rob Herring in https://lkml.org/lkml/2017/2/15/1283. If those
properties are ever needed by somebody they can be enabled again easily.

Signed-off-by: Richard Leitner <richard.leitner@skidata.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/usb/usb251xb.txt | 20 ------------------
 drivers/usb/misc/usb251xb.c                        | 24 ++++------------------
 2 files changed, 4 insertions(+), 40 deletions(-)

diff --git a/Documentation/devicetree/bindings/usb/usb251xb.txt b/Documentation/devicetree/bindings/usb/usb251xb.txt
index 0c065f77658f..a5efd10ace9e 100644
--- a/Documentation/devicetree/bindings/usb/usb251xb.txt
+++ b/Documentation/devicetree/bindings/usb/usb251xb.txt
@@ -40,26 +40,6 @@ Optional properties :
 	device connected.
  - sp-disabled-ports : Specifies the ports which will be self-power disabled
  - bp-disabled-ports : Specifies the ports which will be bus-power disabled
- - max-sp-power : Specifies the maximum current the hub consumes from an
-	upstream port when operating as self-powered hub including the power
-	consumption of a permanently attached peripheral if the hub is
-	configured as a compound device. The value is given in mA in a 0 - 500
-	range (default is 2).
- - max-bp-power : Specifies the maximum current the hub consumes from an
-	upstream port when operating as bus-powered hub including the power
-	consumption of a permanently attached peripheral if the hub is
-	configured as a compound device. The value is given in mA in a 0 - 500
-	range (default is 100).
- - max-sp-current : Specifies the maximum current the hub consumes from an
-	upstream port when operating as self-powered hub EXCLUDING the power
-	consumption of a permanently attached peripheral if the hub is
-	configured as a compound device. The value is given in mA in a 0 - 500
-	range (default is 2).
- - max-bp-current : Specifies the maximum current the hub consumes from an
-	upstream port when operating as bus-powered hub EXCLUDING the power
-	consumption of a permanently attached peripheral if the hub is
-	configured as a compound device. The value is given in mA in a 0 - 500
-	range (default is 100).
  - power-on-time : Specifies the time it takes from the time the host initiates
 	the power-on sequence to a port until the port has adequate power. The
 	value is given in ms in a 0 - 510 range (default is 100ms).
diff --git a/drivers/usb/misc/usb251xb.c b/drivers/usb/misc/usb251xb.c
index 4e18600dc9b4..3f9c3060c477 100644
--- a/drivers/usb/misc/usb251xb.c
+++ b/drivers/usb/misc/usb251xb.c
@@ -432,26 +432,6 @@ static int usb251xb_get_ofdata(struct usb251xb *hub,
 		}
 	}
 
-	hub->max_power_sp = USB251XB_DEF_MAX_POWER_SELF;
-	if (!of_property_read_u32(np, "max-sp-power", property_u32))
-		hub->max_power_sp = min_t(u8, be32_to_cpu(*property_u32) / 2,
-					  250);
-
-	hub->max_power_bp = USB251XB_DEF_MAX_POWER_BUS;
-	if (!of_property_read_u32(np, "max-bp-power", property_u32))
-		hub->max_power_bp = min_t(u8, be32_to_cpu(*property_u32) / 2,
-					  250);
-
-	hub->max_current_sp = USB251XB_DEF_MAX_CURRENT_SELF;
-	if (!of_property_read_u32(np, "max-sp-current", property_u32))
-		hub->max_current_sp = min_t(u8, be32_to_cpu(*property_u32) / 2,
-					    250);
-
-	hub->max_current_bp = USB251XB_DEF_MAX_CURRENT_BUS;
-	if (!of_property_read_u32(np, "max-bp-current", property_u32))
-		hub->max_current_bp = min_t(u8, be32_to_cpu(*property_u32) / 2,
-					    250);
-
 	hub->power_on_time = USB251XB_DEF_POWER_ON_TIME;
 	if (!of_property_read_u32(np, "power-on-time", property_u32))
 		hub->power_on_time = min_t(u8, be32_to_cpu(*property_u32) / 2,
@@ -492,6 +472,10 @@ static int usb251xb_get_ofdata(struct usb251xb *hub,
 	/* The following parameters are currently not exposed to devicetree, but
 	 * may be as soon as needed.
 	 */
+	hub->max_power_sp = USB251XB_DEF_MAX_POWER_SELF;
+	hub->max_power_bp = USB251XB_DEF_MAX_POWER_BUS;
+	hub->max_current_sp = USB251XB_DEF_MAX_CURRENT_SELF;
+	hub->max_current_bp = USB251XB_DEF_MAX_CURRENT_BUS;
 	hub->bat_charge_en = USB251XB_DEF_BATTERY_CHARGING_ENABLE;
 	hub->boost_up = USB251XB_DEF_BOOST_UP;
 	hub->boost_x = USB251XB_DEF_BOOST_X;
-- 
cgit v1.2.3


From 7f7d8ba3b2140d993887a7db7a83d85c1f8db0e8 Mon Sep 17 00:00:00 2001
From: Richard Leitner <richard.leitner@skidata.com>
Date: Mon, 6 Mar 2017 09:24:21 +0100
Subject: usb: usb251xb: dt: add unit suffix to oc-delay and power-on-time

Rename oc-delay-* to oc-delay-us and make it expect a time value.
Furthermore add -ms suffix to power-on-time. There changes were
suggested by Rob Herring in https://lkml.org/lkml/2017/2/15/1283.

Signed-off-by: Richard Leitner <richard.leitner@skidata.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/usb/usb251xb.txt | 10 ++++---
 drivers/usb/misc/usb251xb.c                        | 35 ++++++++++++----------
 2 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/Documentation/devicetree/bindings/usb/usb251xb.txt b/Documentation/devicetree/bindings/usb/usb251xb.txt
index a5efd10ace9e..91499ae028db 100644
--- a/Documentation/devicetree/bindings/usb/usb251xb.txt
+++ b/Documentation/devicetree/bindings/usb/usb251xb.txt
@@ -31,7 +31,9 @@ Optional properties :
 	(default is individual)
  - dynamic-power-switching : enable auto-switching from self- to bus-powered
 	operation if the local power source is removed or unavailable
- - oc-delay-{100us,4ms,8ms,16ms} : set over current timer delay (default is 8ms)
+ - oc-delay-us : Delay time (in microseconds) for filtering the over-current
+	sense inputs. Valid values are 100, 4000, 8000 (default) and 16000. If
+	an invalid value is given, the default is used instead.
  - compound-device : indicated the hub is part of a compound device
  - port-mapping-mode : enable port mapping mode
  - string-support : enable string descriptor support (required for manufacturer,
@@ -40,9 +42,9 @@ Optional properties :
 	device connected.
  - sp-disabled-ports : Specifies the ports which will be self-power disabled
  - bp-disabled-ports : Specifies the ports which will be bus-power disabled
- - power-on-time : Specifies the time it takes from the time the host initiates
-	the power-on sequence to a port until the port has adequate power. The
-	value is given in ms in a 0 - 510 range (default is 100ms).
+ - power-on-time-ms : Specifies the time it takes from the time the host
+	initiates the power-on sequence to a port until the port has adequate
+	power. The value is given in ms in a 0 - 510 range (default is 100ms).
 
 Examples:
 	usb2512b@2c {
diff --git a/drivers/usb/misc/usb251xb.c b/drivers/usb/misc/usb251xb.c
index 3f9c3060c477..91f66d68bcb7 100644
--- a/drivers/usb/misc/usb251xb.c
+++ b/drivers/usb/misc/usb251xb.c
@@ -375,18 +375,24 @@ static int usb251xb_get_ofdata(struct usb251xb *hub,
 	if (of_get_property(np, "dynamic-power-switching", NULL))
 		hub->conf_data2 |= BIT(7);
 
-	if (of_get_property(np, "oc-delay-100us", NULL)) {
-		hub->conf_data2 &= ~BIT(5);
-		hub->conf_data2 &= ~BIT(4);
-	} else if (of_get_property(np, "oc-delay-4ms", NULL)) {
-		hub->conf_data2 &= ~BIT(5);
-		hub->conf_data2 |= BIT(4);
-	} else if (of_get_property(np, "oc-delay-8ms", NULL)) {
-		hub->conf_data2 |= BIT(5);
-		hub->conf_data2 &= ~BIT(4);
-	} else if (of_get_property(np, "oc-delay-16ms", NULL)) {
-		hub->conf_data2 |= BIT(5);
-		hub->conf_data2 |= BIT(4);
+	if (!of_property_read_u32(np, "oc-delay-us", property_u32)) {
+		if (*property_u32 == 100) {
+			/* 100 us*/
+			hub->conf_data2 &= ~BIT(5);
+			hub->conf_data2 &= ~BIT(4);
+		} else if (*property_u32 == 4000) {
+			/* 4 ms */
+			hub->conf_data2 &= ~BIT(5);
+			hub->conf_data2 |= BIT(4);
+		} else if (*property_u32 == 16000) {
+			/* 16 ms */
+			hub->conf_data2 |= BIT(5);
+			hub->conf_data2 |= BIT(4);
+		} else {
+			/* 8 ms (DEFAULT) */
+			hub->conf_data2 |= BIT(5);
+			hub->conf_data2 &= ~BIT(4);
+		}
 	}
 
 	if (of_get_property(np, "compound-device", NULL))
@@ -433,9 +439,8 @@ static int usb251xb_get_ofdata(struct usb251xb *hub,
 	}
 
 	hub->power_on_time = USB251XB_DEF_POWER_ON_TIME;
-	if (!of_property_read_u32(np, "power-on-time", property_u32))
-		hub->power_on_time = min_t(u8, be32_to_cpu(*property_u32) / 2,
-					   255);
+	if (!of_property_read_u32(np, "power-on-time-ms", property_u32))
+		hub->power_on_time = min_t(u8, *property_u32 / 2, 255);
 
 	if (of_property_read_u16_array(np, "language-id", &hub->lang_id, 1))
 		hub->lang_id = USB251XB_DEF_LANGUAGE_ID;
-- 
cgit v1.2.3


From fa56fe4ca4a1e4d9715f144857c98074e41bc94f Mon Sep 17 00:00:00 2001
From: Richard Leitner <richard.leitner@skidata.com>
Date: Mon, 6 Mar 2017 09:24:22 +0100
Subject: doc: dt-bindings: usb251xb: mark reg as required

Mark the reg property as required and furthermore fix some typos and
spellings in the documentation.

Signed-off-by: Richard Leitner <richard.leitner@skidata.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/usb/usb251xb.txt | 23 +++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/Documentation/devicetree/bindings/usb/usb251xb.txt b/Documentation/devicetree/bindings/usb/usb251xb.txt
index 91499ae028db..3957d4edaa74 100644
--- a/Documentation/devicetree/bindings/usb/usb251xb.txt
+++ b/Documentation/devicetree/bindings/usb/usb251xb.txt
@@ -7,18 +7,18 @@ Required properties :
  - compatible : Should be "microchip,usb251xb" or one of the specific types:
 	"microchip,usb2512b", "microchip,usb2512bi", "microchip,usb2513b",
 	"microchip,usb2513bi", "microchip,usb2514b", "microchip,usb2514bi"
- - hub-reset-gpios : Should specify the gpio for hub reset
+ - reset-gpios : Should specify the gpio for hub reset
+ - reg : I2C address on the selected bus (default is <0x2C>)
 
 Optional properties :
- - reg : I2C address on the selected bus (default is <0x2C>)
  - skip-config : Skip Hub configuration, but only send the USB-Attach command
- - vendor-id : USB Vendor ID of the hub (16 bit, default is 0x0424)
- - product-id : USB Product ID of the hub (16 bit, default depends on type)
- - device-id : USB Device ID of the hub (16 bit, default is 0x0bb3)
- - language-id : USB Language ID (16 bit, default is 0x0000)
- - manufacturer : USB Manufacturer string (max 31 characters long)
- - product : USB Product string (max 31 characters long)
- - serial : USB Serial string (max 31 characters long)
+ - vendor-id : Set USB Vendor ID of the hub (16 bit, default is 0x0424)
+ - product-id : Set USB Product ID of the hub (16 bit, default depends on type)
+ - device-id : Set USB Device ID of the hub (16 bit, default is 0x0bb3)
+ - language-id : Set USB Language ID (16 bit, default is 0x0000)
+ - manufacturer : Set USB Manufacturer string (max 31 characters long)
+ - product : Set USB Product string (max 31 characters long)
+ - serial : Set USB Serial string (max 31 characters long)
  - {bus,self}-powered : selects between self- and bus-powered operation (default
 	is self-powered)
  - disable-hi-speed : disable USB Hi-Speed support
@@ -34,7 +34,7 @@ Optional properties :
  - oc-delay-us : Delay time (in microseconds) for filtering the over-current
 	sense inputs. Valid values are 100, 4000, 8000 (default) and 16000. If
 	an invalid value is given, the default is used instead.
- - compound-device : indicated the hub is part of a compound device
+ - compound-device : indicate the hub is part of a compound device
  - port-mapping-mode : enable port mapping mode
  - string-support : enable string descriptor support (required for manufacturer,
 	product and serial string configuration)
@@ -49,7 +49,8 @@ Optional properties :
 Examples:
 	usb2512b@2c {
 		compatible = "microchip,usb2512b";
-		hub-reset-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>;
+		reg = <0x2c>;
+		reset-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>;
 	};
 
 	usb2514b@2c {
-- 
cgit v1.2.3


From 829b84db0c0c120ae5855a69cc0c94ff75fafabb Mon Sep 17 00:00:00 2001
From: Richard Leitner <richard.leitner@skidata.com>
Date: Mon, 6 Mar 2017 09:24:23 +0100
Subject: MAINTAINERS: usb251xb: remove reference inexistent file

The platform_data header file was dropped in the merged version of the
USB251xB driver. Therefore remove its reference from the MAINTAINERS file.

Signed-off-by: Richard Leitner <richard.leitner@skidata.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index c265a5fe4848..c776906f67a9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8307,7 +8307,6 @@ M:	Richard Leitner <richard.leitner@skidata.com>
 L:	linux-usb@vger.kernel.org
 S:	Maintained
 F:	drivers/usb/misc/usb251xb.c
-F:	include/linux/platform_data/usb251xb.h
 F:	Documentation/devicetree/bindings/usb/usb251xb.txt
 
 MICROSOFT SURFACE PRO 3 BUTTON DRIVER
-- 
cgit v1.2.3


From 2f6821462fe3ace62df3f1b5a9463153e8288298 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 24 Feb 2017 19:11:28 +0100
Subject: USB: serial: digi_acceleport: fix OOB-event processing

A recent change claimed to fix an off-by-one error in the OOB-port
completion handler, but instead introduced such an error. This could
specifically led to modem-status changes going unnoticed, effectively
breaking TIOCMGET.

Note that the offending commit fixes a loop-condition underflow and is
marked for stable, but should not be backported without this fix.

Reported-by: Ben Hutchings <ben@decadent.org.uk>
Fixes: 2d380889215f ("USB: serial: digi_acceleport: fix OOB data sanity check")
Cc: stable <stable@vger.kernel.org>	# v2.6.30
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/digi_acceleport.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c
index ab78111e0968..6537d3ca2797 100644
--- a/drivers/usb/serial/digi_acceleport.c
+++ b/drivers/usb/serial/digi_acceleport.c
@@ -1500,7 +1500,7 @@ static int digi_read_oob_callback(struct urb *urb)
 		return -1;
 
 	/* handle each oob command */
-	for (i = 0; i < urb->actual_length - 4; i += 4) {
+	for (i = 0; i < urb->actual_length - 3; i += 4) {
 		opcode = buf[i];
 		line = buf[i + 1];
 		status = buf[i + 2];
-- 
cgit v1.2.3


From 9200c6f177638909dbbaded8aeeeccbd48744400 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Thu, 9 Feb 2017 00:30:22 +0100
Subject: Revert "phy: Add USB3 PHY support for Broadcom NSP SoC"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit d7bc1a7d41bf ("phy: Add USB3 PHY support for
Broadcom NSP SoC") as we already have driver for this PHY (shared by NS
and NSP). It was added in commit e5666281d9ea ("phy: bcm-ns-usb3: new
driver for USB 3.0 PHY on Northstar").

Instead of adding separated driver & duplicating code we should work on
improving existing (old) one. Thanks to work done by Broadcom we know
there is MDIO bus we weren't aware of & we know register names which
makes initialization more clear. This is very valuable info and we
should work on using it in existing driver afterwards.

Acked-by: Jon Mason <jon.mason@broadcom.com>
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
---
 drivers/phy/Kconfig            |   8 --
 drivers/phy/Makefile           |   1 -
 drivers/phy/phy-bcm-nsp-usb3.c | 177 -----------------------------------------
 3 files changed, 186 deletions(-)
 delete mode 100644 drivers/phy/phy-bcm-nsp-usb3.c

diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig
index dc5277ad1b5a..c11044439fd4 100644
--- a/drivers/phy/Kconfig
+++ b/drivers/phy/Kconfig
@@ -510,12 +510,4 @@ config PHY_MESON8B_USB2
 	  and GXBB SoCs.
 	  If unsure, say N.
 
-config PHY_NSP_USB3
-	tristate "Broadcom NorthStar plus USB3 PHY driver"
-	depends on OF && (ARCH_BCM_NSP || COMPILE_TEST)
-	select GENERIC_PHY
-	default ARCH_BCM_NSP
-	help
-	  Enable this to support the Broadcom Northstar plus USB3 PHY.
-	  If unsure, say N.
 endmenu
diff --git a/drivers/phy/Makefile b/drivers/phy/Makefile
index e7b0feb1e125..dd8f3b5d2918 100644
--- a/drivers/phy/Makefile
+++ b/drivers/phy/Makefile
@@ -62,4 +62,3 @@ obj-$(CONFIG_PHY_CYGNUS_PCIE)		+= phy-bcm-cygnus-pcie.o
 obj-$(CONFIG_ARCH_TEGRA) += tegra/
 obj-$(CONFIG_PHY_NS2_PCIE)		+= phy-bcm-ns2-pcie.o
 obj-$(CONFIG_PHY_MESON8B_USB2)		+= phy-meson8b-usb2.o
-obj-$(CONFIG_PHY_NSP_USB3)		+= phy-bcm-nsp-usb3.o
diff --git a/drivers/phy/phy-bcm-nsp-usb3.c b/drivers/phy/phy-bcm-nsp-usb3.c
deleted file mode 100644
index 49024eaa5545..000000000000
--- a/drivers/phy/phy-bcm-nsp-usb3.c
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * Copyright (C) 2016 Broadcom
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/delay.h>
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <linux/mfd/syscon.h>
-#include <linux/mdio.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/phy/phy.h>
-#include <linux/regmap.h>
-
-#define NSP_USB3_RST_CTRL_OFFSET	0x3f8
-
-/* mdio reg access */
-#define NSP_USB3_PHY_BASE_ADDR_REG	0x1f
-
-#define NSP_USB3_PHY_PLL30_BLOCK	0x8000
-#define NSP_USB3_PLL_CONTROL		0x01
-#define NSP_USB3_PLLA_CONTROL0		0x0a
-#define NSP_USB3_PLLA_CONTROL1		0x0b
-
-#define NSP_USB3_PHY_TX_PMD_BLOCK	0x8040
-#define NSP_USB3_TX_PMD_CONTROL1	0x01
-
-#define NSP_USB3_PHY_PIPE_BLOCK		0x8060
-#define NSP_USB3_LFPS_CMP		0x02
-#define NSP_USB3_LFPS_DEGLITCH		0x03
-
-struct nsp_usb3_phy {
-	struct regmap *usb3_ctrl;
-	struct phy *phy;
-	struct mdio_device *mdiodev;
-};
-
-static int nsp_usb3_phy_init(struct phy *phy)
-{
-	struct nsp_usb3_phy *iphy = phy_get_drvdata(phy);
-	struct mii_bus *bus = iphy->mdiodev->bus;
-	int addr = iphy->mdiodev->addr;
-	u32 data;
-	int rc;
-
-	rc = regmap_read(iphy->usb3_ctrl, 0, &data);
-	if (rc)
-		return rc;
-	data |= 1;
-	rc = regmap_write(iphy->usb3_ctrl, 0, data);
-	if (rc)
-		return rc;
-
-	rc = regmap_write(iphy->usb3_ctrl, NSP_USB3_RST_CTRL_OFFSET, 1);
-	if (rc)
-		return rc;
-
-	rc = mdiobus_write(bus, addr, NSP_USB3_PHY_BASE_ADDR_REG,
-			   NSP_USB3_PHY_PLL30_BLOCK);
-	if (rc)
-		return rc;
-
-	rc = mdiobus_write(bus, addr, NSP_USB3_PLL_CONTROL, 0x1000);
-	if (rc)
-		return rc;
-
-	rc = mdiobus_write(bus, addr, NSP_USB3_PLLA_CONTROL0, 0x6400);
-	if (rc)
-		return rc;
-
-	rc = mdiobus_write(bus, addr, NSP_USB3_PLLA_CONTROL1, 0xc000);
-	if (rc)
-		return rc;
-
-	rc = mdiobus_write(bus, addr, NSP_USB3_PLLA_CONTROL1, 0x8000);
-	if (rc)
-		return rc;
-
-	rc = regmap_write(iphy->usb3_ctrl, NSP_USB3_RST_CTRL_OFFSET, 0);
-	if (rc)
-		return rc;
-
-	rc = mdiobus_write(bus, addr, NSP_USB3_PLL_CONTROL, 0x9000);
-	if (rc)
-		return rc;
-
-	rc = mdiobus_write(bus, addr, NSP_USB3_PHY_BASE_ADDR_REG,
-			   NSP_USB3_PHY_PIPE_BLOCK);
-	if (rc)
-		return rc;
-
-	rc = mdiobus_write(bus, addr, NSP_USB3_LFPS_CMP, 0xf30d);
-	if (rc)
-		return rc;
-
-	rc = mdiobus_write(bus, addr, NSP_USB3_LFPS_DEGLITCH, 0x6302);
-	if (rc)
-		return rc;
-
-	rc = mdiobus_write(bus, addr, NSP_USB3_PHY_BASE_ADDR_REG,
-			   NSP_USB3_PHY_TX_PMD_BLOCK);
-	if (rc)
-		return rc;
-
-	rc = mdiobus_write(bus, addr, NSP_USB3_TX_PMD_CONTROL1, 0x1003);
-
-	return rc;
-}
-
-static struct phy_ops nsp_usb3_phy_ops = {
-	.init	= nsp_usb3_phy_init,
-	.owner	= THIS_MODULE,
-};
-
-static int nsp_usb3_phy_probe(struct mdio_device *mdiodev)
-{
-	struct device *dev = &mdiodev->dev;
-	struct phy_provider *provider;
-	struct nsp_usb3_phy *iphy;
-
-	iphy = devm_kzalloc(dev, sizeof(*iphy), GFP_KERNEL);
-	if (!iphy)
-		return -ENOMEM;
-	iphy->mdiodev = mdiodev;
-
-	iphy->usb3_ctrl = syscon_regmap_lookup_by_phandle(dev->of_node,
-						 "usb3-ctrl-syscon");
-	if (IS_ERR(iphy->usb3_ctrl))
-		return PTR_ERR(iphy->usb3_ctrl);
-
-	iphy->phy = devm_phy_create(dev, dev->of_node, &nsp_usb3_phy_ops);
-	if (IS_ERR(iphy->phy)) {
-		dev_err(dev, "failed to create PHY\n");
-		return PTR_ERR(iphy->phy);
-	}
-
-	phy_set_drvdata(iphy->phy, iphy);
-
-	provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
-	if (IS_ERR(provider)) {
-		dev_err(dev, "could not register PHY provider\n");
-		return PTR_ERR(provider);
-	}
-
-	return 0;
-}
-
-static const struct of_device_id nsp_usb3_phy_of_match[] = {
-	{.compatible = "brcm,nsp-usb3-phy",},
-	{ /* sentinel */ }
-};
-
-static struct mdio_driver nsp_usb3_phy_driver = {
-	.mdiodrv = {
-		.driver = {
-			.name = "nsp-usb3-phy",
-			.of_match_table = nsp_usb3_phy_of_match,
-		},
-	},
-	.probe = nsp_usb3_phy_probe,
-};
-
-mdio_module_driver(nsp_usb3_phy_driver);
-
-MODULE_DESCRIPTION("Broadcom NSP USB3 PHY driver");
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Yendapally Reddy Dhananjaya Reddy <yendapally.reddy@broadcom.com");
-- 
cgit v1.2.3


From 677941a304b72620f519e542fb7c370424dcc297 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Thu, 9 Feb 2017 00:30:23 +0100
Subject: Revert "dt-bindings: phy: Add documentation for NSP USB3 PHY"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit c8ca631f9480 ("dt-bindings: phy: Add documentation
for NSP USB3 PHY") to match reverting commit adding the new PHY driver.
Please note we revert this commit before it reached stable release.

If new compatible string is needed it should be added to the existing
bcm-ns-usb3-phy.txt which already describes this PHY.

Acked-by: Jon Mason <jon.mason@broadcom.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
---
 .../devicetree/bindings/phy/brcm,nsp-usb3-phy.txt  | 39 ----------------------
 1 file changed, 39 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/phy/brcm,nsp-usb3-phy.txt

diff --git a/Documentation/devicetree/bindings/phy/brcm,nsp-usb3-phy.txt b/Documentation/devicetree/bindings/phy/brcm,nsp-usb3-phy.txt
deleted file mode 100644
index e68ae5dec9c9..000000000000
--- a/Documentation/devicetree/bindings/phy/brcm,nsp-usb3-phy.txt
+++ /dev/null
@@ -1,39 +0,0 @@
-Broadcom USB3 phy binding for northstar plus SoC
-The USB3 phy is internal to the SoC and is accessed using mdio interface.
-
-Required mdio bus properties:
-- reg: Should be 0x0 for SoC internal USB3 phy
-- #address-cells: must be 1
-- #size-cells: must be 0
-
-Required USB3 PHY properties:
-- compatible: should be "brcm,nsp-usb3-phy"
-- reg: USB3 Phy address on SoC internal MDIO bus and it should be 0x10.
-- usb3-ctrl-syscon: handler of syscon node defining physical address
-  of usb3 control register.
-- #phy-cells: must be 0
-
-Required usb3 control properties:
-- compatible: should be "brcm,nsp-usb3-ctrl"
-- reg: offset and length of the control registers
-
-Example:
-
-	mdio@0 {
-		reg = <0x0>;
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		usb3_phy: usb-phy@10 {
-			compatible = "brcm,nsp-usb3-phy";
-			reg = <0x10>;
-			usb3-ctrl-syscon = <&usb3_ctrl>;
-			#phy-cells = <0>;
-			status = "disabled";
-		};
-	};
-
-	usb3_ctrl: syscon@104408 {
-		compatible = "brcm,nsp-usb3-ctrl", "syscon";
-		reg = <0x104408 0x3fc>;
-	};
-- 
cgit v1.2.3


From 11d94e026b962eee6e1fbc4237f2cbfbec839067 Mon Sep 17 00:00:00 2001
From: Jaehoon Chung <jh80.chung@samsung.com>
Date: Wed, 8 Mar 2017 17:22:42 +0900
Subject: phy: phy-exynos-pcie: fix the wrong error return

When it doesn't get the blk_base's resource, it was returned
the error about phy_base, not blk_base.
This patch is for fixing the wrong error return about blk_base.

Fixes: cf0adb8e281b ("phy: phy-exynos-pcie: Add support for Exynos PCIe PHY")

Signed-off-by: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 drivers/phy/phy-exynos-pcie.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/phy/phy-exynos-pcie.c b/drivers/phy/phy-exynos-pcie.c
index 4f60b83641d5..60baf25d98e2 100644
--- a/drivers/phy/phy-exynos-pcie.c
+++ b/drivers/phy/phy-exynos-pcie.c
@@ -254,8 +254,8 @@ static int exynos_pcie_phy_probe(struct platform_device *pdev)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
 	exynos_phy->blk_base = devm_ioremap_resource(dev, res);
-	if (IS_ERR(exynos_phy->phy_base))
-		return PTR_ERR(exynos_phy->phy_base);
+	if (IS_ERR(exynos_phy->blk_base))
+		return PTR_ERR(exynos_phy->blk_base);
 
 	exynos_phy->drv_data = drv_data;
 
-- 
cgit v1.2.3


From 1a09b6a7c10e22c489a8b212dd6862b1fd9674ad Mon Sep 17 00:00:00 2001
From: Stephen Boyd <stephen.boyd@linaro.org>
Date: Thu, 9 Mar 2017 13:45:44 +0530
Subject: phy: qcom-usb-hs: Add depends on EXTCON

We get the following compile errors if EXTCON is enabled as a
module but this driver is builtin:

drivers/built-in.o: In function `qcom_usb_hs_phy_power_off':
phy-qcom-usb-hs.c:(.text+0x1089): undefined reference to `extcon_unregister_notifier'
drivers/built-in.o: In function `qcom_usb_hs_phy_probe':
phy-qcom-usb-hs.c:(.text+0x11b5): undefined reference to `extcon_get_edev_by_phandle'
drivers/built-in.o: In function `qcom_usb_hs_phy_power_on':
phy-qcom-usb-hs.c:(.text+0x128e): undefined reference to `extcon_get_state'
phy-qcom-usb-hs.c:(.text+0x12a9): undefined reference to `extcon_register_notifier'

so let's mark this as needing to follow the modular status of
the extcon framework.

Fixes: 9994a33865f4 e2427b09ba929c2b9 (phy: Add support for Qualcomm's USB HS phy")
Signed-off-by: Stephen Boyd <stephen.boyd@linaro.org>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 drivers/phy/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig
index c11044439fd4..005cadb7a3f8 100644
--- a/drivers/phy/Kconfig
+++ b/drivers/phy/Kconfig
@@ -449,6 +449,7 @@ config PHY_QCOM_UFS
 config PHY_QCOM_USB_HS
 	tristate "Qualcomm USB HS PHY module"
 	depends on USB_ULPI_BUS
+	depends on EXTCON || !EXTCON # if EXTCON=m, this cannot be built-in
 	select GENERIC_PHY
 	help
 	  Support for the USB high-speed ULPI compliant phy on Qualcomm
-- 
cgit v1.2.3


From 28b62b1458685d8f68f67d9b2d511bf8fa32b746 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Wed, 8 Mar 2017 23:14:20 +0200
Subject: crypto: s5p-sss - Fix spinlock recursion on LRW(AES)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Running TCRYPT with LRW compiled causes spinlock recursion:

    testing speed of async lrw(aes) (lrw(ecb-aes-s5p)) encryption
    tcrypt: test 0 (256 bit key, 16 byte blocks): 19007 operations in 1 seconds (304112 bytes)
    tcrypt: test 1 (256 bit key, 64 byte blocks): 15753 operations in 1 seconds (1008192 bytes)
    tcrypt: test 2 (256 bit key, 256 byte blocks): 14293 operations in 1 seconds (3659008 bytes)
    tcrypt: test 3 (256 bit key, 1024 byte blocks): 11906 operations in 1 seconds (12191744 bytes)
    tcrypt: test 4 (256 bit key, 8192 byte blocks):
    BUG: spinlock recursion on CPU#1, irq/84-10830000/89
     lock: 0xeea99a68, .magic: dead4ead, .owner: irq/84-10830000/89, .owner_cpu: 1
    CPU: 1 PID: 89 Comm: irq/84-10830000 Not tainted 4.11.0-rc1-00001-g897ca6d0800d #559
    Hardware name: SAMSUNG EXYNOS (Flattened Device Tree)
    [<c010e1ec>] (unwind_backtrace) from [<c010ae1c>] (show_stack+0x10/0x14)
    [<c010ae1c>] (show_stack) from [<c03449c0>] (dump_stack+0x78/0x8c)
    [<c03449c0>] (dump_stack) from [<c015de68>] (do_raw_spin_lock+0x11c/0x120)
    [<c015de68>] (do_raw_spin_lock) from [<c0720110>] (_raw_spin_lock_irqsave+0x20/0x28)
    [<c0720110>] (_raw_spin_lock_irqsave) from [<c0572ca0>] (s5p_aes_crypt+0x2c/0xb4)
    [<c0572ca0>] (s5p_aes_crypt) from [<bf1d8aa4>] (do_encrypt+0x78/0xb0 [lrw])
    [<bf1d8aa4>] (do_encrypt [lrw]) from [<bf1d8b00>] (encrypt_done+0x24/0x54 [lrw])
    [<bf1d8b00>] (encrypt_done [lrw]) from [<c05732a0>] (s5p_aes_complete+0x60/0xcc)
    [<c05732a0>] (s5p_aes_complete) from [<c0573440>] (s5p_aes_interrupt+0x134/0x1a0)
    [<c0573440>] (s5p_aes_interrupt) from [<c01667c4>] (irq_thread_fn+0x1c/0x54)
    [<c01667c4>] (irq_thread_fn) from [<c0166a98>] (irq_thread+0x12c/0x1e0)
    [<c0166a98>] (irq_thread) from [<c0136a28>] (kthread+0x108/0x138)
    [<c0136a28>] (kthread) from [<c0107778>] (ret_from_fork+0x14/0x3c)

Interrupt handling routine was calling req->base.complete() under
spinlock.  In most cases this wasn't fatal but when combined with some
of the cipher modes (like LRW) this caused recursion - starting the new
encryption (s5p_aes_crypt()) while still holding the spinlock from
previous round (s5p_aes_complete()).

Beside that, the s5p_aes_interrupt() error handling path could execute
two completions in case of error for RX and TX blocks.

Rewrite the interrupt handling routine and the completion by:

1. Splitting the operations on scatterlist copies from
   s5p_aes_complete() into separate s5p_sg_done(). This still should be
   done under lock.
   The s5p_aes_complete() now only calls req->base.complete() and it has
   to be called outside of lock.

2. Moving the s5p_aes_complete() out of spinlock critical sections.
   In interrupt service routine s5p_aes_interrupts(), it appeared in few
   places, including error paths inside other functions called from ISR.
   This code was not so obvious to read so simplify it by putting the
   s5p_aes_complete() only within ISR level.

Reported-by: Nathan Royce <nroycea+kernel@gmail.com>
Cc: <stable@vger.kernel.org> # v4.10.x: 07de4bc88c crypto: s5p-sss - Fix completing
Cc: <stable@vger.kernel.org> # v4.10.x
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/s5p-sss.c | 127 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 82 insertions(+), 45 deletions(-)

diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c
index a668286d62cb..1b9da3dc799b 100644
--- a/drivers/crypto/s5p-sss.c
+++ b/drivers/crypto/s5p-sss.c
@@ -270,7 +270,7 @@ static void s5p_sg_copy_buf(void *buf, struct scatterlist *sg,
 	scatterwalk_done(&walk, out, 0);
 }
 
-static void s5p_aes_complete(struct s5p_aes_dev *dev, int err)
+static void s5p_sg_done(struct s5p_aes_dev *dev)
 {
 	if (dev->sg_dst_cpy) {
 		dev_dbg(dev->dev,
@@ -281,8 +281,11 @@ static void s5p_aes_complete(struct s5p_aes_dev *dev, int err)
 	}
 	s5p_free_sg_cpy(dev, &dev->sg_src_cpy);
 	s5p_free_sg_cpy(dev, &dev->sg_dst_cpy);
+}
 
-	/* holding a lock outside */
+/* Calls the completion. Cannot be called with dev->lock hold. */
+static void s5p_aes_complete(struct s5p_aes_dev *dev, int err)
+{
 	dev->req->base.complete(&dev->req->base, err);
 	dev->busy = false;
 }
@@ -368,51 +371,44 @@ exit:
 }
 
 /*
- * Returns true if new transmitting (output) data is ready and its
- * address+length have to be written to device (by calling
- * s5p_set_dma_outdata()). False otherwise.
+ * Returns -ERRNO on error (mapping of new data failed).
+ * On success returns:
+ *  - 0 if there is no more data,
+ *  - 1 if new transmitting (output) data is ready and its address+length
+ *     have to be written to device (by calling s5p_set_dma_outdata()).
  */
-static bool s5p_aes_tx(struct s5p_aes_dev *dev)
+static int s5p_aes_tx(struct s5p_aes_dev *dev)
 {
-	int err = 0;
-	bool ret = false;
+	int ret = 0;
 
 	s5p_unset_outdata(dev);
 
 	if (!sg_is_last(dev->sg_dst)) {
-		err = s5p_set_outdata(dev, sg_next(dev->sg_dst));
-		if (err)
-			s5p_aes_complete(dev, err);
-		else
-			ret = true;
-	} else {
-		s5p_aes_complete(dev, err);
-
-		dev->busy = true;
-		tasklet_schedule(&dev->tasklet);
+		ret = s5p_set_outdata(dev, sg_next(dev->sg_dst));
+		if (!ret)
+			ret = 1;
 	}
 
 	return ret;
 }
 
 /*
- * Returns true if new receiving (input) data is ready and its
- * address+length have to be written to device (by calling
- * s5p_set_dma_indata()). False otherwise.
+ * Returns -ERRNO on error (mapping of new data failed).
+ * On success returns:
+ *  - 0 if there is no more data,
+ *  - 1 if new receiving (input) data is ready and its address+length
+ *     have to be written to device (by calling s5p_set_dma_indata()).
  */
-static bool s5p_aes_rx(struct s5p_aes_dev *dev)
+static int s5p_aes_rx(struct s5p_aes_dev *dev/*, bool *set_dma*/)
 {
-	int err;
-	bool ret = false;
+	int ret = 0;
 
 	s5p_unset_indata(dev);
 
 	if (!sg_is_last(dev->sg_src)) {
-		err = s5p_set_indata(dev, sg_next(dev->sg_src));
-		if (err)
-			s5p_aes_complete(dev, err);
-		else
-			ret = true;
+		ret = s5p_set_indata(dev, sg_next(dev->sg_src));
+		if (!ret)
+			ret = 1;
 	}
 
 	return ret;
@@ -422,33 +418,73 @@ static irqreturn_t s5p_aes_interrupt(int irq, void *dev_id)
 {
 	struct platform_device *pdev = dev_id;
 	struct s5p_aes_dev *dev = platform_get_drvdata(pdev);
-	bool set_dma_tx = false;
-	bool set_dma_rx = false;
+	int err_dma_tx = 0;
+	int err_dma_rx = 0;
+	bool tx_end = false;
 	unsigned long flags;
 	uint32_t status;
+	int err;
 
 	spin_lock_irqsave(&dev->lock, flags);
 
+	/*
+	 * Handle rx or tx interrupt. If there is still data (scatterlist did not
+	 * reach end), then map next scatterlist entry.
+	 * In case of such mapping error, s5p_aes_complete() should be called.
+	 *
+	 * If there is no more data in tx scatter list, call s5p_aes_complete()
+	 * and schedule new tasklet.
+	 */
 	status = SSS_READ(dev, FCINTSTAT);
 	if (status & SSS_FCINTSTAT_BRDMAINT)
-		set_dma_rx = s5p_aes_rx(dev);
-	if (status & SSS_FCINTSTAT_BTDMAINT)
-		set_dma_tx = s5p_aes_tx(dev);
+		err_dma_rx = s5p_aes_rx(dev);
+
+	if (status & SSS_FCINTSTAT_BTDMAINT) {
+		if (sg_is_last(dev->sg_dst))
+			tx_end = true;
+		err_dma_tx = s5p_aes_tx(dev);
+	}
 
 	SSS_WRITE(dev, FCINTPEND, status);
 
-	/*
-	 * Writing length of DMA block (either receiving or transmitting)
-	 * will start the operation immediately, so this should be done
-	 * at the end (even after clearing pending interrupts to not miss the
-	 * interrupt).
-	 */
-	if (set_dma_tx)
-		s5p_set_dma_outdata(dev, dev->sg_dst);
-	if (set_dma_rx)
-		s5p_set_dma_indata(dev, dev->sg_src);
+	if (err_dma_rx < 0) {
+		err = err_dma_rx;
+		goto error;
+	}
+	if (err_dma_tx < 0) {
+		err = err_dma_tx;
+		goto error;
+	}
+
+	if (tx_end) {
+		s5p_sg_done(dev);
+
+		spin_unlock_irqrestore(&dev->lock, flags);
+
+		s5p_aes_complete(dev, 0);
+		dev->busy = true;
+		tasklet_schedule(&dev->tasklet);
+	} else {
+		/*
+		 * Writing length of DMA block (either receiving or
+		 * transmitting) will start the operation immediately, so this
+		 * should be done at the end (even after clearing pending
+		 * interrupts to not miss the interrupt).
+		 */
+		if (err_dma_tx == 1)
+			s5p_set_dma_outdata(dev, dev->sg_dst);
+		if (err_dma_rx == 1)
+			s5p_set_dma_indata(dev, dev->sg_src);
 
+		spin_unlock_irqrestore(&dev->lock, flags);
+	}
+
+	return IRQ_HANDLED;
+
+error:
+	s5p_sg_done(dev);
 	spin_unlock_irqrestore(&dev->lock, flags);
+	s5p_aes_complete(dev, err);
 
 	return IRQ_HANDLED;
 }
@@ -597,8 +633,9 @@ outdata_error:
 	s5p_unset_indata(dev);
 
 indata_error:
-	s5p_aes_complete(dev, err);
+	s5p_sg_done(dev);
 	spin_unlock_irqrestore(&dev->lock, flags);
+	s5p_aes_complete(dev, err);
 }
 
 static void s5p_tasklet_cb(unsigned long data)
-- 
cgit v1.2.3


From 05d8d34611139f8435af90ac54b65eb31e82e388 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
Date: Tue, 7 Mar 2017 17:51:49 +0100
Subject: KVM: nVMX: do not warn when MSR bitmap address is not backed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Before trying to do nested_get_page() in nested_vmx_merge_msr_bitmap(),
we have already checked that the MSR bitmap address is valid (4k aligned
and within physical limits).  SDM doesn't specify what happens if the
there is no memory mapped at the valid address, but Intel CPUs treat the
situation as if the bitmap was configured to trap all MSRs.

KVM already does that by returning false and a correct handling doesn't
need the guest-trigerrable warning that was reported by syzkaller:
(The warning was originally there to catch some possible bugs in nVMX.)

  ------------[ cut here ]------------
  WARNING: CPU: 0 PID: 7832 at arch/x86/kvm/vmx.c:9709
  nested_vmx_merge_msr_bitmap arch/x86/kvm/vmx.c:9709 [inline]
  WARNING: CPU: 0 PID: 7832 at arch/x86/kvm/vmx.c:9709
  nested_get_vmcs12_pages+0xfb6/0x15c0 arch/x86/kvm/vmx.c:9640
  Kernel panic - not syncing: panic_on_warn set ...
  CPU: 0 PID: 7832 Comm: syz-executor1 Not tainted 4.10.0+ #229
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
  Call Trace:
   __dump_stack lib/dump_stack.c:15 [inline]
   dump_stack+0x2ee/0x3ef lib/dump_stack.c:51
   panic+0x1fb/0x412 kernel/panic.c:179
   __warn+0x1c4/0x1e0 kernel/panic.c:540
   warn_slowpath_null+0x2c/0x40 kernel/panic.c:583
   nested_vmx_merge_msr_bitmap arch/x86/kvm/vmx.c:9709 [inline]
   nested_get_vmcs12_pages+0xfb6/0x15c0 arch/x86/kvm/vmx.c:9640
   enter_vmx_non_root_mode arch/x86/kvm/vmx.c:10471 [inline]
   nested_vmx_run+0x6186/0xaab0 arch/x86/kvm/vmx.c:10561
   handle_vmlaunch+0x1a/0x20 arch/x86/kvm/vmx.c:7312
   vmx_handle_exit+0xfc0/0x3f00 arch/x86/kvm/vmx.c:8526
   vcpu_enter_guest arch/x86/kvm/x86.c:6982 [inline]
   vcpu_run arch/x86/kvm/x86.c:7044 [inline]
   kvm_arch_vcpu_ioctl_run+0x1418/0x4840 arch/x86/kvm/x86.c:7205
   kvm_vcpu_ioctl+0x673/0x1120 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2570

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
[Jim Mattson explained the bare metal behavior: "I believe this behavior
 would be documented in the chipset data sheet rather than the SDM,
 since the chipset returns all 1s for an unclaimed read."]
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ab338581b3ec..98e82ee1e699 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -9680,10 +9680,8 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
 		return false;
 
 	page = nested_get_page(vcpu, vmcs12->msr_bitmap);
-	if (!page) {
-		WARN_ON(1);
+	if (!page)
 		return false;
-	}
 	msr_bitmap_l1 = (unsigned long *)kmap(page);
 
 	memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
-- 
cgit v1.2.3


From 9ad224744218a352964f31007a1420f2420a08a0 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier@osg.samsung.com>
Date: Thu, 9 Mar 2017 11:05:33 -0300
Subject: i2c: exynos5: Avoid transaction timeouts due TRANSFER_DONE_AUTO not
 set

After commit 7999eecb7e56 ("i2c: exynos5: fix arbitration lost handling"),
some I2C transactions are failing because the TRANSFER_DONE_AUTO field is
not set in the I2C_TRANS_STATUS register so the i2c->status value is left
to -EINVAL causing the i2c->msg_complete completion to never be signaled.

For example, when reading the time of an I2C rtc on an Exynos5800 machine:

$ cat /sys/class/rtc/rtc0/time
[   25.924594] exynos5-hsi2c 12e10000.i2c: rx timeout
[   65.028365] max77686-rtc max77802-rtc: Fail to read time reg(-22)
cat: /sys/class/rtc/rtc0/time: Invalid argument

The Exynos5422 manual states clearly that most I2C_TRANS_STATUS reg bits
(including TRANSFER_DONE_AUTO) are cleared after the register is read. So
reading has side effects and should only be done if HSI2C_INT_I2C was set.

Fixes: 7999eecb7e56 ("i2c: exynos5: fix arbitration lost handling")
Signed-off-by: Javier Martinez Canillas <javier@osg.samsung.com>
Reviewed-by: Andrzej Hajda <a.hajda@samsung.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-exynos5.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-exynos5.c b/drivers/i2c/busses/i2c-exynos5.c
index cbd93ce0661f..736a82472101 100644
--- a/drivers/i2c/busses/i2c-exynos5.c
+++ b/drivers/i2c/busses/i2c-exynos5.c
@@ -457,7 +457,6 @@ static irqreturn_t exynos5_i2c_irq(int irqno, void *dev_id)
 
 	int_status = readl(i2c->regs + HSI2C_INT_STATUS);
 	writel(int_status, i2c->regs + HSI2C_INT_STATUS);
-	trans_status = readl(i2c->regs + HSI2C_TRANS_STATUS);
 
 	/* handle interrupt related to the transfer status */
 	if (i2c->variant->hw == HSI2C_EXYNOS7) {
@@ -482,11 +481,13 @@ static irqreturn_t exynos5_i2c_irq(int irqno, void *dev_id)
 			goto stop;
 		}
 
+		trans_status = readl(i2c->regs + HSI2C_TRANS_STATUS);
 		if ((trans_status & HSI2C_MASTER_ST_MASK) == HSI2C_MASTER_ST_LOSE) {
 			i2c->state = -EAGAIN;
 			goto stop;
 		}
 	} else if (int_status & HSI2C_INT_I2C) {
+		trans_status = readl(i2c->regs + HSI2C_TRANS_STATUS);
 		if (trans_status & HSI2C_NO_DEV_ACK) {
 			dev_dbg(i2c->dev, "No ACK from device\n");
 			i2c->state = -ENXIO;
-- 
cgit v1.2.3


From 8ce0928e6867fda4d208d7bddf251bce96ab8431 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Thu, 9 Mar 2017 16:32:17 +0100
Subject: Revert "i2c: add missing of_node_put in i2c_mux_del_adapters"

This reverts commit 02dbfa5e5583523035f05636c614a0eca77f1aab. I grabbed
the wrong version from the list and will pull the proper one from Peter
Rosin's mux tree.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-mux.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c
index 2178266bca79..83768e85a919 100644
--- a/drivers/i2c/i2c-mux.c
+++ b/drivers/i2c/i2c-mux.c
@@ -429,7 +429,6 @@ void i2c_mux_del_adapters(struct i2c_mux_core *muxc)
 	while (muxc->num_adapters) {
 		struct i2c_adapter *adap = muxc->adapter[--muxc->num_adapters];
 		struct i2c_mux_priv *priv = adap->algo_data;
-		struct device_node *np = adap->dev.of_node;
 
 		muxc->adapter[muxc->num_adapters] = NULL;
 
@@ -439,7 +438,6 @@ void i2c_mux_del_adapters(struct i2c_mux_core *muxc)
 
 		sysfs_remove_link(&priv->adap.dev.kobj, "mux_device");
 		i2c_del_adapter(adap);
-		of_node_put(np);
 		kfree(priv);
 	}
 }
-- 
cgit v1.2.3


From 806dbb20efde821910b5f747befed794077a9109 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Thu, 9 Mar 2017 16:41:48 +0100
Subject: Revert "i2c: copy device properties when using
 i2c_register_board_info()"

This reverts commit b0c1e95ab44feaad8831f2c06a3473c974003b49. It
contains a flaw and the next version has more features added which makes
me want to move it to the next cycle.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-boardinfo.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/drivers/i2c/i2c-boardinfo.c b/drivers/i2c/i2c-boardinfo.c
index 5b8f6c3a6950..6e5fac6a5262 100644
--- a/drivers/i2c/i2c-boardinfo.c
+++ b/drivers/i2c/i2c-boardinfo.c
@@ -15,7 +15,6 @@
 #include <linux/export.h>
 #include <linux/i2c.h>
 #include <linux/kernel.h>
-#include <linux/property.h>
 #include <linux/rwsem.h>
 #include <linux/slab.h>
 
@@ -56,7 +55,6 @@ EXPORT_SYMBOL_GPL(__i2c_first_dynamic_bus_num);
  *
  * The board info passed can safely be __initdata, but be careful of embedded
  * pointers (for platform_data, functions, etc) since that won't be copied.
- * Device properties are deep-copied though.
  */
 int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsigned len)
 {
@@ -80,14 +78,6 @@ int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsig
 
 		devinfo->busnum = busnum;
 		devinfo->board_info = *info;
-
-		if (info->properties) {
-			devinfo->board_info.properties =
-					property_entries_dup(info->properties);
-			if (IS_ERR(devinfo->board_info.properties))
-				return PTR_ERR(devinfo->board_info.properties);
-		}
-
 		list_add_tail(&devinfo->list, &__i2c_board_list);
 	}
 
-- 
cgit v1.2.3


From 6022c5cadf1a43ca30f431f128daa6163909ad60 Mon Sep 17 00:00:00 2001
From: Yuantian Tang <andy.tang@nxp.com>
Date: Thu, 9 Mar 2017 17:13:29 +0800
Subject: ahci: qoriq: correct the sata ecc setting error

Sata ecc is controlled by only 1 bit which is 24bit in big-endian
in ecc register. So only setting 24bit to disable sata ecc prevents
other bits from being overwritten in ecc register.

Signed-off-by: Tang Yuantian <andy.tang@nxp.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/ahci_qoriq.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/ata/ahci_qoriq.c b/drivers/ata/ahci_qoriq.c
index 85d833289f28..4c96f3ac4976 100644
--- a/drivers/ata/ahci_qoriq.c
+++ b/drivers/ata/ahci_qoriq.c
@@ -177,7 +177,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv)
 	case AHCI_LS1043A:
 		if (!qpriv->ecc_addr)
 			return -EINVAL;
-		writel(ECC_DIS_ARMV8_CH2, qpriv->ecc_addr);
+		writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2,
+				qpriv->ecc_addr);
 		writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
 		writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
 		if (qpriv->is_dmacoherent)
@@ -194,7 +195,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv)
 	case AHCI_LS1046A:
 		if (!qpriv->ecc_addr)
 			return -EINVAL;
-		writel(ECC_DIS_ARMV8_CH2, qpriv->ecc_addr);
+		writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2,
+				qpriv->ecc_addr);
 		writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
 		writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
 		if (qpriv->is_dmacoherent)
-- 
cgit v1.2.3


From 94a631d91ad341b3b4bdac72d1104d9f090e0ca9 Mon Sep 17 00:00:00 2001
From: Chunfeng Yun <chunfeng.yun@mediatek.com>
Date: Thu, 9 Mar 2017 15:39:34 +0200
Subject: usb: xhci-mtk: check hcc_params after adding primary hcd

hcc_params is set in xhci_gen_setup() called from usb_add_hcd(),
so checks the Maximum Primary Stream Array Size in the hcc_params
register after adding primary hcd.

Signed-off-by: Chunfeng Yun <chunfeng.yun@mediatek.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-mtk.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c
index 9066ec9e0c2e..6ac73a6b4da6 100644
--- a/drivers/usb/host/xhci-mtk.c
+++ b/drivers/usb/host/xhci-mtk.c
@@ -678,13 +678,13 @@ static int xhci_mtk_probe(struct platform_device *pdev)
 		goto power_off_phys;
 	}
 
-	if (HCC_MAX_PSA(xhci->hcc_params) >= 4)
-		xhci->shared_hcd->can_do_streams = 1;
-
 	ret = usb_add_hcd(hcd, irq, IRQF_SHARED);
 	if (ret)
 		goto put_usb3_hcd;
 
+	if (HCC_MAX_PSA(xhci->hcc_params) >= 4)
+		xhci->shared_hcd->can_do_streams = 1;
+
 	ret = usb_add_hcd(xhci->shared_hcd, irq, IRQF_SHARED);
 	if (ret)
 		goto dealloc_usb2_hcd;
-- 
cgit v1.2.3


From 20e4e37e4a2f1bfd43bcc8c3e666e47665036cc3 Mon Sep 17 00:00:00 2001
From: Chunfeng Yun <chunfeng.yun@mediatek.com>
Date: Thu, 9 Mar 2017 15:39:35 +0200
Subject: usb: xhci: remove dummy extra_priv_size for size of xhci_hcd struct

because hcd_priv_size is already size of xhci_hcd struct,
extra_priv_size is not needed anymore for MTK and tegra drivers.

Signed-off-by: Chunfeng Yun <chunfeng.yun@mediatek.com>
Tested-by: Thierry Reding <treding@nvidia.com>
Acked-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-mtk.c   | 1 -
 drivers/usb/host/xhci-tegra.c | 1 -
 2 files changed, 2 deletions(-)

diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c
index 6ac73a6b4da6..67d5dc79b6b5 100644
--- a/drivers/usb/host/xhci-mtk.c
+++ b/drivers/usb/host/xhci-mtk.c
@@ -382,7 +382,6 @@ static int usb_wakeup_of_property_parse(struct xhci_hcd_mtk *mtk,
 
 static int xhci_mtk_setup(struct usb_hcd *hcd);
 static const struct xhci_driver_overrides xhci_mtk_overrides __initconst = {
-	.extra_priv_size = sizeof(struct xhci_hcd),
 	.reset = xhci_mtk_setup,
 };
 
diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c
index a59fafb4b329..74436f8ca538 100644
--- a/drivers/usb/host/xhci-tegra.c
+++ b/drivers/usb/host/xhci-tegra.c
@@ -1308,7 +1308,6 @@ static int tegra_xhci_setup(struct usb_hcd *hcd)
 }
 
 static const struct xhci_driver_overrides tegra_xhci_overrides __initconst = {
-	.extra_priv_size = sizeof(struct xhci_hcd),
 	.reset = tegra_xhci_setup,
 };
 
-- 
cgit v1.2.3


From f95e60a7dbecd2de816bb3ad517b3d4fbc20b507 Mon Sep 17 00:00:00 2001
From: Peter Chen <peter.chen@nxp.com>
Date: Thu, 9 Mar 2017 15:39:36 +0200
Subject: usb: host: xhci-dbg: HCIVERSION should be a binary number

According to xHCI spec, HCIVERSION containing a BCD encoding
of the xHCI specification revision number, 0100h corresponds
to xHCI version 1.0. Change "100" as "0x100".

Cc: Lu Baolu <baolu.lu@linux.intel.com>
Cc: stable <stable@vger.kernel.org>
Fixes: 04abb6de2825 ("xhci: Read and parse new xhci
	1.1 capability register")
Signed-off-by: Peter Chen <peter.chen@nxp.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-dbg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/host/xhci-dbg.c b/drivers/usb/host/xhci-dbg.c
index 363d125300ea..2b4a00fa735d 100644
--- a/drivers/usb/host/xhci-dbg.c
+++ b/drivers/usb/host/xhci-dbg.c
@@ -109,7 +109,7 @@ static void xhci_print_cap_regs(struct xhci_hcd *xhci)
 	xhci_dbg(xhci, "RTSOFF 0x%x:\n", temp & RTSOFF_MASK);
 
 	/* xhci 1.1 controllers have the HCCPARAMS2 register */
-	if (hci_version > 100) {
+	if (hci_version > 0x100) {
 		temp = readl(&xhci->cap_regs->hcc_params2);
 		xhci_dbg(xhci, "HCC PARAMS2 0x%x:\n", (unsigned int) temp);
 		xhci_dbg(xhci, "  HC %s Force save context capability",
-- 
cgit v1.2.3


From dcc7620cad5ad1326a78f4031a7bf4f0e5b42984 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Thu, 9 Mar 2017 15:39:37 +0200
Subject: usb: host: xhci-plat: Fix timeout on removal of hot pluggable xhci
 controllers

Upstream commit 98d74f9ceaef ("xhci: fix 10 second timeout on removal of
PCI hotpluggable xhci controllers") fixes a problem with hot pluggable PCI
xhci controllers which can result in excessive timeouts, to the point where
the system reports a deadlock.

The same problem is seen with hot pluggable xhci controllers using the
xhci-plat driver, such as the driver used for Type-C ports on rk3399.
Similar to hot-pluggable PCI controllers, the driver for this chip
removes the xhci controller from the system when the Type-C cable is
disconnected.

The solution for PCI devices works just as well for non-PCI devices
and avoids the problem.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-plat.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
index 6d33b42ffcf5..bd02a6cd8e2c 100644
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -286,6 +286,8 @@ static int xhci_plat_remove(struct platform_device *dev)
 	struct xhci_hcd	*xhci = hcd_to_xhci(hcd);
 	struct clk *clk = xhci->clk;
 
+	xhci->xhc_state |= XHCI_STATE_REMOVING;
+
 	usb_remove_hcd(xhci->shared_hcd);
 	usb_phy_shutdown(hcd->usb_phy);
 
-- 
cgit v1.2.3


From 6d399783e9d4e9bd44931501948059d24ad96ff8 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Thu, 23 Feb 2017 12:26:41 -0800
Subject: md/raid10: submit bio directly to replacement disk

Commit 57c67df(md/raid10: submit IO from originating thread instead of
md thread) submits bio directly for normal disks but not for replacement
disks. There is no point we shouldn't do this for replacement disks.

Cc: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid10.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 063c43d83b72..1443305613c5 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1477,11 +1477,24 @@ retry_write:
 			mbio->bi_bdev = (void*)rdev;
 
 			atomic_inc(&r10_bio->remaining);
+
+			cb = blk_check_plugged(raid10_unplug, mddev,
+					       sizeof(*plug));
+			if (cb)
+				plug = container_of(cb, struct raid10_plug_cb,
+						    cb);
+			else
+				plug = NULL;
 			spin_lock_irqsave(&conf->device_lock, flags);
-			bio_list_add(&conf->pending_bio_list, mbio);
-			conf->pending_count++;
+			if (plug) {
+				bio_list_add(&plug->pending, mbio);
+				plug->pending_cnt++;
+			} else {
+				bio_list_add(&conf->pending_bio_list, mbio);
+				conf->pending_count++;
+			}
 			spin_unlock_irqrestore(&conf->device_lock, flags);
-			if (!mddev_check_plugged(mddev))
+			if (!plug)
 				md_wakeup_thread(mddev->thread);
 		}
 	}
-- 
cgit v1.2.3


From 99b3d74ec05c4a4c57766a90d65b53d78ab06404 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Thu, 23 Feb 2017 12:31:10 -0800
Subject: md: delete dead code

Nobody is using mddev_check_plugged(), so delete the dead code

Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/md.c | 8 --------
 drivers/md/md.h | 6 ------
 2 files changed, 14 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 548d1b8014f8..82bd1f3d2b19 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -440,14 +440,6 @@ void md_flush_request(struct mddev *mddev, struct bio *bio)
 }
 EXPORT_SYMBOL(md_flush_request);
 
-void md_unplug(struct blk_plug_cb *cb, bool from_schedule)
-{
-	struct mddev *mddev = cb->data;
-	md_wakeup_thread(mddev->thread);
-	kfree(cb);
-}
-EXPORT_SYMBOL(md_unplug);
-
 static inline struct mddev *mddev_get(struct mddev *mddev)
 {
 	atomic_inc(&mddev->active);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index b8859cbf84b6..dde8ecb760c8 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -676,16 +676,10 @@ extern void mddev_resume(struct mddev *mddev);
 extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
 				   struct mddev *mddev);
 
-extern void md_unplug(struct blk_plug_cb *cb, bool from_schedule);
 extern void md_reload_sb(struct mddev *mddev, int raid_disk);
 extern void md_update_sb(struct mddev *mddev, int force);
 extern void md_kick_rdev_from_array(struct md_rdev * rdev);
 struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
-static inline int mddev_check_plugged(struct mddev *mddev)
-{
-	return !!blk_check_plugged(md_unplug, mddev,
-				   sizeof(struct blk_plug_cb));
-}
 
 static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
 {
-- 
cgit v1.2.3


From 9c8043f337f14d1743006dfc59c03e80a42e3884 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <gqjiang@suse.com>
Date: Fri, 24 Feb 2017 11:15:12 +0800
Subject: md-cluster: free md_cluster_info if node leave cluster

To avoid memory leak, we need to free the cinfo which
is allocated when node join cluster.

Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/md-cluster.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 2b13117fb918..ba7edcdd09ce 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -974,6 +974,7 @@ static int leave(struct mddev *mddev)
 	lockres_free(cinfo->bitmap_lockres);
 	unlock_all_bitmaps(mddev);
 	dlm_release_lockspace(cinfo->lockspace, 2);
+	kfree(cinfo);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 75df023f4f2188c21181996e28234fef9351ef45 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <gqjiang@suse.com>
Date: Fri, 24 Feb 2017 11:15:13 +0800
Subject: md-cluster: remove useless memset from gather_all_resync_info

This memset is not needed.  The lvb is already zeroed because
it was recently allocated by lockres_init, which uses kzalloc(),
and read_resync_info() doesn't need it to be zero anyway.

Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/md-cluster.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index ba7edcdd09ce..321ecac23027 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -777,7 +777,6 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
 		bm_lockres->flags |= DLM_LKF_NOQUEUE;
 		ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
 		if (ret == -EAGAIN) {
-			memset(bm_lockres->lksb.sb_lvbptr, '\0', LVB_SIZE);
 			s = read_resync_info(mddev, bm_lockres);
 			if (s) {
 				pr_info("%s:%d Resync[%llu..%llu] in progress on %d\n",
-- 
cgit v1.2.3


From c94836342192b05d599d6aa3397f732f7a238689 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <gqjiang@suse.com>
Date: Fri, 24 Feb 2017 11:15:23 +0800
Subject: md: move funcs from pers->resize to update_size

raid1_resize and raid5_resize should also check the
mddev->queue if run underneath dm-raid.

And both set_capacity and revalidate_disk are used in
pers->resize such as raid1, raid10 and raid5. So
move them from personality file to common code.

Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/md.c     | 8 ++++++--
 drivers/md/raid1.c  | 2 --
 drivers/md/raid10.c | 4 ----
 drivers/md/raid5.c  | 2 --
 4 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 82bd1f3d2b19..bd15a18485c8 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6525,8 +6525,12 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
 			return -ENOSPC;
 	}
 	rv = mddev->pers->resize(mddev, num_sectors);
-	if (!rv)
-		revalidate_disk(mddev->gendisk);
+	if (!rv) {
+		if (mddev->queue) {
+			set_capacity(mddev->gendisk, mddev->array_sectors);
+			revalidate_disk(mddev->gendisk);
+		}
+	}
 	return rv;
 }
 
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index fbc2d7851b49..10c3865e1186 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -3246,8 +3246,6 @@ static int raid1_resize(struct mddev *mddev, sector_t sectors)
 			return ret;
 	}
 	md_set_array_sectors(mddev, newsize);
-	set_capacity(mddev->gendisk, mddev->array_sectors);
-	revalidate_disk(mddev->gendisk);
 	if (sectors > mddev->dev_sectors &&
 	    mddev->recovery_cp > mddev->dev_sectors) {
 		mddev->recovery_cp = mddev->dev_sectors;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 1443305613c5..c4db6d1fb6a2 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -3956,10 +3956,6 @@ static int raid10_resize(struct mddev *mddev, sector_t sectors)
 			return ret;
 	}
 	md_set_array_sectors(mddev, size);
-	if (mddev->queue) {
-		set_capacity(mddev->gendisk, mddev->array_sectors);
-		revalidate_disk(mddev->gendisk);
-	}
 	if (sectors > mddev->dev_sectors &&
 	    mddev->recovery_cp > oldsize) {
 		mddev->recovery_cp = oldsize;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 4fb09b3fcb41..6bfedfcf41c1 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7605,8 +7605,6 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors)
 			return ret;
 	}
 	md_set_array_sectors(mddev, newsize);
-	set_capacity(mddev->gendisk, mddev->array_sectors);
-	revalidate_disk(mddev->gendisk);
 	if (sectors > mddev->dev_sectors &&
 	    mddev->recovery_cp > mddev->dev_sectors) {
 		mddev->recovery_cp = mddev->dev_sectors;
-- 
cgit v1.2.3


From 1b3bae49fba52f1ec499c36c53bc07761a9f6c4d Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Wed, 1 Mar 2017 07:31:28 +1100
Subject: md: don't impose the MD_SB_DISKS limit on arrays without metadata.

These arrays, created with "mdadm --build" don't benefit from a limit.
The default will be used, which is '0' and is interpreted as "don't
impose a limit".

Reported-by: ian_bruce@mail.ru
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/md.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index bd15a18485c8..cd89ad3c3a0d 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6450,11 +6450,10 @@ static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
 	mddev->layout        = info->layout;
 	mddev->chunk_sectors = info->chunk_size >> 9;
 
-	mddev->max_disks     = MD_SB_DISKS;
-
 	if (mddev->persistent) {
-		mddev->flags         = 0;
-		mddev->sb_flags         = 0;
+		mddev->max_disks = MD_SB_DISKS;
+		mddev->flags = 0;
+		mddev->sb_flags = 0;
 	}
 	set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
 
-- 
cgit v1.2.3


From 61eb2b43b99ebdc9bc6bc83d9792257b243e7cb3 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Tue, 28 Feb 2017 13:00:20 -0800
Subject: md/raid1/10: fix potential deadlock

Neil Brown pointed out a potential deadlock in raid 10 code with
bio_split/chain. The raid1 code could have the same issue, but recent
barrier rework makes it less likely to happen. The deadlock happens in
below sequence:

1. generic_make_request(bio), this will set current->bio_list
2. raid10_make_request will split bio to bio1 and bio2
3. __make_request(bio1), wait_barrer, add underlayer disk bio to
current->bio_list
4. __make_request(bio2), wait_barrer

If raise_barrier happens between 3 & 4, since wait_barrier runs at 3,
raise_barrier waits for IO completion from 3. And since raise_barrier
sets barrier, 4 waits for raise_barrier. But IO from 3 can't be
dispatched because raid10_make_request() doesn't finished yet.

The solution is to adjust the IO ordering. Quotes from Neil:
"
It is much safer to:

    if (need to split) {
        split = bio_split(bio, ...)
        bio_chain(...)
        make_request_fn(split);
        generic_make_request(bio);
   } else
        make_request_fn(mddev, bio);

This way we first process the initial section of the bio (in 'split')
which will queue some requests to the underlying devices.  These
requests will be queued in generic_make_request.
Then we queue the remainder of the bio, which will be added to the end
of the generic_make_request queue.
Then we return.
generic_make_request() will pop the lower-level device requests off the
queue and handle them first.  Then it will process the remainder
of the original bio once the first section has been fully processed.
"

Note, this only happens in read path. In write path, the bio is flushed to
underlaying disks either by blk flush (from schedule) or offladed to raid1/10d.
It's queued in current->bio_list.

Cc: Coly Li <colyli@suse.de>
Cc: stable@vger.kernel.org (v3.14+, only the raid10 part)
Suggested-by: NeilBrown <neilb@suse.com>
Reviewed-by: Jack Wang <jinpu.wang@profitbricks.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid1.c  | 25 +++++++++++++++++++++++--
 drivers/md/raid10.c | 18 ++++++++++++++++++
 2 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 10c3865e1186..c33e96e33b8e 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1587,9 +1587,30 @@ static void raid1_make_request(struct mddev *mddev, struct bio *bio)
 			split = bio;
 		}
 
-		if (bio_data_dir(split) == READ)
+		if (bio_data_dir(split) == READ) {
 			raid1_read_request(mddev, split);
-		else
+
+			/*
+			 * If a bio is splitted, the first part of bio will
+			 * pass barrier but the bio is queued in
+			 * current->bio_list (see generic_make_request). If
+			 * there is a raise_barrier() called here, the second
+			 * part of bio can't pass barrier. But since the first
+			 * part bio isn't dispatched to underlaying disks yet,
+			 * the barrier is never released, hence raise_barrier
+			 * will alays wait. We have a deadlock.
+			 * Note, this only happens in read path. For write
+			 * path, the first part of bio is dispatched in a
+			 * schedule() call (because of blk plug) or offloaded
+			 * to raid10d.
+			 * Quitting from the function immediately can change
+			 * the bio order queued in bio_list and avoid the deadlock.
+			 */
+			if (split != bio) {
+				generic_make_request(bio);
+				break;
+			}
+		} else
 			raid1_write_request(mddev, split);
 	} while (split != bio);
 }
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index c4db6d1fb6a2..b1b1f982a722 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1584,7 +1584,25 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio)
 			split = bio;
 		}
 
+		/*
+		 * If a bio is splitted, the first part of bio will pass
+		 * barrier but the bio is queued in current->bio_list (see
+		 * generic_make_request). If there is a raise_barrier() called
+		 * here, the second part of bio can't pass barrier. But since
+		 * the first part bio isn't dispatched to underlaying disks
+		 * yet, the barrier is never released, hence raise_barrier will
+		 * alays wait. We have a deadlock.
+		 * Note, this only happens in read path. For write path, the
+		 * first part of bio is dispatched in a schedule() call
+		 * (because of blk plug) or offloaded to raid10d.
+		 * Quitting from the function immediately can change the bio
+		 * order queued in bio_list and avoid the deadlock.
+		 */
 		__make_request(mddev, split);
+		if (split != bio && bio_data_dir(bio) == READ) {
+			generic_make_request(bio);
+			break;
+		}
 	} while (split != bio);
 
 	/* In case raid10d snuck in to freeze_array */
-- 
cgit v1.2.3


From 6fb895692a034393d58679cd8d00c9e229719a5f Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 9 Mar 2017 17:24:02 +0300
Subject: x86/cpufeature: Add 5-level paging detection

Look for 'la57' in /proc/cpuinfo to see if your machine supports 5-level
paging.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/cpufeatures.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 4e7772387c6e..b04bb6dfed7f 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -289,7 +289,8 @@
 #define X86_FEATURE_PKU		(16*32+ 3) /* Protection Keys for Userspace */
 #define X86_FEATURE_OSPKE	(16*32+ 4) /* OS Protection Keys Enable */
 #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
-#define X86_FEATURE_RDPID	(16*32+ 22) /* RDPID instruction */
+#define X86_FEATURE_LA57	(16*32+16) /* 5-level page tables */
+#define X86_FEATURE_RDPID	(16*32+22) /* RDPID instruction */
 
 /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
 #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
-- 
cgit v1.2.3


From 505a60e225606fbd3d2eadc31ff793d939ba66f1 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 9 Mar 2017 17:24:03 +0300
Subject: asm-generic: introduce 5level-fixup.h

We are going to switch core MM to 5-level paging abstraction.

This is preparation step which adds <asm-generic/5level-fixup.h>
As with 4level-fixup.h, the new header allows quickly make all
architectures compatible with 5-level paging in core MM.

In long run we would like to switch architectures to properly folded p4d
level by using <asm-generic/pgtable-nop4d.h>, but it requires more
changes to arch-specific code.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/4level-fixup.h |  3 ++-
 include/asm-generic/5level-fixup.h | 41 ++++++++++++++++++++++++++++++++++++++
 include/linux/mm.h                 |  3 +++
 3 files changed, 46 insertions(+), 1 deletion(-)
 create mode 100644 include/asm-generic/5level-fixup.h

diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h
index 5bdab6bffd23..928fd66b1271 100644
--- a/include/asm-generic/4level-fixup.h
+++ b/include/asm-generic/4level-fixup.h
@@ -15,7 +15,6 @@
 	((unlikely(pgd_none(*(pud))) && __pmd_alloc(mm, pud, address))? \
  		NULL: pmd_offset(pud, address))
 
-#define pud_alloc(mm, pgd, address)	(pgd)
 #define pud_offset(pgd, start)		(pgd)
 #define pud_none(pud)			0
 #define pud_bad(pud)			0
@@ -35,4 +34,6 @@
 #undef  pud_addr_end
 #define pud_addr_end(addr, end)		(end)
 
+#include <asm-generic/5level-fixup.h>
+
 #endif
diff --git a/include/asm-generic/5level-fixup.h b/include/asm-generic/5level-fixup.h
new file mode 100644
index 000000000000..b5ca82dc4175
--- /dev/null
+++ b/include/asm-generic/5level-fixup.h
@@ -0,0 +1,41 @@
+#ifndef _5LEVEL_FIXUP_H
+#define _5LEVEL_FIXUP_H
+
+#define __ARCH_HAS_5LEVEL_HACK
+#define __PAGETABLE_P4D_FOLDED
+
+#define P4D_SHIFT			PGDIR_SHIFT
+#define P4D_SIZE			PGDIR_SIZE
+#define P4D_MASK			PGDIR_MASK
+#define PTRS_PER_P4D			1
+
+#define p4d_t				pgd_t
+
+#define pud_alloc(mm, p4d, address) \
+	((unlikely(pgd_none(*(p4d))) && __pud_alloc(mm, p4d, address)) ? \
+		NULL : pud_offset(p4d, address))
+
+#define p4d_alloc(mm, pgd, address)	(pgd)
+#define p4d_offset(pgd, start)		(pgd)
+#define p4d_none(p4d)			0
+#define p4d_bad(p4d)			0
+#define p4d_present(p4d)		1
+#define p4d_ERROR(p4d)			do { } while (0)
+#define p4d_clear(p4d)			pgd_clear(p4d)
+#define p4d_val(p4d)			pgd_val(p4d)
+#define p4d_populate(mm, p4d, pud)	pgd_populate(mm, p4d, pud)
+#define p4d_page(p4d)			pgd_page(p4d)
+#define p4d_page_vaddr(p4d)		pgd_page_vaddr(p4d)
+
+#define __p4d(x)			__pgd(x)
+#define set_p4d(p4dp, p4d)		set_pgd(p4dp, p4d)
+
+#undef p4d_free_tlb
+#define p4d_free_tlb(tlb, x, addr)	do { } while (0)
+#define p4d_free(mm, x)			do { } while (0)
+#define __p4d_free_tlb(tlb, x, addr)	do { } while (0)
+
+#undef  p4d_addr_end
+#define p4d_addr_end(addr, end)		(end)
+
+#endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0d65dd72c0f4..be1fe264eb37 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1619,11 +1619,14 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
  * Remove it when 4level-fixup.h has been removed.
  */
 #if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)
+
+#ifndef __ARCH_HAS_5LEVEL_HACK
 static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
 {
 	return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))?
 		NULL: pud_offset(pgd, address);
 }
+#endif /* !__ARCH_HAS_5LEVEL_HACK */
 
 static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 {
-- 
cgit v1.2.3


From 30ec842660bd0d056d4a7028ac5bd4a82b113d4f Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 9 Mar 2017 17:24:04 +0300
Subject: asm-generic: introduce __ARCH_USE_5LEVEL_HACK

We are going to introduce <asm-generic/pgtable-nop4d.h> to provide
abstraction for properly (in opposite to 5level-fixup.h hack) folded
p4d level. The new header will be included from pgtable-nopud.h.

If an architecture uses <asm-generic/nop*d.h>, we cannot use
5level-fixup.h directly to quickly convert the architecture to 5-level
paging as it would conflict with pgtable-nop4d.h.

With this patch an architecture can define __ARCH_USE_5LEVEL_HACK before
inclusion <asm-genenric/nop*d.h> to use 5level-fixup.h.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/pgtable-nop4d-hack.h | 62 ++++++++++++++++++++++++++++++++
 include/asm-generic/pgtable-nopud.h      |  5 +++
 2 files changed, 67 insertions(+)
 create mode 100644 include/asm-generic/pgtable-nop4d-hack.h

diff --git a/include/asm-generic/pgtable-nop4d-hack.h b/include/asm-generic/pgtable-nop4d-hack.h
new file mode 100644
index 000000000000..752fb7511750
--- /dev/null
+++ b/include/asm-generic/pgtable-nop4d-hack.h
@@ -0,0 +1,62 @@
+#ifndef _PGTABLE_NOP4D_HACK_H
+#define _PGTABLE_NOP4D_HACK_H
+
+#ifndef __ASSEMBLY__
+#include <asm-generic/5level-fixup.h>
+
+#define __PAGETABLE_PUD_FOLDED
+
+/*
+ * Having the pud type consist of a pgd gets the size right, and allows
+ * us to conceptually access the pgd entry that this pud is folded into
+ * without casting.
+ */
+typedef struct { pgd_t pgd; } pud_t;
+
+#define PUD_SHIFT	PGDIR_SHIFT
+#define PTRS_PER_PUD	1
+#define PUD_SIZE	(1UL << PUD_SHIFT)
+#define PUD_MASK	(~(PUD_SIZE-1))
+
+/*
+ * The "pgd_xxx()" functions here are trivial for a folded two-level
+ * setup: the pud is never bad, and a pud always exists (as it's folded
+ * into the pgd entry)
+ */
+static inline int pgd_none(pgd_t pgd)		{ return 0; }
+static inline int pgd_bad(pgd_t pgd)		{ return 0; }
+static inline int pgd_present(pgd_t pgd)	{ return 1; }
+static inline void pgd_clear(pgd_t *pgd)	{ }
+#define pud_ERROR(pud)				(pgd_ERROR((pud).pgd))
+
+#define pgd_populate(mm, pgd, pud)		do { } while (0)
+/*
+ * (puds are folded into pgds so this doesn't get actually called,
+ * but the define is needed for a generic inline function.)
+ */
+#define set_pgd(pgdptr, pgdval)	set_pud((pud_t *)(pgdptr), (pud_t) { pgdval })
+
+static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
+{
+	return (pud_t *)pgd;
+}
+
+#define pud_val(x)				(pgd_val((x).pgd))
+#define __pud(x)				((pud_t) { __pgd(x) })
+
+#define pgd_page(pgd)				(pud_page((pud_t){ pgd }))
+#define pgd_page_vaddr(pgd)			(pud_page_vaddr((pud_t){ pgd }))
+
+/*
+ * allocating and freeing a pud is trivial: the 1-entry pud is
+ * inside the pgd, so has no extra memory associated with it.
+ */
+#define pud_alloc_one(mm, address)		NULL
+#define pud_free(mm, x)				do { } while (0)
+#define __pud_free_tlb(tlb, x, a)		do { } while (0)
+
+#undef  pud_addr_end
+#define pud_addr_end(addr, end)			(end)
+
+#endif /* __ASSEMBLY__ */
+#endif /* _PGTABLE_NOP4D_HACK_H */
diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h
index 810431d8351b..5e49430a30a4 100644
--- a/include/asm-generic/pgtable-nopud.h
+++ b/include/asm-generic/pgtable-nopud.h
@@ -3,6 +3,10 @@
 
 #ifndef __ASSEMBLY__
 
+#ifdef __ARCH_USE_5LEVEL_HACK
+#include <asm-generic/pgtable-nop4d-hack.h>
+#else
+
 #define __PAGETABLE_PUD_FOLDED
 
 /*
@@ -58,4 +62,5 @@ static inline pud_t * pud_offset(pgd_t * pgd, unsigned long address)
 #define pud_addr_end(addr, end)			(end)
 
 #endif /* __ASSEMBLY__ */
+#endif /* !__ARCH_USE_5LEVEL_HACK */
 #endif /* _PGTABLE_NOPUD_H */
-- 
cgit v1.2.3


From 9849a5697d3defb2087cb6b9be5573a142697889 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 9 Mar 2017 17:24:05 +0300
Subject: arch, mm: convert all architectures to use 5level-fixup.h

If an architecture uses 4level-fixup.h we don't need to do anything as
it includes 5level-fixup.h.

If an architecture uses pgtable-nop*d.h, define __ARCH_USE_5LEVEL_HACK
before inclusion of the header. It makes asm-generic code to use
5level-fixup.h.

If an architecture has 4-level paging or folds levels on its own,
include 5level-fixup.h directly.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arc/include/asm/hugepage.h                  | 1 +
 arch/arc/include/asm/pgtable.h                   | 1 +
 arch/arm/include/asm/pgtable.h                   | 1 +
 arch/arm64/include/asm/pgtable-types.h           | 4 ++++
 arch/avr32/include/asm/pgtable-2level.h          | 1 +
 arch/cris/include/asm/pgtable.h                  | 1 +
 arch/frv/include/asm/pgtable.h                   | 1 +
 arch/h8300/include/asm/pgtable.h                 | 1 +
 arch/hexagon/include/asm/pgtable.h               | 1 +
 arch/ia64/include/asm/pgtable.h                  | 2 ++
 arch/metag/include/asm/pgtable.h                 | 1 +
 arch/microblaze/include/asm/page.h               | 3 ++-
 arch/mips/include/asm/pgtable-32.h               | 1 +
 arch/mips/include/asm/pgtable-64.h               | 1 +
 arch/mn10300/include/asm/page.h                  | 1 +
 arch/nios2/include/asm/pgtable.h                 | 1 +
 arch/openrisc/include/asm/pgtable.h              | 1 +
 arch/powerpc/include/asm/book3s/32/pgtable.h     | 1 +
 arch/powerpc/include/asm/book3s/64/pgtable.h     | 3 +++
 arch/powerpc/include/asm/nohash/32/pgtable.h     | 1 +
 arch/powerpc/include/asm/nohash/64/pgtable-4k.h  | 3 +++
 arch/powerpc/include/asm/nohash/64/pgtable-64k.h | 1 +
 arch/s390/include/asm/pgtable.h                  | 1 +
 arch/score/include/asm/pgtable.h                 | 1 +
 arch/sh/include/asm/pgtable-2level.h             | 1 +
 arch/sh/include/asm/pgtable-3level.h             | 1 +
 arch/sparc/include/asm/pgtable_64.h              | 1 +
 arch/tile/include/asm/pgtable_32.h               | 1 +
 arch/tile/include/asm/pgtable_64.h               | 1 +
 arch/um/include/asm/pgtable-2level.h             | 1 +
 arch/um/include/asm/pgtable-3level.h             | 1 +
 arch/unicore32/include/asm/pgtable.h             | 1 +
 arch/x86/include/asm/pgtable_types.h             | 4 ++++
 arch/xtensa/include/asm/pgtable.h                | 1 +
 34 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h
index 317ff773e1ca..b18fcb606908 100644
--- a/arch/arc/include/asm/hugepage.h
+++ b/arch/arc/include/asm/hugepage.h
@@ -11,6 +11,7 @@
 #define _ASM_ARC_HUGEPAGE_H
 
 #include <linux/types.h>
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 static inline pte_t pmd_pte(pmd_t pmd)
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index e94ca72b974e..ee22d40afef4 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -37,6 +37,7 @@
 
 #include <asm/page.h>
 #include <asm/mmu.h>
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 #include <linux/const.h>
 
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index a8d656d9aec7..1c462381c225 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -20,6 +20,7 @@
 
 #else
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopud.h>
 #include <asm/memory.h>
 #include <asm/pgtable-hwdef.h>
diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h
index 69b2fd41503c..345a072b5856 100644
--- a/arch/arm64/include/asm/pgtable-types.h
+++ b/arch/arm64/include/asm/pgtable-types.h
@@ -55,9 +55,13 @@ typedef struct { pteval_t pgprot; } pgprot_t;
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
 #if CONFIG_PGTABLE_LEVELS == 2
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 #elif CONFIG_PGTABLE_LEVELS == 3
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopud.h>
+#elif CONFIG_PGTABLE_LEVELS == 4
+#include <asm-generic/5level-fixup.h>
 #endif
 
 #endif	/* __ASM_PGTABLE_TYPES_H */
diff --git a/arch/avr32/include/asm/pgtable-2level.h b/arch/avr32/include/asm/pgtable-2level.h
index 425dd567b5b9..d5b1c63993ec 100644
--- a/arch/avr32/include/asm/pgtable-2level.h
+++ b/arch/avr32/include/asm/pgtable-2level.h
@@ -8,6 +8,7 @@
 #ifndef __ASM_AVR32_PGTABLE_2LEVEL_H
 #define __ASM_AVR32_PGTABLE_2LEVEL_H
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 /*
diff --git a/arch/cris/include/asm/pgtable.h b/arch/cris/include/asm/pgtable.h
index 2a3210ba4c72..fa3a73004cc5 100644
--- a/arch/cris/include/asm/pgtable.h
+++ b/arch/cris/include/asm/pgtable.h
@@ -6,6 +6,7 @@
 #define _CRIS_PGTABLE_H
 
 #include <asm/page.h>
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 #ifndef __ASSEMBLY__
diff --git a/arch/frv/include/asm/pgtable.h b/arch/frv/include/asm/pgtable.h
index a0513d463a1f..ab6e7e961b54 100644
--- a/arch/frv/include/asm/pgtable.h
+++ b/arch/frv/include/asm/pgtable.h
@@ -16,6 +16,7 @@
 #ifndef _ASM_PGTABLE_H
 #define _ASM_PGTABLE_H
 
+#include <asm-generic/5level-fixup.h>
 #include <asm/mem-layout.h>
 #include <asm/setup.h>
 #include <asm/processor.h>
diff --git a/arch/h8300/include/asm/pgtable.h b/arch/h8300/include/asm/pgtable.h
index 8341db67821d..7d265d28ba5e 100644
--- a/arch/h8300/include/asm/pgtable.h
+++ b/arch/h8300/include/asm/pgtable.h
@@ -1,5 +1,6 @@
 #ifndef _H8300_PGTABLE_H
 #define _H8300_PGTABLE_H
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopud.h>
 #include <asm-generic/pgtable.h>
 #define pgtable_cache_init()   do { } while (0)
diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h
index 49eab8136ec3..24a9177fb897 100644
--- a/arch/hexagon/include/asm/pgtable.h
+++ b/arch/hexagon/include/asm/pgtable.h
@@ -26,6 +26,7 @@
  */
 #include <linux/swap.h>
 #include <asm/page.h>
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 /* A handy thing to have if one has the RAM. Declared in head.S */
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index 384794e665fc..6cc22c8d8923 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -587,8 +587,10 @@ extern struct page *zero_page_memmap_ptr;
 
 
 #if CONFIG_PGTABLE_LEVELS == 3
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopud.h>
 #endif
+#include <asm-generic/5level-fixup.h>
 #include <asm-generic/pgtable.h>
 
 #endif /* _ASM_IA64_PGTABLE_H */
diff --git a/arch/metag/include/asm/pgtable.h b/arch/metag/include/asm/pgtable.h
index ffa3a3a2ecad..0c151e5af079 100644
--- a/arch/metag/include/asm/pgtable.h
+++ b/arch/metag/include/asm/pgtable.h
@@ -6,6 +6,7 @@
 #define _METAG_PGTABLE_H
 
 #include <asm/pgtable-bits.h>
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 /* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */
diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h
index fd850879854d..d506bb0893f9 100644
--- a/arch/microblaze/include/asm/page.h
+++ b/arch/microblaze/include/asm/page.h
@@ -95,7 +95,8 @@ typedef struct { unsigned long pgd; } pgd_t;
 #   else /* CONFIG_MMU */
 typedef struct { unsigned long	ste[64]; }	pmd_t;
 typedef struct { pmd_t		pue[1]; }	pud_t;
-typedef struct { pud_t		pge[1]; }	pgd_t;
+typedef struct { pud_t		p4e[1]; }	p4d_t;
+typedef struct { p4d_t		pge[1]; }	pgd_t;
 #   endif /* CONFIG_MMU */
 
 # define pte_val(x)	((x).pte)
diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h
index d21f3da7bdb6..6f94bed571c4 100644
--- a/arch/mips/include/asm/pgtable-32.h
+++ b/arch/mips/include/asm/pgtable-32.h
@@ -16,6 +16,7 @@
 #include <asm/cachectl.h>
 #include <asm/fixmap.h>
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 extern int temp_tlb_entry;
diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h
index 514cbc0a6a67..130a2a6c1531 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -17,6 +17,7 @@
 #include <asm/cachectl.h>
 #include <asm/fixmap.h>
 
+#define __ARCH_USE_5LEVEL_HACK
 #if defined(CONFIG_PAGE_SIZE_64KB) && !defined(CONFIG_MIPS_VA_BITS_48)
 #include <asm-generic/pgtable-nopmd.h>
 #else
diff --git a/arch/mn10300/include/asm/page.h b/arch/mn10300/include/asm/page.h
index 3810a6f740fd..dfe730a5ede0 100644
--- a/arch/mn10300/include/asm/page.h
+++ b/arch/mn10300/include/asm/page.h
@@ -57,6 +57,7 @@ typedef struct page *pgtable_t;
 #define __pgd(x)	((pgd_t) { (x) })
 #define __pgprot(x)	((pgprot_t) { (x) })
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h
index 298393c3cb42..db4f7d179220 100644
--- a/arch/nios2/include/asm/pgtable.h
+++ b/arch/nios2/include/asm/pgtable.h
@@ -22,6 +22,7 @@
 #include <asm/tlbflush.h>
 
 #include <asm/pgtable-bits.h>
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 #define FIRST_USER_ADDRESS	0UL
diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h
index 3567aa7be555..ff97374ca069 100644
--- a/arch/openrisc/include/asm/pgtable.h
+++ b/arch/openrisc/include/asm/pgtable.h
@@ -25,6 +25,7 @@
 #ifndef __ASM_OPENRISC_PGTABLE_H
 #define __ASM_OPENRISC_PGTABLE_H
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 #ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 012223638815..26ed228d4dc6 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_POWERPC_BOOK3S_32_PGTABLE_H
 #define _ASM_POWERPC_BOOK3S_32_PGTABLE_H
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 #include <asm/book3s/32/hash.h>
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 1eeeb72c7015..13c39b6d5d64 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1,9 +1,12 @@
 #ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
 #define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
 
+#include <asm-generic/5level-fixup.h>
+
 #ifndef __ASSEMBLY__
 #include <linux/mmdebug.h>
 #endif
+
 /*
  * Common bits between hash and Radix page table
  */
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index ba9921bf202e..5134ade2e850 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_POWERPC_NOHASH_32_PGTABLE_H
 #define _ASM_POWERPC_NOHASH_32_PGTABLE_H
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 #ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h
index d0db98793dd8..9f4de0a1035e 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h
@@ -1,5 +1,8 @@
 #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H
 #define _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H
+
+#include <asm-generic/5level-fixup.h>
+
 /*
  * Entries per page directory level.  The PTE level must use a 64b record
  * for each page table entry.  The PMD and PGD level use a 32b record for
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h
index 55b28ef3409a..1facb584dd29 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H
 #define _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopud.h>
 
 
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 7ed1972b1920..93e37b12e882 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -24,6 +24,7 @@
  * the S390 page table tree.
  */
 #ifndef __ASSEMBLY__
+#include <asm-generic/5level-fixup.h>
 #include <linux/sched.h>
 #include <linux/mm_types.h>
 #include <linux/page-flags.h>
diff --git a/arch/score/include/asm/pgtable.h b/arch/score/include/asm/pgtable.h
index 0553e5cd5985..46ff8fd678a7 100644
--- a/arch/score/include/asm/pgtable.h
+++ b/arch/score/include/asm/pgtable.h
@@ -2,6 +2,7 @@
 #define _ASM_SCORE_PGTABLE_H
 
 #include <linux/const.h>
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 #include <asm/fixmap.h>
diff --git a/arch/sh/include/asm/pgtable-2level.h b/arch/sh/include/asm/pgtable-2level.h
index 19bd89db17e7..f75cf4387257 100644
--- a/arch/sh/include/asm/pgtable-2level.h
+++ b/arch/sh/include/asm/pgtable-2level.h
@@ -1,6 +1,7 @@
 #ifndef __ASM_SH_PGTABLE_2LEVEL_H
 #define __ASM_SH_PGTABLE_2LEVEL_H
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 /*
diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h
index 249a985d9648..9b1e776eca31 100644
--- a/arch/sh/include/asm/pgtable-3level.h
+++ b/arch/sh/include/asm/pgtable-3level.h
@@ -1,6 +1,7 @@
 #ifndef __ASM_SH_PGTABLE_3LEVEL_H
 #define __ASM_SH_PGTABLE_3LEVEL_H
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopud.h>
 
 /*
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 56e49c8f770d..8a598528ec1f 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -12,6 +12,7 @@
  * the SpitFire page tables.
  */
 
+#include <asm-generic/5level-fixup.h>
 #include <linux/compiler.h>
 #include <linux/const.h>
 #include <asm/types.h>
diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h
index d26a42279036..5f8c615cb5e9 100644
--- a/arch/tile/include/asm/pgtable_32.h
+++ b/arch/tile/include/asm/pgtable_32.h
@@ -74,6 +74,7 @@ extern unsigned long VMALLOC_RESERVE /* = CONFIG_VMALLOC_RESERVE */;
 #define MAXMEM		(_VMALLOC_START - PAGE_OFFSET)
 
 /* We have no pmd or pud since we are strictly a two-level page table */
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 static inline int pud_huge_page(pud_t pud)	{ return 0; }
diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h
index e96cec52f6d8..96fe58b45118 100644
--- a/arch/tile/include/asm/pgtable_64.h
+++ b/arch/tile/include/asm/pgtable_64.h
@@ -59,6 +59,7 @@
 #ifndef __ASSEMBLY__
 
 /* We have no pud since we are a three-level page table. */
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopud.h>
 
 /*
diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h
index cfbe59752469..179c0ea87a0c 100644
--- a/arch/um/include/asm/pgtable-2level.h
+++ b/arch/um/include/asm/pgtable-2level.h
@@ -8,6 +8,7 @@
 #ifndef __UM_PGTABLE_2LEVEL_H
 #define __UM_PGTABLE_2LEVEL_H
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 /* PGDIR_SHIFT determines what a third-level page table entry can map */
diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h
index bae8523a162f..c4d876dfb9ac 100644
--- a/arch/um/include/asm/pgtable-3level.h
+++ b/arch/um/include/asm/pgtable-3level.h
@@ -7,6 +7,7 @@
 #ifndef __UM_PGTABLE_3LEVEL_H
 #define __UM_PGTABLE_3LEVEL_H
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopud.h>
 
 /* PGDIR_SHIFT determines what a third-level page table entry can map */
diff --git a/arch/unicore32/include/asm/pgtable.h b/arch/unicore32/include/asm/pgtable.h
index 818d0f5598e3..a4f2bef37e70 100644
--- a/arch/unicore32/include/asm/pgtable.h
+++ b/arch/unicore32/include/asm/pgtable.h
@@ -12,6 +12,7 @@
 #ifndef __UNICORE_PGTABLE_H__
 #define __UNICORE_PGTABLE_H__
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 #include <asm/cpu-single.h>
 
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 8b4de22d6429..62484333673d 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -273,6 +273,8 @@ static inline pgdval_t pgd_flags(pgd_t pgd)
 }
 
 #if CONFIG_PGTABLE_LEVELS > 3
+#include <asm-generic/5level-fixup.h>
+
 typedef struct { pudval_t pud; } pud_t;
 
 static inline pud_t native_make_pud(pmdval_t val)
@@ -285,6 +287,7 @@ static inline pudval_t native_pud_val(pud_t pud)
 	return pud.pud;
 }
 #else
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopud.h>
 
 static inline pudval_t native_pud_val(pud_t pud)
@@ -306,6 +309,7 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
 	return pmd.pmd;
 }
 #else
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 static inline pmdval_t native_pmd_val(pmd_t pmd)
diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
index 8aa0e0d9cbb2..30dd5b2e4ad5 100644
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -11,6 +11,7 @@
 #ifndef _XTENSA_PGTABLE_H
 #define _XTENSA_PGTABLE_H
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 #include <asm/page.h>
 #include <asm/kmem_layout.h>
-- 
cgit v1.2.3


From 048456dcf2c56ad6f6248e2899dda92fb6a613f6 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 9 Mar 2017 17:24:06 +0300
Subject: asm-generic: introduce <asm-generic/pgtable-nop4d.h>

Like with pgtable-nopud.h for 4-level paging, this new header is base
for converting an architectures to properly folded p4d_t level.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/pgtable-nop4d.h | 56 +++++++++++++++++++++++++++++++++++++
 include/asm-generic/pgtable-nopud.h | 43 ++++++++++++++--------------
 include/asm-generic/tlb.h           | 14 ++++++++--
 3 files changed, 89 insertions(+), 24 deletions(-)
 create mode 100644 include/asm-generic/pgtable-nop4d.h

diff --git a/include/asm-generic/pgtable-nop4d.h b/include/asm-generic/pgtable-nop4d.h
new file mode 100644
index 000000000000..de364ecb8df6
--- /dev/null
+++ b/include/asm-generic/pgtable-nop4d.h
@@ -0,0 +1,56 @@
+#ifndef _PGTABLE_NOP4D_H
+#define _PGTABLE_NOP4D_H
+
+#ifndef __ASSEMBLY__
+
+#define __PAGETABLE_P4D_FOLDED
+
+typedef struct { pgd_t pgd; } p4d_t;
+
+#define P4D_SHIFT	PGDIR_SHIFT
+#define PTRS_PER_P4D	1
+#define P4D_SIZE	(1UL << P4D_SHIFT)
+#define P4D_MASK	(~(P4D_SIZE-1))
+
+/*
+ * The "pgd_xxx()" functions here are trivial for a folded two-level
+ * setup: the p4d is never bad, and a p4d always exists (as it's folded
+ * into the pgd entry)
+ */
+static inline int pgd_none(pgd_t pgd)		{ return 0; }
+static inline int pgd_bad(pgd_t pgd)		{ return 0; }
+static inline int pgd_present(pgd_t pgd)	{ return 1; }
+static inline void pgd_clear(pgd_t *pgd)	{ }
+#define p4d_ERROR(p4d)				(pgd_ERROR((p4d).pgd))
+
+#define pgd_populate(mm, pgd, p4d)		do { } while (0)
+/*
+ * (p4ds are folded into pgds so this doesn't get actually called,
+ * but the define is needed for a generic inline function.)
+ */
+#define set_pgd(pgdptr, pgdval)	set_p4d((p4d_t *)(pgdptr), (p4d_t) { pgdval })
+
+static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
+{
+	return (p4d_t *)pgd;
+}
+
+#define p4d_val(x)				(pgd_val((x).pgd))
+#define __p4d(x)				((p4d_t) { __pgd(x) })
+
+#define pgd_page(pgd)				(p4d_page((p4d_t){ pgd }))
+#define pgd_page_vaddr(pgd)			(p4d_page_vaddr((p4d_t){ pgd }))
+
+/*
+ * allocating and freeing a p4d is trivial: the 1-entry p4d is
+ * inside the pgd, so has no extra memory associated with it.
+ */
+#define p4d_alloc_one(mm, address)		NULL
+#define p4d_free(mm, x)				do { } while (0)
+#define __p4d_free_tlb(tlb, x, a)		do { } while (0)
+
+#undef  p4d_addr_end
+#define p4d_addr_end(addr, end)			(end)
+
+#endif /* __ASSEMBLY__ */
+#endif /* _PGTABLE_NOP4D_H */
diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h
index 5e49430a30a4..c2b9b96d6268 100644
--- a/include/asm-generic/pgtable-nopud.h
+++ b/include/asm-generic/pgtable-nopud.h
@@ -6,53 +6,54 @@
 #ifdef __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nop4d-hack.h>
 #else
+#include <asm-generic/pgtable-nop4d.h>
 
 #define __PAGETABLE_PUD_FOLDED
 
 /*
- * Having the pud type consist of a pgd gets the size right, and allows
- * us to conceptually access the pgd entry that this pud is folded into
+ * Having the pud type consist of a p4d gets the size right, and allows
+ * us to conceptually access the p4d entry that this pud is folded into
  * without casting.
  */
-typedef struct { pgd_t pgd; } pud_t;
+typedef struct { p4d_t p4d; } pud_t;
 
-#define PUD_SHIFT	PGDIR_SHIFT
+#define PUD_SHIFT	P4D_SHIFT
 #define PTRS_PER_PUD	1
 #define PUD_SIZE  	(1UL << PUD_SHIFT)
 #define PUD_MASK  	(~(PUD_SIZE-1))
 
 /*
- * The "pgd_xxx()" functions here are trivial for a folded two-level
+ * The "p4d_xxx()" functions here are trivial for a folded two-level
  * setup: the pud is never bad, and a pud always exists (as it's folded
- * into the pgd entry)
+ * into the p4d entry)
  */
-static inline int pgd_none(pgd_t pgd)		{ return 0; }
-static inline int pgd_bad(pgd_t pgd)		{ return 0; }
-static inline int pgd_present(pgd_t pgd)	{ return 1; }
-static inline void pgd_clear(pgd_t *pgd)	{ }
-#define pud_ERROR(pud)				(pgd_ERROR((pud).pgd))
+static inline int p4d_none(p4d_t p4d)		{ return 0; }
+static inline int p4d_bad(p4d_t p4d)		{ return 0; }
+static inline int p4d_present(p4d_t p4d)	{ return 1; }
+static inline void p4d_clear(p4d_t *p4d)	{ }
+#define pud_ERROR(pud)				(p4d_ERROR((pud).p4d))
 
-#define pgd_populate(mm, pgd, pud)		do { } while (0)
+#define p4d_populate(mm, p4d, pud)		do { } while (0)
 /*
- * (puds are folded into pgds so this doesn't get actually called,
+ * (puds are folded into p4ds so this doesn't get actually called,
  * but the define is needed for a generic inline function.)
  */
-#define set_pgd(pgdptr, pgdval)			set_pud((pud_t *)(pgdptr), (pud_t) { pgdval })
+#define set_p4d(p4dptr, p4dval)	set_pud((pud_t *)(p4dptr), (pud_t) { p4dval })
 
-static inline pud_t * pud_offset(pgd_t * pgd, unsigned long address)
+static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
 {
-	return (pud_t *)pgd;
+	return (pud_t *)p4d;
 }
 
-#define pud_val(x)				(pgd_val((x).pgd))
-#define __pud(x)				((pud_t) { __pgd(x) } )
+#define pud_val(x)				(p4d_val((x).p4d))
+#define __pud(x)				((pud_t) { __p4d(x) })
 
-#define pgd_page(pgd)				(pud_page((pud_t){ pgd }))
-#define pgd_page_vaddr(pgd)			(pud_page_vaddr((pud_t){ pgd }))
+#define p4d_page(p4d)				(pud_page((pud_t){ p4d }))
+#define p4d_page_vaddr(p4d)			(pud_page_vaddr((pud_t){ p4d }))
 
 /*
  * allocating and freeing a pud is trivial: the 1-entry pud is
- * inside the pgd, so has no extra memory associated with it.
+ * inside the p4d, so has no extra memory associated with it.
  */
 #define pud_alloc_one(mm, address)		NULL
 #define pud_free(mm, x)				do { } while (0)
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 4329bc6ef04b..8afa4335e5b2 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -270,6 +270,12 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
 		__pte_free_tlb(tlb, ptep, address);		\
 	} while (0)
 
+#define pmd_free_tlb(tlb, pmdp, address)			\
+	do {							\
+		__tlb_adjust_range(tlb, address, PAGE_SIZE);		\
+		__pmd_free_tlb(tlb, pmdp, address);		\
+	} while (0)
+
 #ifndef __ARCH_HAS_4LEVEL_HACK
 #define pud_free_tlb(tlb, pudp, address)			\
 	do {							\
@@ -278,11 +284,13 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
 	} while (0)
 #endif
 
-#define pmd_free_tlb(tlb, pmdp, address)			\
+#ifndef __ARCH_HAS_5LEVEL_HACK
+#define p4d_free_tlb(tlb, pudp, address)			\
 	do {							\
-		__tlb_adjust_range(tlb, address, PAGE_SIZE);	\
-		__pmd_free_tlb(tlb, pmdp, address);		\
+		__tlb_adjust_range(tlb, address, PAGE_SIZE);		\
+		__p4d_free_tlb(tlb, pudp, address);		\
 	} while (0)
+#endif
 
 #define tlb_migrate_finish(mm) do {} while (0)
 
-- 
cgit v1.2.3


From c2febafc67734a62196c1b9dfba926412d4077ba Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 9 Mar 2017 17:24:07 +0300
Subject: mm: convert generic code to 5-level paging

Convert all non-architecture-specific code to 5-level paging.

It's mostly mechanical adding handling one more page table level in
places where we deal with pud_t.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-gru/grufault.c |   9 +-
 fs/userfaultfd.c                |   6 +-
 include/asm-generic/pgtable.h   |  48 +++++++++-
 include/linux/hugetlb.h         |   5 +-
 include/linux/kasan.h           |   1 +
 include/linux/mm.h              |  31 ++++--
 lib/ioremap.c                   |  39 +++++++-
 mm/gup.c                        |  46 +++++++--
 mm/huge_memory.c                |   7 +-
 mm/hugetlb.c                    |  29 +++---
 mm/kasan/kasan_init.c           |  44 ++++++++-
 mm/memory.c                     | 207 +++++++++++++++++++++++++++++++++-------
 mm/mlock.c                      |   1 +
 mm/mprotect.c                   |  26 ++++-
 mm/mremap.c                     |  13 ++-
 mm/page_vma_mapped.c            |   6 +-
 mm/pagewalk.c                   |  32 ++++++-
 mm/pgtable-generic.c            |   6 ++
 mm/rmap.c                       |   7 +-
 mm/sparse-vmemmap.c             |  22 ++++-
 mm/swapfile.c                   |  26 ++++-
 mm/userfaultfd.c                |  23 +++--
 mm/vmalloc.c                    |  81 ++++++++++++----
 23 files changed, 595 insertions(+), 120 deletions(-)

diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
index 6fb773dbcd0c..93be82fc338a 100644
--- a/drivers/misc/sgi-gru/grufault.c
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -219,15 +219,20 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
 	int write, unsigned long *paddr, int *pageshift)
 {
 	pgd_t *pgdp;
-	pmd_t *pmdp;
+	p4d_t *p4dp;
 	pud_t *pudp;
+	pmd_t *pmdp;
 	pte_t pte;
 
 	pgdp = pgd_offset(vma->vm_mm, vaddr);
 	if (unlikely(pgd_none(*pgdp)))
 		goto err;
 
-	pudp = pud_offset(pgdp, vaddr);
+	p4dp = p4d_offset(pgdp, vaddr);
+	if (unlikely(p4d_none(*p4dp)))
+		goto err;
+
+	pudp = pud_offset(p4dp, vaddr);
 	if (unlikely(pud_none(*pudp)))
 		goto err;
 
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 973607df579d..02ce3944d0f5 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -267,6 +267,7 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
 {
 	struct mm_struct *mm = ctx->mm;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd, _pmd;
 	pte_t *pte;
@@ -277,7 +278,10 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
 	pgd = pgd_offset(mm, address);
 	if (!pgd_present(*pgd))
 		goto out;
-	pud = pud_offset(pgd, address);
+	p4d = p4d_offset(pgd, address);
+	if (!p4d_present(*p4d))
+		goto out;
+	pud = pud_offset(p4d, address);
 	if (!pud_present(*pud))
 		goto out;
 	pmd = pmd_offset(pud, address);
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index f4ca23b158b3..1fad160f35de 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -10,9 +10,9 @@
 #include <linux/bug.h>
 #include <linux/errno.h>
 
-#if 4 - defined(__PAGETABLE_PUD_FOLDED) - defined(__PAGETABLE_PMD_FOLDED) != \
-	CONFIG_PGTABLE_LEVELS
-#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{PUD,PMD}_FOLDED
+#if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \
+	defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS
+#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED
 #endif
 
 /*
@@ -424,6 +424,13 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
 })
 
+#ifndef p4d_addr_end
+#define p4d_addr_end(addr, end)						\
+({	unsigned long __boundary = ((addr) + P4D_SIZE) & P4D_MASK;	\
+	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
+})
+#endif
+
 #ifndef pud_addr_end
 #define pud_addr_end(addr, end)						\
 ({	unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;	\
@@ -444,6 +451,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
  * Do the tests inline, but report and clear the bad entry in mm/memory.c.
  */
 void pgd_clear_bad(pgd_t *);
+void p4d_clear_bad(p4d_t *);
 void pud_clear_bad(pud_t *);
 void pmd_clear_bad(pmd_t *);
 
@@ -458,6 +466,17 @@ static inline int pgd_none_or_clear_bad(pgd_t *pgd)
 	return 0;
 }
 
+static inline int p4d_none_or_clear_bad(p4d_t *p4d)
+{
+	if (p4d_none(*p4d))
+		return 1;
+	if (unlikely(p4d_bad(*p4d))) {
+		p4d_clear_bad(p4d);
+		return 1;
+	}
+	return 0;
+}
+
 static inline int pud_none_or_clear_bad(pud_t *pud)
 {
 	if (pud_none(*pud))
@@ -844,11 +863,30 @@ static inline int pmd_protnone(pmd_t pmd)
 #endif /* CONFIG_MMU */
 
 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+
+#ifndef __PAGETABLE_P4D_FOLDED
+int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot);
+int p4d_clear_huge(p4d_t *p4d);
+#else
+static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
+{
+	return 0;
+}
+static inline int p4d_clear_huge(p4d_t *p4d)
+{
+	return 0;
+}
+#endif /* !__PAGETABLE_P4D_FOLDED */
+
 int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
 int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
 int pud_clear_huge(pud_t *pud);
 int pmd_clear_huge(pmd_t *pmd);
 #else	/* !CONFIG_HAVE_ARCH_HUGE_VMAP */
+static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
+{
+	return 0;
+}
 static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
 {
 	return 0;
@@ -857,6 +895,10 @@ static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
 {
 	return 0;
 }
+static inline int p4d_clear_huge(p4d_t *p4d)
+{
+	return 0;
+}
 static inline int pud_clear_huge(pud_t *pud)
 {
 	return 0;
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 503099d8aada..b857fc8cc2ec 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -122,7 +122,7 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
 				pud_t *pud, int flags);
 int pmd_huge(pmd_t pmd);
-int pud_huge(pud_t pmd);
+int pud_huge(pud_t pud);
 unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 		unsigned long address, unsigned long end, pgprot_t newprot);
 
@@ -197,6 +197,9 @@ static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
 #ifndef pgd_huge
 #define pgd_huge(x)	0
 #endif
+#ifndef p4d_huge
+#define p4d_huge(x)	0
+#endif
 
 #ifndef pgd_write
 static inline int pgd_write(pgd_t pgd)
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index ceb3fe78a0d3..1c823bef4c15 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -18,6 +18,7 @@ extern unsigned char kasan_zero_page[PAGE_SIZE];
 extern pte_t kasan_zero_pte[PTRS_PER_PTE];
 extern pmd_t kasan_zero_pmd[PTRS_PER_PMD];
 extern pud_t kasan_zero_pud[PTRS_PER_PUD];
+extern p4d_t kasan_zero_p4d[PTRS_PER_P4D];
 
 void kasan_populate_zero_shadow(const void *shadow_start,
 				const void *shadow_end);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index be1fe264eb37..5f01c88f0800 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1560,14 +1560,24 @@ static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
 	return ptep;
 }
 
+#ifdef __PAGETABLE_P4D_FOLDED
+static inline int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
+						unsigned long address)
+{
+	return 0;
+}
+#else
+int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
+#endif
+
 #ifdef __PAGETABLE_PUD_FOLDED
-static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd,
+static inline int __pud_alloc(struct mm_struct *mm, p4d_t *p4d,
 						unsigned long address)
 {
 	return 0;
 }
 #else
-int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
+int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address);
 #endif
 
 #if defined(__PAGETABLE_PMD_FOLDED) || !defined(CONFIG_MMU)
@@ -1621,10 +1631,18 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
 #if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)
 
 #ifndef __ARCH_HAS_5LEVEL_HACK
-static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
+		unsigned long address)
+{
+	return (unlikely(pgd_none(*pgd)) && __p4d_alloc(mm, pgd, address)) ?
+		NULL : p4d_offset(pgd, address);
+}
+
+static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d,
+		unsigned long address)
 {
-	return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))?
-		NULL: pud_offset(pgd, address);
+	return (unlikely(p4d_none(*p4d)) && __pud_alloc(mm, p4d, address)) ?
+		NULL : pud_offset(p4d, address);
 }
 #endif /* !__ARCH_HAS_5LEVEL_HACK */
 
@@ -2388,7 +2406,8 @@ void sparse_mem_maps_populate_node(struct page **map_map,
 
 struct page *sparse_mem_map_populate(unsigned long pnum, int nid);
 pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
-pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node);
+p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
+pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
 pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
 pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
 void *vmemmap_alloc_block(unsigned long size, int node);
diff --git a/lib/ioremap.c b/lib/ioremap.c
index a3e14ce92a56..4bb30206b942 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -14,6 +14,7 @@
 #include <asm/pgtable.h>
 
 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+static int __read_mostly ioremap_p4d_capable;
 static int __read_mostly ioremap_pud_capable;
 static int __read_mostly ioremap_pmd_capable;
 static int __read_mostly ioremap_huge_disabled;
@@ -35,6 +36,11 @@ void __init ioremap_huge_init(void)
 	}
 }
 
+static inline int ioremap_p4d_enabled(void)
+{
+	return ioremap_p4d_capable;
+}
+
 static inline int ioremap_pud_enabled(void)
 {
 	return ioremap_pud_capable;
@@ -46,6 +52,7 @@ static inline int ioremap_pmd_enabled(void)
 }
 
 #else	/* !CONFIG_HAVE_ARCH_HUGE_VMAP */
+static inline int ioremap_p4d_enabled(void) { return 0; }
 static inline int ioremap_pud_enabled(void) { return 0; }
 static inline int ioremap_pmd_enabled(void) { return 0; }
 #endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */
@@ -94,14 +101,14 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
 	return 0;
 }
 
-static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
+static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr,
 		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
 {
 	pud_t *pud;
 	unsigned long next;
 
 	phys_addr -= addr;
-	pud = pud_alloc(&init_mm, pgd, addr);
+	pud = pud_alloc(&init_mm, p4d, addr);
 	if (!pud)
 		return -ENOMEM;
 	do {
@@ -120,6 +127,32 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
 	return 0;
 }
 
+static inline int ioremap_p4d_range(pgd_t *pgd, unsigned long addr,
+		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
+{
+	p4d_t *p4d;
+	unsigned long next;
+
+	phys_addr -= addr;
+	p4d = p4d_alloc(&init_mm, pgd, addr);
+	if (!p4d)
+		return -ENOMEM;
+	do {
+		next = p4d_addr_end(addr, end);
+
+		if (ioremap_p4d_enabled() &&
+		    ((next - addr) == P4D_SIZE) &&
+		    IS_ALIGNED(phys_addr + addr, P4D_SIZE)) {
+			if (p4d_set_huge(p4d, phys_addr + addr, prot))
+				continue;
+		}
+
+		if (ioremap_pud_range(p4d, addr, next, phys_addr + addr, prot))
+			return -ENOMEM;
+	} while (p4d++, addr = next, addr != end);
+	return 0;
+}
+
 int ioremap_page_range(unsigned long addr,
 		       unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
 {
@@ -135,7 +168,7 @@ int ioremap_page_range(unsigned long addr,
 	pgd = pgd_offset_k(addr);
 	do {
 		next = pgd_addr_end(addr, end);
-		err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, prot);
+		err = ioremap_p4d_range(pgd, addr, next, phys_addr+addr, prot);
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
diff --git a/mm/gup.c b/mm/gup.c
index 9c047e951aa3..c74bad1bf6e8 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -226,6 +226,7 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
 			      unsigned int *page_mask)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	spinlock_t *ptl;
@@ -243,8 +244,13 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
 	pgd = pgd_offset(mm, address);
 	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
 		return no_page_table(vma, flags);
-
-	pud = pud_offset(pgd, address);
+	p4d = p4d_offset(pgd, address);
+	if (p4d_none(*p4d))
+		return no_page_table(vma, flags);
+	BUILD_BUG_ON(p4d_huge(*p4d));
+	if (unlikely(p4d_bad(*p4d)))
+		return no_page_table(vma, flags);
+	pud = pud_offset(p4d, address);
 	if (pud_none(*pud))
 		return no_page_table(vma, flags);
 	if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
@@ -325,6 +331,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
 		struct page **page)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
@@ -338,7 +345,9 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
 	else
 		pgd = pgd_offset_gate(mm, address);
 	BUG_ON(pgd_none(*pgd));
-	pud = pud_offset(pgd, address);
+	p4d = p4d_offset(pgd, address);
+	BUG_ON(p4d_none(*p4d));
+	pud = pud_offset(p4d, address);
 	BUG_ON(pud_none(*pud));
 	pmd = pmd_offset(pud, address);
 	if (pmd_none(*pmd))
@@ -1400,13 +1409,13 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 	return 1;
 }
 
-static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
+static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
 			 int write, struct page **pages, int *nr)
 {
 	unsigned long next;
 	pud_t *pudp;
 
-	pudp = pud_offset(&pgd, addr);
+	pudp = pud_offset(&p4d, addr);
 	do {
 		pud_t pud = READ_ONCE(*pudp);
 
@@ -1428,6 +1437,31 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
 	return 1;
 }
 
+static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
+			 int write, struct page **pages, int *nr)
+{
+	unsigned long next;
+	p4d_t *p4dp;
+
+	p4dp = p4d_offset(&pgd, addr);
+	do {
+		p4d_t p4d = READ_ONCE(*p4dp);
+
+		next = p4d_addr_end(addr, end);
+		if (p4d_none(p4d))
+			return 0;
+		BUILD_BUG_ON(p4d_huge(p4d));
+		if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) {
+			if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
+					 P4D_SHIFT, next, write, pages, nr))
+				return 0;
+		} else if (!gup_p4d_range(p4d, addr, next, write, pages, nr))
+			return 0;
+	} while (p4dp++, addr = next, addr != end);
+
+	return 1;
+}
+
 /*
  * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
  * the regular GUP. It will only return non-negative values.
@@ -1478,7 +1512,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
 					 PGDIR_SHIFT, next, write, pages, &nr))
 				break;
-		} else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+		} else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
 			break;
 	} while (pgdp++, addr = next, addr != end);
 	local_irq_restore(flags);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d36b2af4d1bf..e4766de25709 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2048,6 +2048,7 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
 		bool freeze, struct page *page)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 
@@ -2055,7 +2056,11 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
 	if (!pgd_present(*pgd))
 		return;
 
-	pud = pud_offset(pgd, address);
+	p4d = p4d_offset(pgd, address);
+	if (!p4d_present(*p4d))
+		return;
+
+	pud = pud_offset(p4d, address);
 	if (!pud_present(*pud))
 		return;
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a7aa811b7d14..3d0aab9ee80d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4555,7 +4555,8 @@ out:
 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 {
 	pgd_t *pgd = pgd_offset(mm, *addr);
-	pud_t *pud = pud_offset(pgd, *addr);
+	p4d_t *p4d = p4d_offset(pgd, *addr);
+	pud_t *pud = pud_offset(p4d, *addr);
 
 	BUG_ON(page_count(virt_to_page(ptep)) == 0);
 	if (page_count(virt_to_page(ptep)) == 1)
@@ -4586,11 +4587,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 			unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pte_t *pte = NULL;
 
 	pgd = pgd_offset(mm, addr);
-	pud = pud_alloc(mm, pgd, addr);
+	p4d = p4d_offset(pgd, addr);
+	pud = pud_alloc(mm, p4d, addr);
 	if (pud) {
 		if (sz == PUD_SIZE) {
 			pte = (pte_t *)pud;
@@ -4610,18 +4613,22 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
-	pmd_t *pmd = NULL;
+	pmd_t *pmd;
 
 	pgd = pgd_offset(mm, addr);
-	if (pgd_present(*pgd)) {
-		pud = pud_offset(pgd, addr);
-		if (pud_present(*pud)) {
-			if (pud_huge(*pud))
-				return (pte_t *)pud;
-			pmd = pmd_offset(pud, addr);
-		}
-	}
+	if (!pgd_present(*pgd))
+		return NULL;
+	p4d = p4d_offset(pgd, addr);
+	if (!p4d_present(*p4d))
+		return NULL;
+	pud = pud_offset(p4d, addr);
+	if (!pud_present(*pud))
+		return NULL;
+	if (pud_huge(*pud))
+		return (pte_t *)pud;
+	pmd = pmd_offset(pud, addr);
 	return (pte_t *) pmd;
 }
 
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
index 31238dad85fb..b96a5f773d88 100644
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c
@@ -30,6 +30,9 @@
  */
 unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss;
 
+#if CONFIG_PGTABLE_LEVELS > 4
+p4d_t kasan_zero_p4d[PTRS_PER_P4D] __page_aligned_bss;
+#endif
 #if CONFIG_PGTABLE_LEVELS > 3
 pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss;
 #endif
@@ -82,10 +85,10 @@ static void __init zero_pmd_populate(pud_t *pud, unsigned long addr,
 	} while (pmd++, addr = next, addr != end);
 }
 
-static void __init zero_pud_populate(pgd_t *pgd, unsigned long addr,
+static void __init zero_pud_populate(p4d_t *p4d, unsigned long addr,
 				unsigned long end)
 {
-	pud_t *pud = pud_offset(pgd, addr);
+	pud_t *pud = pud_offset(p4d, addr);
 	unsigned long next;
 
 	do {
@@ -107,6 +110,23 @@ static void __init zero_pud_populate(pgd_t *pgd, unsigned long addr,
 	} while (pud++, addr = next, addr != end);
 }
 
+static void __init zero_p4d_populate(pgd_t *pgd, unsigned long addr,
+				unsigned long end)
+{
+	p4d_t *p4d = p4d_offset(pgd, addr);
+	unsigned long next;
+
+	do {
+		next = p4d_addr_end(addr, end);
+
+		if (p4d_none(*p4d)) {
+			p4d_populate(&init_mm, p4d,
+				early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+		}
+		zero_pud_populate(p4d, addr, next);
+	} while (p4d++, addr = next, addr != end);
+}
+
 /**
  * kasan_populate_zero_shadow - populate shadow memory region with
  *                               kasan_zero_page
@@ -125,6 +145,7 @@ void __init kasan_populate_zero_shadow(const void *shadow_start,
 		next = pgd_addr_end(addr, end);
 
 		if (IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) {
+			p4d_t *p4d;
 			pud_t *pud;
 			pmd_t *pmd;
 
@@ -135,9 +156,22 @@ void __init kasan_populate_zero_shadow(const void *shadow_start,
 			 * 3,2 - level page tables where we don't have
 			 * puds,pmds, so pgd_populate(), pud_populate()
 			 * is noops.
+			 *
+			 * The ifndef is required to avoid build breakage.
+			 *
+			 * With 5level-fixup.h, pgd_populate() is not nop and
+			 * we reference kasan_zero_p4d. It's not defined
+			 * unless 5-level paging enabled.
+			 *
+			 * The ifndef can be dropped once all KASAN-enabled
+			 * architectures will switch to pgtable-nop4d.h.
 			 */
-			pgd_populate(&init_mm, pgd, lm_alias(kasan_zero_pud));
-			pud = pud_offset(pgd, addr);
+#ifndef __ARCH_HAS_5LEVEL_HACK
+			pgd_populate(&init_mm, pgd, lm_alias(kasan_zero_p4d));
+#endif
+			p4d = p4d_offset(pgd, addr);
+			p4d_populate(&init_mm, p4d, lm_alias(kasan_zero_pud));
+			pud = pud_offset(p4d, addr);
 			pud_populate(&init_mm, pud, lm_alias(kasan_zero_pmd));
 			pmd = pmd_offset(pud, addr);
 			pmd_populate_kernel(&init_mm, pmd, lm_alias(kasan_zero_pte));
@@ -148,6 +182,6 @@ void __init kasan_populate_zero_shadow(const void *shadow_start,
 			pgd_populate(&init_mm, pgd,
 				early_alloc(PAGE_SIZE, NUMA_NO_NODE));
 		}
-		zero_pud_populate(pgd, addr, next);
+		zero_p4d_populate(pgd, addr, next);
 	} while (pgd++, addr = next, addr != end);
 }
diff --git a/mm/memory.c b/mm/memory.c
index a97a4cec2e1f..7f1c2163b3ce 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -445,7 +445,7 @@ static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 	mm_dec_nr_pmds(tlb->mm);
 }
 
-static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+static inline void free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
 				unsigned long addr, unsigned long end,
 				unsigned long floor, unsigned long ceiling)
 {
@@ -454,7 +454,7 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 	unsigned long start;
 
 	start = addr;
-	pud = pud_offset(pgd, addr);
+	pud = pud_offset(p4d, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
@@ -462,6 +462,39 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 		free_pmd_range(tlb, pud, addr, next, floor, ceiling);
 	} while (pud++, addr = next, addr != end);
 
+	start &= P4D_MASK;
+	if (start < floor)
+		return;
+	if (ceiling) {
+		ceiling &= P4D_MASK;
+		if (!ceiling)
+			return;
+	}
+	if (end - 1 > ceiling - 1)
+		return;
+
+	pud = pud_offset(p4d, start);
+	p4d_clear(p4d);
+	pud_free_tlb(tlb, pud, start);
+}
+
+static inline void free_p4d_range(struct mmu_gather *tlb, pgd_t *pgd,
+				unsigned long addr, unsigned long end,
+				unsigned long floor, unsigned long ceiling)
+{
+	p4d_t *p4d;
+	unsigned long next;
+	unsigned long start;
+
+	start = addr;
+	p4d = p4d_offset(pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+		if (p4d_none_or_clear_bad(p4d))
+			continue;
+		free_pud_range(tlb, p4d, addr, next, floor, ceiling);
+	} while (p4d++, addr = next, addr != end);
+
 	start &= PGDIR_MASK;
 	if (start < floor)
 		return;
@@ -473,9 +506,9 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 	if (end - 1 > ceiling - 1)
 		return;
 
-	pud = pud_offset(pgd, start);
+	p4d = p4d_offset(pgd, start);
 	pgd_clear(pgd);
-	pud_free_tlb(tlb, pud, start);
+	p4d_free_tlb(tlb, p4d, start);
 }
 
 /*
@@ -539,7 +572,7 @@ void free_pgd_range(struct mmu_gather *tlb,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		free_pud_range(tlb, pgd, addr, next, floor, ceiling);
+		free_p4d_range(tlb, pgd, addr, next, floor, ceiling);
 	} while (pgd++, addr = next, addr != end);
 }
 
@@ -658,7 +691,8 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
 			  pte_t pte, struct page *page)
 {
 	pgd_t *pgd = pgd_offset(vma->vm_mm, addr);
-	pud_t *pud = pud_offset(pgd, addr);
+	p4d_t *p4d = p4d_offset(pgd, addr);
+	pud_t *pud = pud_offset(p4d, addr);
 	pmd_t *pmd = pmd_offset(pud, addr);
 	struct address_space *mapping;
 	pgoff_t index;
@@ -1023,16 +1057,16 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src
 }
 
 static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-		pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
+		p4d_t *dst_p4d, p4d_t *src_p4d, struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end)
 {
 	pud_t *src_pud, *dst_pud;
 	unsigned long next;
 
-	dst_pud = pud_alloc(dst_mm, dst_pgd, addr);
+	dst_pud = pud_alloc(dst_mm, dst_p4d, addr);
 	if (!dst_pud)
 		return -ENOMEM;
-	src_pud = pud_offset(src_pgd, addr);
+	src_pud = pud_offset(src_p4d, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		if (pud_trans_huge(*src_pud) || pud_devmap(*src_pud)) {
@@ -1056,6 +1090,28 @@ static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src
 	return 0;
 }
 
+static inline int copy_p4d_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+		pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
+		unsigned long addr, unsigned long end)
+{
+	p4d_t *src_p4d, *dst_p4d;
+	unsigned long next;
+
+	dst_p4d = p4d_alloc(dst_mm, dst_pgd, addr);
+	if (!dst_p4d)
+		return -ENOMEM;
+	src_p4d = p4d_offset(src_pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+		if (p4d_none_or_clear_bad(src_p4d))
+			continue;
+		if (copy_pud_range(dst_mm, src_mm, dst_p4d, src_p4d,
+						vma, addr, next))
+			return -ENOMEM;
+	} while (dst_p4d++, src_p4d++, addr = next, addr != end);
+	return 0;
+}
+
 int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		struct vm_area_struct *vma)
 {
@@ -1111,7 +1167,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(src_pgd))
 			continue;
-		if (unlikely(copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
+		if (unlikely(copy_p4d_range(dst_mm, src_mm, dst_pgd, src_pgd,
 					    vma, addr, next))) {
 			ret = -ENOMEM;
 			break;
@@ -1267,14 +1323,14 @@ next:
 }
 
 static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
-				struct vm_area_struct *vma, pgd_t *pgd,
+				struct vm_area_struct *vma, p4d_t *p4d,
 				unsigned long addr, unsigned long end,
 				struct zap_details *details)
 {
 	pud_t *pud;
 	unsigned long next;
 
-	pud = pud_offset(pgd, addr);
+	pud = pud_offset(p4d, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		if (pud_trans_huge(*pud) || pud_devmap(*pud)) {
@@ -1295,6 +1351,25 @@ next:
 	return addr;
 }
 
+static inline unsigned long zap_p4d_range(struct mmu_gather *tlb,
+				struct vm_area_struct *vma, pgd_t *pgd,
+				unsigned long addr, unsigned long end,
+				struct zap_details *details)
+{
+	p4d_t *p4d;
+	unsigned long next;
+
+	p4d = p4d_offset(pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+		if (p4d_none_or_clear_bad(p4d))
+			continue;
+		next = zap_pud_range(tlb, vma, p4d, addr, next, details);
+	} while (p4d++, addr = next, addr != end);
+
+	return addr;
+}
+
 void unmap_page_range(struct mmu_gather *tlb,
 			     struct vm_area_struct *vma,
 			     unsigned long addr, unsigned long end,
@@ -1310,7 +1385,7 @@ void unmap_page_range(struct mmu_gather *tlb,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		next = zap_pud_range(tlb, vma, pgd, addr, next, details);
+		next = zap_p4d_range(tlb, vma, pgd, addr, next, details);
 	} while (pgd++, addr = next, addr != end);
 	tlb_end_vma(tlb, vma);
 }
@@ -1465,16 +1540,24 @@ EXPORT_SYMBOL_GPL(zap_vma_ptes);
 pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
 			spinlock_t **ptl)
 {
-	pgd_t *pgd = pgd_offset(mm, addr);
-	pud_t *pud = pud_alloc(mm, pgd, addr);
-	if (pud) {
-		pmd_t *pmd = pmd_alloc(mm, pud, addr);
-		if (pmd) {
-			VM_BUG_ON(pmd_trans_huge(*pmd));
-			return pte_alloc_map_lock(mm, pmd, addr, ptl);
-		}
-	}
-	return NULL;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	pgd = pgd_offset(mm, addr);
+	p4d = p4d_alloc(mm, pgd, addr);
+	if (!p4d)
+		return NULL;
+	pud = pud_alloc(mm, p4d, addr);
+	if (!pud)
+		return NULL;
+	pmd = pmd_alloc(mm, pud, addr);
+	if (!pmd)
+		return NULL;
+
+	VM_BUG_ON(pmd_trans_huge(*pmd));
+	return pte_alloc_map_lock(mm, pmd, addr, ptl);
 }
 
 /*
@@ -1740,7 +1823,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
 	return 0;
 }
 
-static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
+static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d,
 			unsigned long addr, unsigned long end,
 			unsigned long pfn, pgprot_t prot)
 {
@@ -1748,7 +1831,7 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
 	unsigned long next;
 
 	pfn -= addr >> PAGE_SHIFT;
-	pud = pud_alloc(mm, pgd, addr);
+	pud = pud_alloc(mm, p4d, addr);
 	if (!pud)
 		return -ENOMEM;
 	do {
@@ -1760,6 +1843,26 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
 	return 0;
 }
 
+static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
+			unsigned long addr, unsigned long end,
+			unsigned long pfn, pgprot_t prot)
+{
+	p4d_t *p4d;
+	unsigned long next;
+
+	pfn -= addr >> PAGE_SHIFT;
+	p4d = p4d_alloc(mm, pgd, addr);
+	if (!p4d)
+		return -ENOMEM;
+	do {
+		next = p4d_addr_end(addr, end);
+		if (remap_pud_range(mm, p4d, addr, next,
+				pfn + (addr >> PAGE_SHIFT), prot))
+			return -ENOMEM;
+	} while (p4d++, addr = next, addr != end);
+	return 0;
+}
+
 /**
  * remap_pfn_range - remap kernel memory to userspace
  * @vma: user vma to map to
@@ -1816,7 +1919,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	flush_cache_range(vma, addr, end);
 	do {
 		next = pgd_addr_end(addr, end);
-		err = remap_pud_range(mm, pgd, addr, next,
+		err = remap_p4d_range(mm, pgd, addr, next,
 				pfn + (addr >> PAGE_SHIFT), prot);
 		if (err)
 			break;
@@ -1932,7 +2035,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
 	return err;
 }
 
-static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
+static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,
 				     unsigned long addr, unsigned long end,
 				     pte_fn_t fn, void *data)
 {
@@ -1940,7 +2043,7 @@ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
 	unsigned long next;
 	int err;
 
-	pud = pud_alloc(mm, pgd, addr);
+	pud = pud_alloc(mm, p4d, addr);
 	if (!pud)
 		return -ENOMEM;
 	do {
@@ -1952,6 +2055,26 @@ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
 	return err;
 }
 
+static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd,
+				     unsigned long addr, unsigned long end,
+				     pte_fn_t fn, void *data)
+{
+	p4d_t *p4d;
+	unsigned long next;
+	int err;
+
+	p4d = p4d_alloc(mm, pgd, addr);
+	if (!p4d)
+		return -ENOMEM;
+	do {
+		next = p4d_addr_end(addr, end);
+		err = apply_to_pud_range(mm, p4d, addr, next, fn, data);
+		if (err)
+			break;
+	} while (p4d++, addr = next, addr != end);
+	return err;
+}
+
 /*
  * Scan a region of virtual memory, filling in page tables as necessary
  * and calling a provided function on each leaf page table.
@@ -1970,7 +2093,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
 	pgd = pgd_offset(mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
-		err = apply_to_pud_range(mm, pgd, addr, next, fn, data);
+		err = apply_to_p4d_range(mm, pgd, addr, next, fn, data);
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
@@ -3653,11 +3776,15 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 	};
 	struct mm_struct *mm = vma->vm_mm;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	int ret;
 
 	pgd = pgd_offset(mm, address);
+	p4d = p4d_alloc(mm, pgd, address);
+	if (!p4d)
+		return VM_FAULT_OOM;
 
-	vmf.pud = pud_alloc(mm, pgd, address);
+	vmf.pud = pud_alloc(mm, p4d, address);
 	if (!vmf.pud)
 		return VM_FAULT_OOM;
 	if (pud_none(*vmf.pud) && transparent_hugepage_enabled(vma)) {
@@ -3784,7 +3911,7 @@ EXPORT_SYMBOL_GPL(handle_mm_fault);
  * Allocate page upper directory.
  * We've already handled the fast-path in-line.
  */
-int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address)
 {
 	pud_t *new = pud_alloc_one(mm, address);
 	if (!new)
@@ -3793,10 +3920,17 @@ int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
 	smp_wmb(); /* See comment in __pte_alloc */
 
 	spin_lock(&mm->page_table_lock);
-	if (pgd_present(*pgd))		/* Another has populated it */
+#ifndef __ARCH_HAS_5LEVEL_HACK
+	if (p4d_present(*p4d))		/* Another has populated it */
+		pud_free(mm, new);
+	else
+		p4d_populate(mm, p4d, new);
+#else
+	if (pgd_present(*p4d))		/* Another has populated it */
 		pud_free(mm, new);
 	else
-		pgd_populate(mm, pgd, new);
+		pgd_populate(mm, p4d, new);
+#endif /* __ARCH_HAS_5LEVEL_HACK */
 	spin_unlock(&mm->page_table_lock);
 	return 0;
 }
@@ -3839,6 +3973,7 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
 		pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *ptep;
@@ -3847,7 +3982,11 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
 	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
 		goto out;
 
-	pud = pud_offset(pgd, address);
+	p4d = p4d_offset(pgd, address);
+	if (p4d_none(*p4d) || unlikely(p4d_bad(*p4d)))
+		goto out;
+
+	pud = pud_offset(p4d, address);
 	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
 		goto out;
 
diff --git a/mm/mlock.c b/mm/mlock.c
index 1050511f8b2b..945edac46810 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -380,6 +380,7 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
 	pte = get_locked_pte(vma->vm_mm, start,	&ptl);
 	/* Make sure we do not cross the page table boundary */
 	end = pgd_addr_end(start, end);
+	end = p4d_addr_end(start, end);
 	end = pud_addr_end(start, end);
 	end = pmd_addr_end(start, end);
 
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 848e946b08e5..8edd0d576254 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -193,14 +193,14 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 }
 
 static inline unsigned long change_pud_range(struct vm_area_struct *vma,
-		pgd_t *pgd, unsigned long addr, unsigned long end,
+		p4d_t *p4d, unsigned long addr, unsigned long end,
 		pgprot_t newprot, int dirty_accountable, int prot_numa)
 {
 	pud_t *pud;
 	unsigned long next;
 	unsigned long pages = 0;
 
-	pud = pud_offset(pgd, addr);
+	pud = pud_offset(p4d, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
@@ -212,6 +212,26 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma,
 	return pages;
 }
 
+static inline unsigned long change_p4d_range(struct vm_area_struct *vma,
+		pgd_t *pgd, unsigned long addr, unsigned long end,
+		pgprot_t newprot, int dirty_accountable, int prot_numa)
+{
+	p4d_t *p4d;
+	unsigned long next;
+	unsigned long pages = 0;
+
+	p4d = p4d_offset(pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+		if (p4d_none_or_clear_bad(p4d))
+			continue;
+		pages += change_pud_range(vma, p4d, addr, next, newprot,
+				 dirty_accountable, prot_numa);
+	} while (p4d++, addr = next, addr != end);
+
+	return pages;
+}
+
 static unsigned long change_protection_range(struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
 		int dirty_accountable, int prot_numa)
@@ -230,7 +250,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		pages += change_pud_range(vma, pgd, addr, next, newprot,
+		pages += change_p4d_range(vma, pgd, addr, next, newprot,
 				 dirty_accountable, prot_numa);
 	} while (pgd++, addr = next, addr != end);
 
diff --git a/mm/mremap.c b/mm/mremap.c
index 8233b0105c82..cd8a1b199ef9 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -32,6 +32,7 @@
 static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 
@@ -39,7 +40,11 @@ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
 	if (pgd_none_or_clear_bad(pgd))
 		return NULL;
 
-	pud = pud_offset(pgd, addr);
+	p4d = p4d_offset(pgd, addr);
+	if (p4d_none_or_clear_bad(p4d))
+		return NULL;
+
+	pud = pud_offset(p4d, addr);
 	if (pud_none_or_clear_bad(pud))
 		return NULL;
 
@@ -54,11 +59,15 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
 			    unsigned long addr)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 
 	pgd = pgd_offset(mm, addr);
-	pud = pud_alloc(mm, pgd, addr);
+	p4d = p4d_alloc(mm, pgd, addr);
+	if (!p4d)
+		return NULL;
+	pud = pud_alloc(mm, p4d, addr);
 	if (!pud)
 		return NULL;
 
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index a23001a22c15..c4c9def8ffea 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -104,6 +104,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 	struct mm_struct *mm = pvmw->vma->vm_mm;
 	struct page *page = pvmw->page;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 
 	/* The only possible pmd mapping has been handled on last iteration */
@@ -133,7 +134,10 @@ restart:
 	pgd = pgd_offset(mm, pvmw->address);
 	if (!pgd_present(*pgd))
 		return false;
-	pud = pud_offset(pgd, pvmw->address);
+	p4d = p4d_offset(pgd, pvmw->address);
+	if (!p4d_present(*p4d))
+		return false;
+	pud = pud_offset(p4d, pvmw->address);
 	if (!pud_present(*pud))
 		return false;
 	pvmw->pmd = pmd_offset(pud, pvmw->address);
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 03761577ae86..60f7856e508f 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -69,14 +69,14 @@ again:
 	return err;
 }
 
-static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
+static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
 			  struct mm_walk *walk)
 {
 	pud_t *pud;
 	unsigned long next;
 	int err = 0;
 
-	pud = pud_offset(pgd, addr);
+	pud = pud_offset(p4d, addr);
 	do {
  again:
 		next = pud_addr_end(addr, end);
@@ -113,6 +113,32 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
 	return err;
 }
 
+static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
+			  struct mm_walk *walk)
+{
+	p4d_t *p4d;
+	unsigned long next;
+	int err = 0;
+
+	p4d = p4d_offset(pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+		if (p4d_none_or_clear_bad(p4d)) {
+			if (walk->pte_hole)
+				err = walk->pte_hole(addr, next, walk);
+			if (err)
+				break;
+			continue;
+		}
+		if (walk->pmd_entry || walk->pte_entry)
+			err = walk_pud_range(p4d, addr, next, walk);
+		if (err)
+			break;
+	} while (p4d++, addr = next, addr != end);
+
+	return err;
+}
+
 static int walk_pgd_range(unsigned long addr, unsigned long end,
 			  struct mm_walk *walk)
 {
@@ -131,7 +157,7 @@ static int walk_pgd_range(unsigned long addr, unsigned long end,
 			continue;
 		}
 		if (walk->pmd_entry || walk->pte_entry)
-			err = walk_pud_range(pgd, addr, next, walk);
+			err = walk_p4d_range(pgd, addr, next, walk);
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index 4ed5908c65b0..c99d9512a45b 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -22,6 +22,12 @@ void pgd_clear_bad(pgd_t *pgd)
 	pgd_clear(pgd);
 }
 
+void p4d_clear_bad(p4d_t *p4d)
+{
+	p4d_ERROR(*p4d);
+	p4d_clear(p4d);
+}
+
 void pud_clear_bad(pud_t *pud)
 {
 	pud_ERROR(*pud);
diff --git a/mm/rmap.c b/mm/rmap.c
index 2da487d6cea8..2984403a2424 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -684,6 +684,7 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd = NULL;
 	pmd_t pmde;
@@ -692,7 +693,11 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
 	if (!pgd_present(*pgd))
 		goto out;
 
-	pud = pud_offset(pgd, address);
+	p4d = p4d_offset(pgd, address);
+	if (!p4d_present(*p4d))
+		goto out;
+
+	pud = pud_offset(p4d, address);
 	if (!pud_present(*pud))
 		goto out;
 
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 574c67b663fe..a56c3989f773 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -196,9 +196,9 @@ pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
 	return pmd;
 }
 
-pud_t * __meminit vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node)
+pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
 {
-	pud_t *pud = pud_offset(pgd, addr);
+	pud_t *pud = pud_offset(p4d, addr);
 	if (pud_none(*pud)) {
 		void *p = vmemmap_alloc_block(PAGE_SIZE, node);
 		if (!p)
@@ -208,6 +208,18 @@ pud_t * __meminit vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node)
 	return pud;
 }
 
+p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
+{
+	p4d_t *p4d = p4d_offset(pgd, addr);
+	if (p4d_none(*p4d)) {
+		void *p = vmemmap_alloc_block(PAGE_SIZE, node);
+		if (!p)
+			return NULL;
+		p4d_populate(&init_mm, p4d, p);
+	}
+	return p4d;
+}
+
 pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
 {
 	pgd_t *pgd = pgd_offset_k(addr);
@@ -225,6 +237,7 @@ int __meminit vmemmap_populate_basepages(unsigned long start,
 {
 	unsigned long addr = start;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
@@ -233,7 +246,10 @@ int __meminit vmemmap_populate_basepages(unsigned long start,
 		pgd = vmemmap_pgd_populate(addr, node);
 		if (!pgd)
 			return -ENOMEM;
-		pud = vmemmap_pud_populate(pgd, addr, node);
+		p4d = vmemmap_p4d_populate(pgd, addr, node);
+		if (!p4d)
+			return -ENOMEM;
+		pud = vmemmap_pud_populate(p4d, addr, node);
 		if (!pud)
 			return -ENOMEM;
 		pmd = vmemmap_pmd_populate(pud, addr, node);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 521ef9b6064f..178130880b90 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1517,7 +1517,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 	return 0;
 }
 
-static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
+static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d,
 				unsigned long addr, unsigned long end,
 				swp_entry_t entry, struct page *page)
 {
@@ -1525,7 +1525,7 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 	unsigned long next;
 	int ret;
 
-	pud = pud_offset(pgd, addr);
+	pud = pud_offset(p4d, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
@@ -1537,6 +1537,26 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 	return 0;
 }
 
+static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd,
+				unsigned long addr, unsigned long end,
+				swp_entry_t entry, struct page *page)
+{
+	p4d_t *p4d;
+	unsigned long next;
+	int ret;
+
+	p4d = p4d_offset(pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+		if (p4d_none_or_clear_bad(p4d))
+			continue;
+		ret = unuse_pud_range(vma, p4d, addr, next, entry, page);
+		if (ret)
+			return ret;
+	} while (p4d++, addr = next, addr != end);
+	return 0;
+}
+
 static int unuse_vma(struct vm_area_struct *vma,
 				swp_entry_t entry, struct page *page)
 {
@@ -1560,7 +1580,7 @@ static int unuse_vma(struct vm_area_struct *vma,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		ret = unuse_pud_range(vma, pgd, addr, next, entry, page);
+		ret = unuse_p4d_range(vma, pgd, addr, next, entry, page);
 		if (ret)
 			return ret;
 	} while (pgd++, addr = next, addr != end);
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 479e631d43c2..8bcb501bce60 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -128,19 +128,22 @@ out_unlock:
 static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
-	pmd_t *pmd = NULL;
 
 	pgd = pgd_offset(mm, address);
-	pud = pud_alloc(mm, pgd, address);
-	if (pud)
-		/*
-		 * Note that we didn't run this because the pmd was
-		 * missing, the *pmd may be already established and in
-		 * turn it may also be a trans_huge_pmd.
-		 */
-		pmd = pmd_alloc(mm, pud, address);
-	return pmd;
+	p4d = p4d_alloc(mm, pgd, address);
+	if (!p4d)
+		return NULL;
+	pud = pud_alloc(mm, p4d, address);
+	if (!pud)
+		return NULL;
+	/*
+	 * Note that we didn't run this because the pmd was
+	 * missing, the *pmd may be already established and in
+	 * turn it may also be a trans_huge_pmd.
+	 */
+	return pmd_alloc(mm, pud, address);
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index b4024d688f38..0dd80222b20b 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -86,12 +86,12 @@ static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
 	} while (pmd++, addr = next, addr != end);
 }
 
-static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
+static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end)
 {
 	pud_t *pud;
 	unsigned long next;
 
-	pud = pud_offset(pgd, addr);
+	pud = pud_offset(p4d, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		if (pud_clear_huge(pud))
@@ -102,6 +102,22 @@ static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
 	} while (pud++, addr = next, addr != end);
 }
 
+static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end)
+{
+	p4d_t *p4d;
+	unsigned long next;
+
+	p4d = p4d_offset(pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+		if (p4d_clear_huge(p4d))
+			continue;
+		if (p4d_none_or_clear_bad(p4d))
+			continue;
+		vunmap_pud_range(p4d, addr, next);
+	} while (p4d++, addr = next, addr != end);
+}
+
 static void vunmap_page_range(unsigned long addr, unsigned long end)
 {
 	pgd_t *pgd;
@@ -113,7 +129,7 @@ static void vunmap_page_range(unsigned long addr, unsigned long end)
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		vunmap_pud_range(pgd, addr, next);
+		vunmap_p4d_range(pgd, addr, next);
 	} while (pgd++, addr = next, addr != end);
 }
 
@@ -160,13 +176,13 @@ static int vmap_pmd_range(pud_t *pud, unsigned long addr,
 	return 0;
 }
 
-static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
+static int vmap_pud_range(p4d_t *p4d, unsigned long addr,
 		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
 {
 	pud_t *pud;
 	unsigned long next;
 
-	pud = pud_alloc(&init_mm, pgd, addr);
+	pud = pud_alloc(&init_mm, p4d, addr);
 	if (!pud)
 		return -ENOMEM;
 	do {
@@ -177,6 +193,23 @@ static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
 	return 0;
 }
 
+static int vmap_p4d_range(pgd_t *pgd, unsigned long addr,
+		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+{
+	p4d_t *p4d;
+	unsigned long next;
+
+	p4d = p4d_alloc(&init_mm, pgd, addr);
+	if (!p4d)
+		return -ENOMEM;
+	do {
+		next = p4d_addr_end(addr, end);
+		if (vmap_pud_range(p4d, addr, next, prot, pages, nr))
+			return -ENOMEM;
+	} while (p4d++, addr = next, addr != end);
+	return 0;
+}
+
 /*
  * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and
  * will have pfns corresponding to the "pages" array.
@@ -196,7 +229,7 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end,
 	pgd = pgd_offset_k(addr);
 	do {
 		next = pgd_addr_end(addr, end);
-		err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
+		err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr);
 		if (err)
 			return err;
 	} while (pgd++, addr = next, addr != end);
@@ -237,6 +270,10 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
 	unsigned long addr = (unsigned long) vmalloc_addr;
 	struct page *page = NULL;
 	pgd_t *pgd = pgd_offset_k(addr);
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep, pte;
 
 	/*
 	 * XXX we might need to change this if we add VIRTUAL_BUG_ON for
@@ -244,21 +281,23 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
 	 */
 	VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));
 
-	if (!pgd_none(*pgd)) {
-		pud_t *pud = pud_offset(pgd, addr);
-		if (!pud_none(*pud)) {
-			pmd_t *pmd = pmd_offset(pud, addr);
-			if (!pmd_none(*pmd)) {
-				pte_t *ptep, pte;
-
-				ptep = pte_offset_map(pmd, addr);
-				pte = *ptep;
-				if (pte_present(pte))
-					page = pte_page(pte);
-				pte_unmap(ptep);
-			}
-		}
-	}
+	if (pgd_none(*pgd))
+		return NULL;
+	p4d = p4d_offset(pgd, addr);
+	if (p4d_none(*p4d))
+		return NULL;
+	pud = pud_offset(p4d, addr);
+	if (pud_none(*pud))
+		return NULL;
+	pmd = pmd_offset(pud, addr);
+	if (pmd_none(*pmd))
+		return NULL;
+
+	ptep = pte_offset_map(pmd, addr);
+	pte = *ptep;
+	if (pte_present(pte))
+		page = pte_page(pte);
+	pte_unmap(ptep);
 	return page;
 }
 EXPORT_SYMBOL(vmalloc_to_page);
-- 
cgit v1.2.3


From 90eceff1a375f6ffa78caf8654e787c0a8a591ef Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 9 Mar 2017 17:24:08 +0300
Subject: mm: introduce __p4d_alloc()

For full 5-level paging we need a helper to allocate p4d page table.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/mm/memory.c b/mm/memory.c
index 7f1c2163b3ce..235ba51b2fbf 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3906,6 +3906,29 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 }
 EXPORT_SYMBOL_GPL(handle_mm_fault);
 
+#ifndef __PAGETABLE_P4D_FOLDED
+/*
+ * Allocate p4d page table.
+ * We've already handled the fast-path in-line.
+ */
+int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+{
+	p4d_t *new = p4d_alloc_one(mm, address);
+	if (!new)
+		return -ENOMEM;
+
+	smp_wmb(); /* See comment in __pte_alloc */
+
+	spin_lock(&mm->page_table_lock);
+	if (pgd_present(*pgd))		/* Another has populated it */
+		p4d_free(mm, new);
+	else
+		pgd_populate(mm, pgd, new);
+	spin_unlock(&mm->page_table_lock);
+	return 0;
+}
+#endif /* __PAGETABLE_P4D_FOLDED */
+
 #ifndef __PAGETABLE_PUD_FOLDED
 /*
  * Allocate page upper directory.
-- 
cgit v1.2.3


From 5be083cedc087b2d457ef2e9c2d5698c94d3d5e4 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Mon, 6 Mar 2017 15:57:31 -0800
Subject: net: ipv6: Remove redundant RTA_OIF in multipath routes

Dinesh reported that RTA_MULTIPATH nexthops are 8-bytes larger with IPv6
than IPv4. The recent refactoring for multipath support in netlink
messages does discriminate between non-multipath which needs the OIF
and multipath which adds a rtnexthop struct for each hop making the
RTA_OIF attribute redundant. Resolve by adding a flag to the info
function to skip the oif for multipath.

Fixes: beb1afac518d ("net: ipv6: Add support to dump multipath routes
       via RTA_MULTIPATH attribute")
Reported-by: Dinesh Dutt <ddutt@cumulusnetworks.com>
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 229bfcc451ef..35c58b669ebd 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3299,7 +3299,6 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt)
 		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
 			    + NLA_ALIGN(sizeof(struct rtnexthop))
 			    + nla_total_size(16) /* RTA_GATEWAY */
-			    + nla_total_size(4)  /* RTA_OIF */
 			    + lwtunnel_get_encap_size(rt->dst.lwtstate);
 
 		nexthop_len *= rt->rt6i_nsiblings;
@@ -3323,7 +3322,7 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt)
 }
 
 static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
-			    unsigned int *flags)
+			    unsigned int *flags, bool skip_oif)
 {
 	if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
 		*flags |= RTNH_F_LINKDOWN;
@@ -3336,7 +3335,8 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
 			goto nla_put_failure;
 	}
 
-	if (rt->dst.dev &&
+	/* not needed for multipath encoding b/c it has a rtnexthop struct */
+	if (!skip_oif && rt->dst.dev &&
 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
 		goto nla_put_failure;
 
@@ -3350,6 +3350,7 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
+/* add multipath next hop */
 static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
 {
 	struct rtnexthop *rtnh;
@@ -3362,7 +3363,7 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
 	rtnh->rtnh_hops = 0;
 	rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
 
-	if (rt6_nexthop_info(skb, rt, &flags) < 0)
+	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
 		goto nla_put_failure;
 
 	rtnh->rtnh_flags = flags;
@@ -3515,7 +3516,7 @@ static int rt6_fill_node(struct net *net,
 
 		nla_nest_end(skb, mp);
 	} else {
-		if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags) < 0)
+		if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
 			goto nla_put_failure;
 	}
 
-- 
cgit v1.2.3


From 67e303e0c7683957eb4e530453705a43a6d4f966 Mon Sep 17 00:00:00 2001
From: VSR Burru <veerasenareddy.burru@cavium.com>
Date: Mon, 6 Mar 2017 18:45:59 -0800
Subject: liquidio: improve UDP TX performance

Improve UDP TX performance by:
* reducing the ring size from 2K to 512
* replacing the numerous streaming DMA allocations for info buffers and
  gather lists with one large consistent DMA allocation per ring

BQL is not effective here.  We reduced the ring size because there is heavy
overhead with dma_map_single every so often.  With iommu=on, dma_map_single
in PF Tx data path was taking longer time (~700usec) for every ~250
packets.  Debugged intel_iommu code, and found that PF driver is utilizing
too many static IO virtual address mapping entries (for gather list entries
and info buffers): about 100K entries for two PF's each using 8 rings.
Also, finding an empty entry (in rbtree of device domain's iova mapping in
kernel) during Tx path becomes a bottleneck every so often; the loop to
find the empty entry goes through over 40K iterations; this is too costly
and was the major overhead.  Overhead is low when this loop quits quickly.

Netperf benchmark numbers before and after patch:

PF UDP TX
+--------+--------+------------+------------+---------+
|        |        |  Before    |  After     |         |
| Number |        |  Patch     |  Patch     |         |
|  of    | Packet | Throughput | Throughput | Percent |
| Flows  |  Size  |  (Gbps)    |  (Gbps)    | Change  |
+--------+--------+------------+------------+---------+
|        |   360  |   0.52     |   0.93     |  +78.9  |
|   1    |  1024  |   1.62     |   2.84     |  +75.3  |
|        |  1518  |   2.44     |   4.21     |  +72.5  |
+--------+--------+------------+------------+---------+
|        |   360  |   0.45     |   1.59     | +253.3  |
|   4    |  1024  |   1.34     |   5.48     | +308.9  |
|        |  1518  |   2.27     |   8.31     | +266.1  |
+--------+--------+------------+------------+---------+
|        |   360  |   0.40     |   1.61     | +302.5  |
|   8    |  1024  |   1.64     |   4.24     | +158.5  |
|        |  1518  |   2.87     |   6.52     | +127.2  |
+--------+--------+------------+------------+---------+

VF UDP TX
+--------+--------+------------+------------+---------+
|        |        |  Before    |  After     |         |
| Number |        |  Patch     |  Patch     |         |
|  of    | Packet | Throughput | Throughput | Percent |
| Flows  |  Size  |  (Gbps)    |  (Gbps)    | Change  |
+--------+--------+------------+------------+---------+
|        |   360  |   1.28     |   1.49     |  +16.4  |
|   1    |  1024  |   4.44     |   4.39     |   -1.1  |
|        |  1518  |   6.08     |   6.51     |   +7.1  |
+--------+--------+------------+------------+---------+
|        |   360  |   2.35     |   2.35     |    0.0  |
|   4    |  1024  |   6.41     |   8.07     |  +25.9  |
|        |  1518  |   9.56     |   9.54     |   -0.2  |
+--------+--------+------------+------------+---------+
|        |   360  |   3.41     |   3.65     |   +7.0  |
|   8    |  1024  |   9.35     |   9.34     |   -0.1  |
|        |  1518  |   9.56     |   9.57     |   +0.1  |
+--------+--------+------------+------------+---------+

Signed-off-by: VSR Burru <veerasenareddy.burru@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: Derek Chickles <derek.chickles@cavium.com>
Signed-off-by: Raghu Vatsavayi <raghu.vatsavayi@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/liquidio/lio_main.c    | 110 ++++++++++-----------
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 104 ++++++++++---------
 .../net/ethernet/cavium/liquidio/octeon_config.h   |   6 +-
 drivers/net/ethernet/cavium/liquidio/octeon_droq.c |  17 +---
 drivers/net/ethernet/cavium/liquidio/octeon_droq.h |   4 +-
 drivers/net/ethernet/cavium/liquidio/octeon_main.h |  42 --------
 .../net/ethernet/cavium/liquidio/octeon_network.h  |  43 +++++---
 7 files changed, 144 insertions(+), 182 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index be9c0e3f5ade..92f46b1375c3 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -152,7 +152,7 @@ struct octnic_gather {
 	 */
 	struct octeon_sg_entry *sg;
 
-	u64 sg_dma_ptr;
+	dma_addr_t sg_dma_ptr;
 };
 
 struct handshake {
@@ -734,6 +734,9 @@ static void delete_glists(struct lio *lio)
 	struct octnic_gather *g;
 	int i;
 
+	kfree(lio->glist_lock);
+	lio->glist_lock = NULL;
+
 	if (!lio->glist)
 		return;
 
@@ -741,23 +744,26 @@ static void delete_glists(struct lio *lio)
 		do {
 			g = (struct octnic_gather *)
 				list_delete_head(&lio->glist[i]);
-			if (g) {
-				if (g->sg) {
-					dma_unmap_single(&lio->oct_dev->
-							 pci_dev->dev,
-							 g->sg_dma_ptr,
-							 g->sg_size,
-							 DMA_TO_DEVICE);
-					kfree((void *)((unsigned long)g->sg -
-						       g->adjust));
-				}
+			if (g)
 				kfree(g);
-			}
 		} while (g);
+
+		if (lio->glists_virt_base && lio->glists_virt_base[i]) {
+			lio_dma_free(lio->oct_dev,
+				     lio->glist_entry_size * lio->tx_qsize,
+				     lio->glists_virt_base[i],
+				     lio->glists_dma_base[i]);
+		}
 	}
 
-	kfree((void *)lio->glist);
-	kfree((void *)lio->glist_lock);
+	kfree(lio->glists_virt_base);
+	lio->glists_virt_base = NULL;
+
+	kfree(lio->glists_dma_base);
+	lio->glists_dma_base = NULL;
+
+	kfree(lio->glist);
+	lio->glist = NULL;
 }
 
 /**
@@ -772,13 +778,30 @@ static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
 	lio->glist_lock = kcalloc(num_iqs, sizeof(*lio->glist_lock),
 				  GFP_KERNEL);
 	if (!lio->glist_lock)
-		return 1;
+		return -ENOMEM;
 
 	lio->glist = kcalloc(num_iqs, sizeof(*lio->glist),
 			     GFP_KERNEL);
 	if (!lio->glist) {
-		kfree((void *)lio->glist_lock);
-		return 1;
+		kfree(lio->glist_lock);
+		lio->glist_lock = NULL;
+		return -ENOMEM;
+	}
+
+	lio->glist_entry_size =
+		ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE);
+
+	/* allocate memory to store virtual and dma base address of
+	 * per glist consistent memory
+	 */
+	lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base),
+					GFP_KERNEL);
+	lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base),
+				       GFP_KERNEL);
+
+	if (!lio->glists_virt_base || !lio->glists_dma_base) {
+		delete_glists(lio);
+		return -ENOMEM;
 	}
 
 	for (i = 0; i < num_iqs; i++) {
@@ -788,6 +811,16 @@ static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
 
 		INIT_LIST_HEAD(&lio->glist[i]);
 
+		lio->glists_virt_base[i] =
+			lio_dma_alloc(oct,
+				      lio->glist_entry_size * lio->tx_qsize,
+				      &lio->glists_dma_base[i]);
+
+		if (!lio->glists_virt_base[i]) {
+			delete_glists(lio);
+			return -ENOMEM;
+		}
+
 		for (j = 0; j < lio->tx_qsize; j++) {
 			g = kzalloc_node(sizeof(*g), GFP_KERNEL,
 					 numa_node);
@@ -796,43 +829,18 @@ static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
 			if (!g)
 				break;
 
-			g->sg_size = ((ROUNDUP4(OCTNIC_MAX_SG) >> 2) *
-				      OCT_SG_ENTRY_SIZE);
+			g->sg = lio->glists_virt_base[i] +
+				(j * lio->glist_entry_size);
 
-			g->sg = kmalloc_node(g->sg_size + 8,
-					     GFP_KERNEL, numa_node);
-			if (!g->sg)
-				g->sg = kmalloc(g->sg_size + 8, GFP_KERNEL);
-			if (!g->sg) {
-				kfree(g);
-				break;
-			}
-
-			/* The gather component should be aligned on 64-bit
-			 * boundary
-			 */
-			if (((unsigned long)g->sg) & 7) {
-				g->adjust = 8 - (((unsigned long)g->sg) & 7);
-				g->sg = (struct octeon_sg_entry *)
-					((unsigned long)g->sg + g->adjust);
-			}
-			g->sg_dma_ptr = dma_map_single(&oct->pci_dev->dev,
-						       g->sg, g->sg_size,
-						       DMA_TO_DEVICE);
-			if (dma_mapping_error(&oct->pci_dev->dev,
-					      g->sg_dma_ptr)) {
-				kfree((void *)((unsigned long)g->sg -
-					       g->adjust));
-				kfree(g);
-				break;
-			}
+			g->sg_dma_ptr = lio->glists_dma_base[i] +
+					(j * lio->glist_entry_size);
 
 			list_add_tail(&g->list, &lio->glist[i]);
 		}
 
 		if (j != lio->tx_qsize) {
 			delete_glists(lio);
-			return 1;
+			return -ENOMEM;
 		}
 	}
 
@@ -1885,9 +1893,6 @@ static void free_netsgbuf(void *buf)
 		i++;
 	}
 
-	dma_sync_single_for_cpu(&lio->oct_dev->pci_dev->dev,
-				g->sg_dma_ptr, g->sg_size, DMA_TO_DEVICE);
-
 	iq = skb_iq(lio, skb);
 	spin_lock(&lio->glist_lock[iq]);
 	list_add_tail(&g->list, &lio->glist[iq]);
@@ -1933,9 +1938,6 @@ static void free_netsgbuf_with_resp(void *buf)
 		i++;
 	}
 
-	dma_sync_single_for_cpu(&lio->oct_dev->pci_dev->dev,
-				g->sg_dma_ptr, g->sg_size, DMA_TO_DEVICE);
-
 	iq = skb_iq(lio, skb);
 
 	spin_lock(&lio->glist_lock[iq]);
@@ -3273,8 +3275,6 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 			i++;
 		}
 
-		dma_sync_single_for_device(&oct->pci_dev->dev, g->sg_dma_ptr,
-					   g->sg_size, DMA_TO_DEVICE);
 		dptr = g->sg_dma_ptr;
 
 		if (OCTEON_CN23XX_PF(oct))
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 9d5e03502c76..7b83be4ce1fe 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -108,6 +108,8 @@ struct octnic_gather {
 	 * received from the IP layer.
 	 */
 	struct octeon_sg_entry *sg;
+
+	dma_addr_t sg_dma_ptr;
 };
 
 struct octeon_device_priv {
@@ -490,6 +492,9 @@ static void delete_glists(struct lio *lio)
 	struct octnic_gather *g;
 	int i;
 
+	kfree(lio->glist_lock);
+	lio->glist_lock = NULL;
+
 	if (!lio->glist)
 		return;
 
@@ -497,17 +502,26 @@ static void delete_glists(struct lio *lio)
 		do {
 			g = (struct octnic_gather *)
 			    list_delete_head(&lio->glist[i]);
-			if (g) {
-				if (g->sg)
-					kfree((void *)((unsigned long)g->sg -
-							g->adjust));
+			if (g)
 				kfree(g);
-			}
 		} while (g);
+
+		if (lio->glists_virt_base && lio->glists_virt_base[i]) {
+			lio_dma_free(lio->oct_dev,
+				     lio->glist_entry_size * lio->tx_qsize,
+				     lio->glists_virt_base[i],
+				     lio->glists_dma_base[i]);
+		}
 	}
 
+	kfree(lio->glists_virt_base);
+	lio->glists_virt_base = NULL;
+
+	kfree(lio->glists_dma_base);
+	lio->glists_dma_base = NULL;
+
 	kfree(lio->glist);
-	kfree(lio->glist_lock);
+	lio->glist = NULL;
 }
 
 /**
@@ -522,13 +536,30 @@ static int setup_glists(struct lio *lio, int num_iqs)
 	lio->glist_lock =
 	    kzalloc(sizeof(*lio->glist_lock) * num_iqs, GFP_KERNEL);
 	if (!lio->glist_lock)
-		return 1;
+		return -ENOMEM;
 
 	lio->glist =
 	    kzalloc(sizeof(*lio->glist) * num_iqs, GFP_KERNEL);
 	if (!lio->glist) {
 		kfree(lio->glist_lock);
-		return 1;
+		lio->glist_lock = NULL;
+		return -ENOMEM;
+	}
+
+	lio->glist_entry_size =
+		ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE);
+
+	/* allocate memory to store virtual and dma base address of
+	 * per glist consistent memory
+	 */
+	lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base),
+					GFP_KERNEL);
+	lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base),
+				       GFP_KERNEL);
+
+	if (!lio->glists_virt_base || !lio->glists_dma_base) {
+		delete_glists(lio);
+		return -ENOMEM;
 	}
 
 	for (i = 0; i < num_iqs; i++) {
@@ -536,34 +567,33 @@ static int setup_glists(struct lio *lio, int num_iqs)
 
 		INIT_LIST_HEAD(&lio->glist[i]);
 
+		lio->glists_virt_base[i] =
+			lio_dma_alloc(lio->oct_dev,
+				      lio->glist_entry_size * lio->tx_qsize,
+				      &lio->glists_dma_base[i]);
+
+		if (!lio->glists_virt_base[i]) {
+			delete_glists(lio);
+			return -ENOMEM;
+		}
+
 		for (j = 0; j < lio->tx_qsize; j++) {
 			g = kzalloc(sizeof(*g), GFP_KERNEL);
 			if (!g)
 				break;
 
-			g->sg_size = ((ROUNDUP4(OCTNIC_MAX_SG) >> 2) *
-				      OCT_SG_ENTRY_SIZE);
+			g->sg = lio->glists_virt_base[i] +
+				(j * lio->glist_entry_size);
 
-			g->sg = kmalloc(g->sg_size + 8, GFP_KERNEL);
-			if (!g->sg) {
-				kfree(g);
-				break;
-			}
+			g->sg_dma_ptr = lio->glists_dma_base[i] +
+					(j * lio->glist_entry_size);
 
-			/* The gather component should be aligned on 64-bit
-			 * boundary
-			 */
-			if (((unsigned long)g->sg) & 7) {
-				g->adjust = 8 - (((unsigned long)g->sg) & 7);
-				g->sg = (struct octeon_sg_entry *)
-					((unsigned long)g->sg + g->adjust);
-			}
 			list_add_tail(&g->list, &lio->glist[i]);
 		}
 
 		if (j != lio->tx_qsize) {
 			delete_glists(lio);
-			return 1;
+			return -ENOMEM;
 		}
 	}
 
@@ -1324,10 +1354,6 @@ static void free_netsgbuf(void *buf)
 		i++;
 	}
 
-	dma_unmap_single(&lio->oct_dev->pci_dev->dev,
-			 finfo->dptr, g->sg_size,
-			 DMA_TO_DEVICE);
-
 	iq = skb_iq(lio, skb);
 
 	spin_lock(&lio->glist_lock[iq]);
@@ -1374,10 +1400,6 @@ static void free_netsgbuf_with_resp(void *buf)
 		i++;
 	}
 
-	dma_unmap_single(&lio->oct_dev->pci_dev->dev,
-			 finfo->dptr, g->sg_size,
-			 DMA_TO_DEVICE);
-
 	iq = skb_iq(lio, skb);
 
 	spin_lock(&lio->glist_lock[iq]);
@@ -2382,23 +2404,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 			i++;
 		}
 
-		dptr = dma_map_single(&oct->pci_dev->dev,
-				      g->sg, g->sg_size,
-				      DMA_TO_DEVICE);
-		if (dma_mapping_error(&oct->pci_dev->dev, dptr)) {
-			dev_err(&oct->pci_dev->dev, "%s DMA mapping error 4\n",
-				__func__);
-			dma_unmap_single(&oct->pci_dev->dev, g->sg[0].ptr[0],
-					 skb->len - skb->data_len,
-					 DMA_TO_DEVICE);
-			for (j = 1; j <= frags; j++) {
-				frag = &skb_shinfo(skb)->frags[j - 1];
-				dma_unmap_page(&oct->pci_dev->dev,
-					       g->sg[j >> 2].ptr[j & 3],
-					       frag->size, DMA_TO_DEVICE);
-			}
-			return NETDEV_TX_BUSY;
-		}
+		dptr = g->sg_dma_ptr;
 
 		ndata.cmd.cmd3.dptr = dptr;
 		finfo->dptr = dptr;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_config.h b/drivers/net/ethernet/cavium/liquidio/octeon_config.h
index b3dc2e9651a8..d29ebc531151 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_config.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_config.h
@@ -71,17 +71,17 @@
 #define   CN23XX_MAX_RINGS_PER_VF          8
 
 #define   CN23XX_MAX_INPUT_QUEUES	CN23XX_MAX_RINGS_PER_PF
-#define   CN23XX_MAX_IQ_DESCRIPTORS	2048
+#define   CN23XX_MAX_IQ_DESCRIPTORS	512
 #define   CN23XX_DB_MIN                 1
 #define   CN23XX_DB_MAX                 8
 #define   CN23XX_DB_TIMEOUT             1
 
 #define   CN23XX_MAX_OUTPUT_QUEUES	CN23XX_MAX_RINGS_PER_PF
-#define   CN23XX_MAX_OQ_DESCRIPTORS	2048
+#define   CN23XX_MAX_OQ_DESCRIPTORS	512
 #define   CN23XX_OQ_BUF_SIZE		1536
 #define   CN23XX_OQ_PKTSPER_INTR	128
 /*#define CAVIUM_ONLY_CN23XX_RX_PERF*/
-#define   CN23XX_OQ_REFIL_THRESHOLD	128
+#define   CN23XX_OQ_REFIL_THRESHOLD	16
 
 #define   CN23XX_OQ_INTR_PKT		64
 #define   CN23XX_OQ_INTR_TIME		100
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
index 0be87d119a97..79f809479af6 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
@@ -155,11 +155,6 @@ octeon_droq_destroy_ring_buffers(struct octeon_device *oct,
 			recv_buffer_destroy(droq->recv_buf_list[i].buffer,
 					    pg_info);
 
-		if (droq->desc_ring && droq->desc_ring[i].info_ptr)
-			lio_unmap_ring_info(oct->pci_dev,
-					    (u64)droq->
-					    desc_ring[i].info_ptr,
-					    OCT_DROQ_INFO_SIZE);
 		droq->recv_buf_list[i].buffer = NULL;
 	}
 
@@ -211,10 +206,7 @@ int octeon_delete_droq(struct octeon_device *oct, u32 q_no)
 	vfree(droq->recv_buf_list);
 
 	if (droq->info_base_addr)
-		cnnic_free_aligned_dma(oct->pci_dev, droq->info_list,
-				       droq->info_alloc_size,
-				       droq->info_base_addr,
-				       droq->info_list_dma);
+		lio_free_info_buffer(oct, droq);
 
 	if (droq->desc_ring)
 		lio_dma_free(oct, (droq->max_count * OCT_DROQ_DESC_SIZE),
@@ -294,12 +286,7 @@ int octeon_init_droq(struct octeon_device *oct,
 	dev_dbg(&oct->pci_dev->dev, "droq[%d]: num_desc: %d\n", q_no,
 		droq->max_count);
 
-	droq->info_list =
-		cnnic_numa_alloc_aligned_dma((droq->max_count *
-					      OCT_DROQ_INFO_SIZE),
-					     &droq->info_alloc_size,
-					     &droq->info_base_addr,
-					     numa_node);
+	droq->info_list = lio_alloc_info_buffer(oct, droq);
 	if (!droq->info_list) {
 		dev_err(&oct->pci_dev->dev, "Cannot allocate memory for info list.\n");
 		lio_dma_free(oct, (droq->max_count * OCT_DROQ_DESC_SIZE),
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
index e62074090681..6982c0af5ecc 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
@@ -325,10 +325,10 @@ struct octeon_droq {
 	size_t desc_ring_dma;
 
 	/** Info ptr list are allocated at this virtual address. */
-	size_t info_base_addr;
+	void *info_base_addr;
 
 	/** DMA mapped address of the info list */
-	size_t info_list_dma;
+	dma_addr_t info_list_dma;
 
 	/** Allocated size of info list. */
 	u32 info_alloc_size;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
index aa36e9ae7676..bed9ef17bc26 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
@@ -140,48 +140,6 @@ err_release_region:
 	return 1;
 }
 
-static inline void *
-cnnic_numa_alloc_aligned_dma(u32 size,
-			     u32 *alloc_size,
-			     size_t *orig_ptr,
-			     int numa_node)
-{
-	int retries = 0;
-	void *ptr = NULL;
-
-#define OCTEON_MAX_ALLOC_RETRIES     1
-	do {
-		struct page *page = NULL;
-
-		page = alloc_pages_node(numa_node,
-					GFP_KERNEL,
-					get_order(size));
-		if (!page)
-			page = alloc_pages(GFP_KERNEL,
-					   get_order(size));
-		ptr = (void *)page_address(page);
-		if ((unsigned long)ptr & 0x07) {
-			__free_pages(page, get_order(size));
-			ptr = NULL;
-			/* Increment the size required if the first
-			 * attempt failed.
-			 */
-			if (!retries)
-				size += 7;
-		}
-		retries++;
-	} while ((retries <= OCTEON_MAX_ALLOC_RETRIES) && !ptr);
-
-	*alloc_size = size;
-	*orig_ptr = (unsigned long)ptr;
-	if ((unsigned long)ptr & 0x07)
-		ptr = (void *)(((unsigned long)ptr + 7) & ~(7UL));
-	return ptr;
-}
-
-#define cnnic_free_aligned_dma(pci_dev, ptr, size, orig_ptr, dma_addr) \
-		free_pages(orig_ptr, get_order(size))
-
 static inline int
 sleep_cond(wait_queue_head_t *wait_queue, int *condition)
 {
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index 6bb89419006e..eef2a1e8a7e3 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -62,6 +62,9 @@ struct lio {
 
 	/** Array of gather component linked lists */
 	struct list_head *glist;
+	void **glists_virt_base;
+	dma_addr_t *glists_dma_base;
+	u32 glist_entry_size;
 
 	/** Pointer to the NIC properties for the Octeon device this network
 	 *  interface is associated with.
@@ -344,6 +347,29 @@ static inline void tx_buffer_free(void *buffer)
 #define lio_dma_free(oct, size, virt_addr, dma_addr) \
 	dma_free_coherent(&(oct)->pci_dev->dev, size, virt_addr, dma_addr)
 
+static inline void *
+lio_alloc_info_buffer(struct octeon_device *oct,
+		      struct octeon_droq *droq)
+{
+	void *virt_ptr;
+
+	virt_ptr = lio_dma_alloc(oct, (droq->max_count * OCT_DROQ_INFO_SIZE),
+				 &droq->info_list_dma);
+	if (virt_ptr) {
+		droq->info_alloc_size = droq->max_count * OCT_DROQ_INFO_SIZE;
+		droq->info_base_addr = virt_ptr;
+	}
+
+	return virt_ptr;
+}
+
+static inline void lio_free_info_buffer(struct octeon_device *oct,
+					struct octeon_droq *droq)
+{
+	lio_dma_free(oct, droq->info_alloc_size, droq->info_base_addr,
+		     droq->info_list_dma);
+}
+
 static inline
 void *get_rbd(struct sk_buff *skb)
 {
@@ -359,22 +385,7 @@ void *get_rbd(struct sk_buff *skb)
 static inline u64
 lio_map_ring_info(struct octeon_droq *droq, u32 i)
 {
-	dma_addr_t dma_addr;
-	struct octeon_device *oct = droq->oct_dev;
-
-	dma_addr = dma_map_single(&oct->pci_dev->dev, &droq->info_list[i],
-				  OCT_DROQ_INFO_SIZE, DMA_FROM_DEVICE);
-
-	WARN_ON(dma_mapping_error(&oct->pci_dev->dev, dma_addr));
-
-	return (u64)dma_addr;
-}
-
-static inline void
-lio_unmap_ring_info(struct pci_dev *pci_dev,
-		    u64 info_ptr, u32 size)
-{
-	dma_unmap_single(&pci_dev->dev, info_ptr, size, DMA_FROM_DEVICE);
+	return droq->info_list_dma + (i * sizeof(struct octeon_droq_info));
 }
 
 static inline u64
-- 
cgit v1.2.3


From 3b12f73a5c2977153f28a224392fd4729b50d1dc Mon Sep 17 00:00:00 2001
From: Zhu Yanjun <yanjun.zhu@oracle.com>
Date: Tue, 7 Mar 2017 02:48:36 -0500
Subject: rds: ib: add error handle

In the function rds_ib_setup_qp, the error handle is missing. When some
error occurs, it is possible that memory leak occurs. As such, error
handle is added.

Cc: Joe Jin <joe.jin@oracle.com>
Reviewed-by: Junxiao Bi <junxiao.bi@oracle.com>
Reviewed-by: Guanglei Li <guanglei.li@oracle.com>
Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/ib_cm.c | 47 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 36 insertions(+), 11 deletions(-)

diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index ce3775abc6e7..1c38d2c7caa8 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -442,7 +442,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
 		ic->i_send_cq = NULL;
 		ibdev_put_vector(rds_ibdev, ic->i_scq_vector);
 		rdsdebug("ib_create_cq send failed: %d\n", ret);
-		goto out;
+		goto rds_ibdev_out;
 	}
 
 	ic->i_rcq_vector = ibdev_get_unused_vector(rds_ibdev);
@@ -456,19 +456,19 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
 		ic->i_recv_cq = NULL;
 		ibdev_put_vector(rds_ibdev, ic->i_rcq_vector);
 		rdsdebug("ib_create_cq recv failed: %d\n", ret);
-		goto out;
+		goto send_cq_out;
 	}
 
 	ret = ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP);
 	if (ret) {
 		rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
-		goto out;
+		goto recv_cq_out;
 	}
 
 	ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
 	if (ret) {
 		rdsdebug("ib_req_notify_cq recv failed: %d\n", ret);
-		goto out;
+		goto recv_cq_out;
 	}
 
 	/* XXX negotiate max send/recv with remote? */
@@ -494,7 +494,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
 	ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr);
 	if (ret) {
 		rdsdebug("rdma_create_qp failed: %d\n", ret);
-		goto out;
+		goto recv_cq_out;
 	}
 
 	ic->i_send_hdrs = ib_dma_alloc_coherent(dev,
@@ -504,7 +504,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
 	if (!ic->i_send_hdrs) {
 		ret = -ENOMEM;
 		rdsdebug("ib_dma_alloc_coherent send failed\n");
-		goto out;
+		goto qp_out;
 	}
 
 	ic->i_recv_hdrs = ib_dma_alloc_coherent(dev,
@@ -514,7 +514,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
 	if (!ic->i_recv_hdrs) {
 		ret = -ENOMEM;
 		rdsdebug("ib_dma_alloc_coherent recv failed\n");
-		goto out;
+		goto send_hdrs_dma_out;
 	}
 
 	ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
@@ -522,7 +522,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
 	if (!ic->i_ack) {
 		ret = -ENOMEM;
 		rdsdebug("ib_dma_alloc_coherent ack failed\n");
-		goto out;
+		goto recv_hdrs_dma_out;
 	}
 
 	ic->i_sends = vzalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work),
@@ -530,7 +530,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
 	if (!ic->i_sends) {
 		ret = -ENOMEM;
 		rdsdebug("send allocation failed\n");
-		goto out;
+		goto ack_dma_out;
 	}
 
 	ic->i_recvs = vzalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work),
@@ -538,7 +538,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
 	if (!ic->i_recvs) {
 		ret = -ENOMEM;
 		rdsdebug("recv allocation failed\n");
-		goto out;
+		goto sends_out;
 	}
 
 	rds_ib_recv_init_ack(ic);
@@ -546,8 +546,33 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
 	rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd,
 		 ic->i_send_cq, ic->i_recv_cq);
 
-out:
+	return ret;
+
+sends_out:
+	vfree(ic->i_sends);
+ack_dma_out:
+	ib_dma_free_coherent(dev, sizeof(struct rds_header),
+			     ic->i_ack, ic->i_ack_dma);
+recv_hdrs_dma_out:
+	ib_dma_free_coherent(dev, ic->i_recv_ring.w_nr *
+					sizeof(struct rds_header),
+					ic->i_recv_hdrs, ic->i_recv_hdrs_dma);
+send_hdrs_dma_out:
+	ib_dma_free_coherent(dev, ic->i_send_ring.w_nr *
+					sizeof(struct rds_header),
+					ic->i_send_hdrs, ic->i_send_hdrs_dma);
+qp_out:
+	rdma_destroy_qp(ic->i_cm_id);
+recv_cq_out:
+	if (!ib_destroy_cq(ic->i_recv_cq))
+		ic->i_recv_cq = NULL;
+send_cq_out:
+	if (!ib_destroy_cq(ic->i_send_cq))
+		ic->i_send_cq = NULL;
+rds_ibdev_out:
+	rds_ib_remove_conn(rds_ibdev, conn);
 	rds_ib_dev_put(rds_ibdev);
+
 	return ret;
 }
 
-- 
cgit v1.2.3


From 83abb7d7c91f4ac20e47c3089a10bb93b2ea8994 Mon Sep 17 00:00:00 2001
From: Sunil Goutham <sgoutham@cavium.com>
Date: Tue, 7 Mar 2017 18:09:08 +0530
Subject: net: thunderx: Fix IOMMU translation faults

ACPI support has been added to ARM IOMMU driver in 4.10 kernel
and that has resulted in VNIC interfaces throwing translation
faults when kernel is booted with ACPI as driver was not using
DMA API. This patch fixes the issue by using DMA API which inturn
will create translation tables when IOMMU is enabled.

Also VNIC doesn't have a seperate receive buffer ring per receive
queue, so there is no 1:1 descriptor index matching between CQE_RX
and the index in buffer ring from where a buffer has been used for
DMA'ing. Unlike other NICs, here it's not possible to maintain dma
address to virt address mappings within the driver. This leaves us
no other choice but to use IOMMU's IOVA address conversion API to
get buffer's virtual address which can be given to network stack
for processing.

Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/thunder/nic.h          |   1 +
 drivers/net/ethernet/cavium/thunder/nicvf_main.c   |  12 +-
 drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 178 ++++++++++++++++-----
 drivers/net/ethernet/cavium/thunder/nicvf_queues.h |   4 +-
 4 files changed, 156 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index e739c7153562..2269ff562d95 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -269,6 +269,7 @@ struct nicvf {
 #define	MAX_QUEUES_PER_QSET			8
 	struct queue_set	*qs;
 	struct nicvf_cq_poll	*napi[8];
+	void			*iommu_domain;
 	u8			vf_id;
 	u8			sqs_id;
 	bool                    sqs_mode;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 6feaa24bcfd4..24017588f531 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -16,6 +16,7 @@
 #include <linux/log2.h>
 #include <linux/prefetch.h>
 #include <linux/irq.h>
+#include <linux/iommu.h>
 
 #include "nic_reg.h"
 #include "nic.h"
@@ -525,7 +526,12 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev,
 			/* Get actual TSO descriptors and free them */
 			tso_sqe =
 			 (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2);
+			nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2,
+						 tso_sqe->subdesc_cnt);
 			nicvf_put_sq_desc(sq, tso_sqe->subdesc_cnt + 1);
+		} else {
+			nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr,
+						 hdr->subdesc_cnt);
 		}
 		nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
 		prefetch(skb);
@@ -576,6 +582,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
 {
 	struct sk_buff *skb;
 	struct nicvf *nic = netdev_priv(netdev);
+	struct nicvf *snic = nic;
 	int err = 0;
 	int rq_idx;
 
@@ -592,7 +599,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
 	if (err && !cqe_rx->rb_cnt)
 		return;
 
-	skb = nicvf_get_rcv_skb(nic, cqe_rx);
+	skb = nicvf_get_rcv_skb(snic, cqe_rx);
 	if (!skb) {
 		netdev_dbg(nic->netdev, "Packet not received\n");
 		return;
@@ -1643,6 +1650,9 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (!pass1_silicon(nic->pdev))
 		nic->hw_tso = true;
 
+	/* Get iommu domain for iova to physical addr conversion */
+	nic->iommu_domain = iommu_get_domain_for_dev(dev);
+
 	pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
 	if (sdevid == 0xA134)
 		nic->t88 = true;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index ac0390be3b12..b1962dbd0409 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -10,6 +10,7 @@
 #include <linux/netdevice.h>
 #include <linux/ip.h>
 #include <linux/etherdevice.h>
+#include <linux/iommu.h>
 #include <net/ip.h>
 #include <net/tso.h>
 
@@ -18,6 +19,16 @@
 #include "q_struct.h"
 #include "nicvf_queues.h"
 
+#define NICVF_PAGE_ORDER ((PAGE_SIZE <= 4096) ?  PAGE_ALLOC_COSTLY_ORDER : 0)
+
+static inline u64 nicvf_iova_to_phys(struct nicvf *nic, dma_addr_t dma_addr)
+{
+	/* Translation is installed only when IOMMU is present */
+	if (nic->iommu_domain)
+		return iommu_iova_to_phys(nic->iommu_domain, dma_addr);
+	return dma_addr;
+}
+
 static void nicvf_get_page(struct nicvf *nic)
 {
 	if (!nic->rb_pageref || !nic->rb_page)
@@ -87,7 +98,7 @@ static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem)
 static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp,
 					 u32 buf_len, u64 **rbuf)
 {
-	int order = (PAGE_SIZE <= 4096) ?  PAGE_ALLOC_COSTLY_ORDER : 0;
+	int order = NICVF_PAGE_ORDER;
 
 	/* Check if request can be accomodated in previous allocated page */
 	if (nic->rb_page &&
@@ -97,22 +108,27 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp,
 	}
 
 	nicvf_get_page(nic);
-	nic->rb_page = NULL;
 
 	/* Allocate a new page */
+	nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
+				   order);
 	if (!nic->rb_page) {
-		nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
-					   order);
-		if (!nic->rb_page) {
-			this_cpu_inc(nic->pnicvf->drv_stats->
-				     rcv_buffer_alloc_failures);
-			return -ENOMEM;
-		}
-		nic->rb_page_offset = 0;
+		this_cpu_inc(nic->pnicvf->drv_stats->rcv_buffer_alloc_failures);
+		return -ENOMEM;
 	}
-
+	nic->rb_page_offset = 0;
 ret:
-	*rbuf = (u64 *)((u64)page_address(nic->rb_page) + nic->rb_page_offset);
+	/* HW will ensure data coherency, CPU sync not required */
+	*rbuf = (u64 *)((u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page,
+						nic->rb_page_offset, buf_len,
+						DMA_FROM_DEVICE,
+						DMA_ATTR_SKIP_CPU_SYNC));
+	if (dma_mapping_error(&nic->pdev->dev, (dma_addr_t)*rbuf)) {
+		if (!nic->rb_page_offset)
+			__free_pages(nic->rb_page, order);
+		nic->rb_page = NULL;
+		return -ENOMEM;
+	}
 	nic->rb_page_offset += buf_len;
 
 	return 0;
@@ -158,16 +174,21 @@ static int  nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr,
 	rbdr->dma_size = buf_size;
 	rbdr->enable = true;
 	rbdr->thresh = RBDR_THRESH;
+	rbdr->head = 0;
+	rbdr->tail = 0;
 
 	nic->rb_page = NULL;
 	for (idx = 0; idx < ring_len; idx++) {
 		err = nicvf_alloc_rcv_buffer(nic, GFP_KERNEL, RCV_FRAG_LEN,
 					     &rbuf);
-		if (err)
+		if (err) {
+			/* To free already allocated and mapped ones */
+			rbdr->tail = idx - 1;
 			return err;
+		}
 
 		desc = GET_RBDR_DESC(rbdr, idx);
-		desc->buf_addr = virt_to_phys(rbuf) >> NICVF_RCV_BUF_ALIGN;
+		desc->buf_addr = (u64)rbuf >> NICVF_RCV_BUF_ALIGN;
 	}
 
 	nicvf_get_page(nic);
@@ -179,7 +200,7 @@ static int  nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr,
 static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
 {
 	int head, tail;
-	u64 buf_addr;
+	u64 buf_addr, phys_addr;
 	struct rbdr_entry_t *desc;
 
 	if (!rbdr)
@@ -192,18 +213,26 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
 	head = rbdr->head;
 	tail = rbdr->tail;
 
-	/* Free SKBs */
+	/* Release page references */
 	while (head != tail) {
 		desc = GET_RBDR_DESC(rbdr, head);
-		buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN;
-		put_page(virt_to_page(phys_to_virt(buf_addr)));
+		buf_addr = ((u64)desc->buf_addr) << NICVF_RCV_BUF_ALIGN;
+		phys_addr = nicvf_iova_to_phys(nic, buf_addr);
+		dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN,
+				     DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+		if (phys_addr)
+			put_page(virt_to_page(phys_to_virt(phys_addr)));
 		head++;
 		head &= (rbdr->dmem.q_len - 1);
 	}
-	/* Free SKB of tail desc */
+	/* Release buffer of tail desc */
 	desc = GET_RBDR_DESC(rbdr, tail);
-	buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN;
-	put_page(virt_to_page(phys_to_virt(buf_addr)));
+	buf_addr = ((u64)desc->buf_addr) << NICVF_RCV_BUF_ALIGN;
+	phys_addr = nicvf_iova_to_phys(nic, buf_addr);
+	dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN,
+			     DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+	if (phys_addr)
+		put_page(virt_to_page(phys_to_virt(phys_addr)));
 
 	/* Free RBDR ring */
 	nicvf_free_q_desc_mem(nic, &rbdr->dmem);
@@ -250,7 +279,7 @@ refill:
 			break;
 
 		desc = GET_RBDR_DESC(rbdr, tail);
-		desc->buf_addr = virt_to_phys(rbuf) >> NICVF_RCV_BUF_ALIGN;
+		desc->buf_addr = (u64)rbuf >> NICVF_RCV_BUF_ALIGN;
 		refill_rb_cnt--;
 		new_rb++;
 	}
@@ -361,9 +390,29 @@ static int nicvf_init_snd_queue(struct nicvf *nic,
 	return 0;
 }
 
+void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq,
+			      int hdr_sqe, u8 subdesc_cnt)
+{
+	u8 idx;
+	struct sq_gather_subdesc *gather;
+
+	/* Unmap DMA mapped skb data buffers */
+	for (idx = 0; idx < subdesc_cnt; idx++) {
+		hdr_sqe++;
+		hdr_sqe &= (sq->dmem.q_len - 1);
+		gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, hdr_sqe);
+		/* HW will ensure data coherency, CPU sync not required */
+		dma_unmap_page_attrs(&nic->pdev->dev, gather->addr,
+				     gather->size, DMA_TO_DEVICE,
+				     DMA_ATTR_SKIP_CPU_SYNC);
+	}
+}
+
 static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
 {
 	struct sk_buff *skb;
+	struct sq_hdr_subdesc *hdr;
+	struct sq_hdr_subdesc *tso_sqe;
 
 	if (!sq)
 		return;
@@ -379,8 +428,22 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
 	smp_rmb();
 	while (sq->head != sq->tail) {
 		skb = (struct sk_buff *)sq->skbuff[sq->head];
-		if (skb)
-			dev_kfree_skb_any(skb);
+		if (!skb)
+			goto next;
+		hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head);
+		/* Check for dummy descriptor used for HW TSO offload on 88xx */
+		if (hdr->dont_send) {
+			/* Get actual TSO descriptors and unmap them */
+			tso_sqe =
+			 (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2);
+			nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2,
+						 tso_sqe->subdesc_cnt);
+		} else {
+			nicvf_unmap_sndq_buffers(nic, sq, sq->head,
+						 hdr->subdesc_cnt);
+		}
+		dev_kfree_skb_any(skb);
+next:
 		sq->head++;
 		sq->head &= (sq->dmem.q_len - 1);
 	}
@@ -882,6 +945,14 @@ static inline int nicvf_get_sq_desc(struct snd_queue *sq, int desc_cnt)
 	return qentry;
 }
 
+/* Rollback to previous tail pointer when descriptors not used */
+static inline void nicvf_rollback_sq_desc(struct snd_queue *sq,
+					  int qentry, int desc_cnt)
+{
+	sq->tail = qentry;
+	atomic_add(desc_cnt, &sq->free_cnt);
+}
+
 /* Free descriptor back to SQ for future use */
 void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt)
 {
@@ -1207,8 +1278,9 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
 			struct sk_buff *skb, u8 sq_num)
 {
 	int i, size;
-	int subdesc_cnt, tso_sqe = 0;
+	int subdesc_cnt, hdr_sqe = 0;
 	int qentry;
+	u64 dma_addr;
 
 	subdesc_cnt = nicvf_sq_subdesc_required(nic, skb);
 	if (subdesc_cnt > atomic_read(&sq->free_cnt))
@@ -1223,12 +1295,21 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
 	/* Add SQ header subdesc */
 	nicvf_sq_add_hdr_subdesc(nic, sq, qentry, subdesc_cnt - 1,
 				 skb, skb->len);
-	tso_sqe = qentry;
+	hdr_sqe = qentry;
 
 	/* Add SQ gather subdescs */
 	qentry = nicvf_get_nxt_sqentry(sq, qentry);
 	size = skb_is_nonlinear(skb) ? skb_headlen(skb) : skb->len;
-	nicvf_sq_add_gather_subdesc(sq, qentry, size, virt_to_phys(skb->data));
+	/* HW will ensure data coherency, CPU sync not required */
+	dma_addr = dma_map_page_attrs(&nic->pdev->dev, virt_to_page(skb->data),
+				      offset_in_page(skb->data), size,
+				      DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+	if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
+		nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt);
+		return 0;
+	}
+
+	nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr);
 
 	/* Check for scattered buffer */
 	if (!skb_is_nonlinear(skb))
@@ -1241,15 +1322,26 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
 
 		qentry = nicvf_get_nxt_sqentry(sq, qentry);
 		size = skb_frag_size(frag);
-		nicvf_sq_add_gather_subdesc(sq, qentry, size,
-					    virt_to_phys(
-					    skb_frag_address(frag)));
+		dma_addr = dma_map_page_attrs(&nic->pdev->dev,
+					      skb_frag_page(frag),
+					      frag->page_offset, size,
+					      DMA_TO_DEVICE,
+					      DMA_ATTR_SKIP_CPU_SYNC);
+		if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
+			/* Free entire chain of mapped buffers
+			 * here 'i' = frags mapped + above mapped skb->data
+			 */
+			nicvf_unmap_sndq_buffers(nic, sq, hdr_sqe, i);
+			nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt);
+			return 0;
+		}
+		nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr);
 	}
 
 doorbell:
 	if (nic->t88 && skb_shinfo(skb)->gso_size) {
 		qentry = nicvf_get_nxt_sqentry(sq, qentry);
-		nicvf_sq_add_cqe_subdesc(sq, qentry, tso_sqe, skb);
+		nicvf_sq_add_cqe_subdesc(sq, qentry, hdr_sqe, skb);
 	}
 
 	nicvf_sq_doorbell(nic, skb, sq_num, subdesc_cnt);
@@ -1282,6 +1374,7 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
 	int offset;
 	u16 *rb_lens = NULL;
 	u64 *rb_ptrs = NULL;
+	u64 phys_addr;
 
 	rb_lens = (void *)cqe_rx + (3 * sizeof(u64));
 	/* Except 88xx pass1 on all other chips CQE_RX2_S is added to
@@ -1296,15 +1389,23 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
 	else
 		rb_ptrs = (void *)cqe_rx + (7 * sizeof(u64));
 
-	netdev_dbg(nic->netdev, "%s rb_cnt %d rb0_ptr %llx rb0_sz %d\n",
-		   __func__, cqe_rx->rb_cnt, cqe_rx->rb0_ptr, cqe_rx->rb0_sz);
-
 	for (frag = 0; frag < cqe_rx->rb_cnt; frag++) {
 		payload_len = rb_lens[frag_num(frag)];
+		phys_addr = nicvf_iova_to_phys(nic, *rb_ptrs);
+		if (!phys_addr) {
+			if (skb)
+				dev_kfree_skb_any(skb);
+			return NULL;
+		}
+
 		if (!frag) {
 			/* First fragment */
+			dma_unmap_page_attrs(&nic->pdev->dev,
+					     *rb_ptrs - cqe_rx->align_pad,
+					     RCV_FRAG_LEN, DMA_FROM_DEVICE,
+					     DMA_ATTR_SKIP_CPU_SYNC);
 			skb = nicvf_rb_ptr_to_skb(nic,
-						  *rb_ptrs - cqe_rx->align_pad,
+						  phys_addr - cqe_rx->align_pad,
 						  payload_len);
 			if (!skb)
 				return NULL;
@@ -1312,8 +1413,11 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
 			skb_put(skb, payload_len);
 		} else {
 			/* Add fragments */
-			page = virt_to_page(phys_to_virt(*rb_ptrs));
-			offset = phys_to_virt(*rb_ptrs) - page_address(page);
+			dma_unmap_page_attrs(&nic->pdev->dev, *rb_ptrs,
+					     RCV_FRAG_LEN, DMA_FROM_DEVICE,
+					     DMA_ATTR_SKIP_CPU_SYNC);
+			page = virt_to_page(phys_to_virt(phys_addr));
+			offset = phys_to_virt(phys_addr) - page_address(page);
 			skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
 					offset, payload_len, RCV_FRAG_LEN);
 		}
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index 5cb84da99a2d..10cb4b84625b 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -87,7 +87,7 @@
 #define RCV_BUF_COUNT		(1ULL << (RBDR_SIZE + 13))
 #define MAX_RCV_BUF_COUNT	(1ULL << (RBDR_SIZE6 + 13))
 #define RBDR_THRESH		(RCV_BUF_COUNT / 2)
-#define DMA_BUFFER_LEN		2048 /* In multiples of 128bytes */
+#define DMA_BUFFER_LEN		1536 /* In multiples of 128bytes */
 #define RCV_FRAG_LEN	 (SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \
 			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
 
@@ -301,6 +301,8 @@ struct queue_set {
 
 #define	CQ_ERR_MASK	(CQ_WR_FULL | CQ_WR_DISABLE | CQ_WR_FAULT)
 
+void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq,
+			      int hdr_sqe, u8 subdesc_cnt);
 void nicvf_config_vlan_stripping(struct nicvf *nic,
 				 netdev_features_t features);
 int nicvf_set_qset_resources(struct nicvf *nic);
-- 
cgit v1.2.3


From 18de7ba95f6e5ab150e482618123d92ee2240dc0 Mon Sep 17 00:00:00 2001
From: Sunil Goutham <sgoutham@cavium.com>
Date: Tue, 7 Mar 2017 18:09:09 +0530
Subject: net: thunderx: Fix LMAC mode debug prints for QSGMII mode

When BGX/LMACs are in QSGMII mode, for some LMACs, mode info is
not being printed. This patch will fix that. With changes already
done to not do any sort of serdes 2 lane mapping config calculation
in kernel driver, we can get rid of this logic.

Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 4c8e8cf730bb..9b8a53e138e7 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -1011,12 +1011,6 @@ static void bgx_print_qlm_mode(struct bgx *bgx, u8 lmacid)
 			dev_info(dev, "%s: 40G_KR4\n", (char *)str);
 		break;
 	case BGX_MODE_QSGMII:
-		if ((lmacid == 0) &&
-		    (bgx_get_lane2sds_cfg(bgx, lmac) != lmacid))
-			return;
-		if ((lmacid == 2) &&
-		    (bgx_get_lane2sds_cfg(bgx, lmac) == lmacid))
-			return;
 		dev_info(dev, "%s: QSGMII\n", (char *)str);
 		break;
 	case BGX_MODE_RGMII:
-- 
cgit v1.2.3


From 78aacb6f6eeea3c581a29b4a50438d0bdf85ad0b Mon Sep 17 00:00:00 2001
From: Sunil Goutham <sgoutham@cavium.com>
Date: Tue, 7 Mar 2017 18:09:10 +0530
Subject: net: thunderx: Fix invalid mac addresses for node1 interfaces

When booted with ACPI, random mac addresses are being
assigned to node1 interfaces due to mismatch of bgx_id
in BGX driver and ACPI tables.

This patch fixes this issue by setting maximum BGX devices
per node based on platform/soc instead of a macro. This
change will set the bgx_id appropriately.

Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 58 ++++++++++++++++++-----
 drivers/net/ethernet/cavium/thunder/thunder_bgx.h |  1 -
 2 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 9b8a53e138e7..64a1095e4d14 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -123,14 +123,44 @@ static int bgx_poll_reg(struct bgx *bgx, u8 lmac, u64 reg, u64 mask, bool zero)
 	return 1;
 }
 
+static int max_bgx_per_node;
+static void set_max_bgx_per_node(struct pci_dev *pdev)
+{
+	u16 sdevid;
+
+	if (max_bgx_per_node)
+		return;
+
+	pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &sdevid);
+	switch (sdevid) {
+	case PCI_SUBSYS_DEVID_81XX_BGX:
+		max_bgx_per_node = MAX_BGX_PER_CN81XX;
+		break;
+	case PCI_SUBSYS_DEVID_83XX_BGX:
+		max_bgx_per_node = MAX_BGX_PER_CN83XX;
+		break;
+	case PCI_SUBSYS_DEVID_88XX_BGX:
+	default:
+		max_bgx_per_node = MAX_BGX_PER_CN88XX;
+		break;
+	}
+}
+
+static struct bgx *get_bgx(int node, int bgx_idx)
+{
+	int idx = (node * max_bgx_per_node) + bgx_idx;
+
+	return bgx_vnic[idx];
+}
+
 /* Return number of BGX present in HW */
 unsigned bgx_get_map(int node)
 {
 	int i;
 	unsigned map = 0;
 
-	for (i = 0; i < MAX_BGX_PER_NODE; i++) {
-		if (bgx_vnic[(node * MAX_BGX_PER_NODE) + i])
+	for (i = 0; i < max_bgx_per_node; i++) {
+		if (bgx_vnic[(node * max_bgx_per_node) + i])
 			map |= (1 << i);
 	}
 
@@ -143,7 +173,7 @@ int bgx_get_lmac_count(int node, int bgx_idx)
 {
 	struct bgx *bgx;
 
-	bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+	bgx = get_bgx(node, bgx_idx);
 	if (bgx)
 		return bgx->lmac_count;
 
@@ -158,7 +188,7 @@ void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status)
 	struct bgx *bgx;
 	struct lmac *lmac;
 
-	bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+	bgx = get_bgx(node, bgx_idx);
 	if (!bgx)
 		return;
 
@@ -172,7 +202,7 @@ EXPORT_SYMBOL(bgx_get_lmac_link_state);
 
 const u8 *bgx_get_lmac_mac(int node, int bgx_idx, int lmacid)
 {
-	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+	struct bgx *bgx = get_bgx(node, bgx_idx);
 
 	if (bgx)
 		return bgx->lmac[lmacid].mac;
@@ -183,7 +213,7 @@ EXPORT_SYMBOL(bgx_get_lmac_mac);
 
 void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac)
 {
-	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+	struct bgx *bgx = get_bgx(node, bgx_idx);
 
 	if (!bgx)
 		return;
@@ -194,7 +224,7 @@ EXPORT_SYMBOL(bgx_set_lmac_mac);
 
 void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable)
 {
-	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+	struct bgx *bgx = get_bgx(node, bgx_idx);
 	struct lmac *lmac;
 	u64 cfg;
 
@@ -217,7 +247,7 @@ EXPORT_SYMBOL(bgx_lmac_rx_tx_enable);
 void bgx_lmac_get_pfc(int node, int bgx_idx, int lmacid, void *pause)
 {
 	struct pfc *pfc = (struct pfc *)pause;
-	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	struct bgx *bgx = get_bgx(node, bgx_idx);
 	struct lmac *lmac;
 	u64 cfg;
 
@@ -237,7 +267,7 @@ EXPORT_SYMBOL(bgx_lmac_get_pfc);
 void bgx_lmac_set_pfc(int node, int bgx_idx, int lmacid, void *pause)
 {
 	struct pfc *pfc = (struct pfc *)pause;
-	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	struct bgx *bgx = get_bgx(node, bgx_idx);
 	struct lmac *lmac;
 	u64 cfg;
 
@@ -369,7 +399,7 @@ u64 bgx_get_rx_stats(int node, int bgx_idx, int lmac, int idx)
 {
 	struct bgx *bgx;
 
-	bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+	bgx = get_bgx(node, bgx_idx);
 	if (!bgx)
 		return 0;
 
@@ -383,7 +413,7 @@ u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx)
 {
 	struct bgx *bgx;
 
-	bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+	bgx = get_bgx(node, bgx_idx);
 	if (!bgx)
 		return 0;
 
@@ -411,7 +441,7 @@ void bgx_lmac_internal_loopback(int node, int bgx_idx,
 	struct lmac *lmac;
 	u64    cfg;
 
-	bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+	bgx = get_bgx(node, bgx_idx);
 	if (!bgx)
 		return;
 
@@ -1328,11 +1358,13 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto err_release_regions;
 	}
 
+	set_max_bgx_per_node(pdev);
+
 	pci_read_config_word(pdev, PCI_DEVICE_ID, &sdevid);
 	if (sdevid != PCI_DEVICE_ID_THUNDER_RGX) {
 		bgx->bgx_id = (pci_resource_start(pdev,
 			PCI_CFG_REG_BAR_NUM) >> 24) & BGX_ID_MASK;
-		bgx->bgx_id += nic_get_node_id(pdev) * MAX_BGX_PER_NODE;
+		bgx->bgx_id += nic_get_node_id(pdev) * max_bgx_per_node;
 		bgx->max_lmac = MAX_LMAC_PER_BGX;
 		bgx_vnic[bgx->bgx_id] = bgx;
 	} else {
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index a60f189429bb..c5080f2cead5 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -22,7 +22,6 @@
 #define    MAX_BGX_PER_CN88XX			2
 #define    MAX_BGX_PER_CN81XX			3 /* 2 BGXs + 1 RGX */
 #define    MAX_BGX_PER_CN83XX			4
-#define    MAX_BGX_PER_NODE			4
 #define    MAX_LMAC_PER_BGX			4
 #define    MAX_BGX_CHANS_PER_LMAC		16
 #define    MAX_DMAC_PER_LMAC			8
-- 
cgit v1.2.3


From 36fa35d22bffc78c85b5e68adbdd99e914bec764 Mon Sep 17 00:00:00 2001
From: Thanneeru Srinivasulu <tsrinivasulu@cavium.com>
Date: Tue, 7 Mar 2017 18:09:11 +0530
Subject: net: thunderx: Allow IPv6 frames with zero UDP checksum

Do not consider IPv6 frames with zero UDP checksum as frames
with bad checksum and drop them.

Signed-off-by: Thanneeru Srinivasulu <tsrinivasulu@cavium.com>
Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index b1962dbd0409..f13289f0d238 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -622,9 +622,11 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
 	nicvf_send_msg_to_pf(nic, &mbx);
 
 	if (!nic->sqs_mode && (qidx == 0)) {
-		/* Enable checking L3/L4 length and TCP/UDP checksums */
+		/* Enable checking L3/L4 length and TCP/UDP checksums
+		 * Also allow IPv6 pkts with zero UDP checksum.
+		 */
 		nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0,
-				      (BIT(24) | BIT(23) | BIT(21)));
+				      (BIT(24) | BIT(23) | BIT(21) | BIT(20)));
 		nicvf_config_vlan_stripping(nic, nic->netdev->features);
 	}
 
-- 
cgit v1.2.3


From 8aad6f14c09d78862fc04e47214d398add9bb8ce Mon Sep 17 00:00:00 2001
From: "robert.foss@collabora.com" <robert.foss@collabora.com>
Date: Tue, 7 Mar 2017 11:46:25 -0500
Subject: qed: Fix copy of uninitialized memory

In qed_ll2_start_ooo() the ll2_info variable is uninitialized and then
passed to qed_ll2_acquire_connection() where it is copied into a new
memory space.

This shouldn't cause any issue as long as non of the copied memory is
every read.
But the potential for a bug being introduced by reading this memory
is real.

Detected by CoverityScan, CID#1399632 ("Uninitialized scalar variable")

Signed-off-by: Robert Foss <robert.foss@collabora.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_ll2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index 9a0b9af10a57..5fb34db377c8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -968,7 +968,7 @@ static int qed_ll2_start_ooo(struct qed_dev *cdev,
 {
 	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
 	u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id;
-	struct qed_ll2_conn ll2_info;
+	struct qed_ll2_conn ll2_info = { 0 };
 	int rc;
 
 	ll2_info.conn_type = QED_LL2_TYPE_ISCSI_OOO;
-- 
cgit v1.2.3


From 294acf1c01bace5cea5d30b510504238bf5f7c25 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 7 Mar 2017 18:33:31 +0100
Subject: net/tunnel: set inner protocol in network gro hooks

The gso code of several tunnels type (gre and udp tunnels)
takes for granted that the skb->inner_protocol is properly
initialized and drops the packet elsewhere.

On the forwarding path no one is initializing such field,
so gro encapsulated packets are dropped on forward.

Since commit 38720352412a ("gre: Use inner_proto to obtain
inner header protocol"), this can be reproduced when the
encapsulated packets use gre as the tunneling protocol.

The issue happens also with vxlan and geneve tunnels since
commit 8bce6d7d0d1e ("udp: Generalize skb_udp_segment"), if the
forwarding host's ingress nic has h/w offload for such tunnel
and a vxlan/geneve device is configured on top of it, regardless
of the configured peer address and vni.

To address the issue, this change initialize the inner_protocol
field for encapsulated packets in both ipv4 and ipv6 gro complete
callbacks.

Fixes: 38720352412a ("gre: Use inner_proto to obtain inner header protocol")
Fixes: 8bce6d7d0d1e ("udp: Generalize skb_udp_segment")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c     | 4 +++-
 net/ipv6/ip6_offload.c | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 602d40f43687..5091f46826fa 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1487,8 +1487,10 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff)
 	int proto = iph->protocol;
 	int err = -ENOSYS;
 
-	if (skb->encapsulation)
+	if (skb->encapsulation) {
+		skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IP));
 		skb_set_inner_network_header(skb, nhoff);
+	}
 
 	csum_replace2(&iph->check, iph->tot_len, newlen);
 	iph->tot_len = newlen;
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 0838e6d01d2e..93e58a5e1837 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -294,8 +294,10 @@ static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
 	struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);
 	int err = -ENOSYS;
 
-	if (skb->encapsulation)
+	if (skb->encapsulation) {
+		skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IPV6));
 		skb_set_inner_network_header(skb, nhoff);
+	}
 
 	iph->payload_len = htons(skb->len - nhoff - sizeof(*iph));
 
-- 
cgit v1.2.3


From 745cb7f8a5de0805cade3de3991b7a95317c7c73 Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@altlinux.org>
Date: Tue, 7 Mar 2017 23:50:50 +0300
Subject: uapi: fix linux/packet_diag.h userspace compilation error

Replace MAX_ADDR_LEN with its numeric value to fix the following
linux/packet_diag.h userspace compilation error:

/usr/include/linux/packet_diag.h:67:17: error: 'MAX_ADDR_LEN' undeclared here (not in a function)
  __u8 pdmc_addr[MAX_ADDR_LEN];

This is not the first case in the UAPI where the numeric value
of MAX_ADDR_LEN is used instead of symbolic one, uapi/linux/if_link.h
already does the same:

$ grep MAX_ADDR_LEN include/uapi/linux/if_link.h
	__u8 mac[32]; /* MAX_ADDR_LEN */

There are no UAPI headers besides these two that use MAX_ADDR_LEN.

Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Acked-by: Pavel Emelyanov <xemul@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/packet_diag.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/packet_diag.h b/include/uapi/linux/packet_diag.h
index d08c63f3dd6f..0c5d5dd61b6a 100644
--- a/include/uapi/linux/packet_diag.h
+++ b/include/uapi/linux/packet_diag.h
@@ -64,7 +64,7 @@ struct packet_diag_mclist {
 	__u32	pdmc_count;
 	__u16	pdmc_type;
 	__u16	pdmc_alen;
-	__u8	pdmc_addr[MAX_ADDR_LEN];
+	__u8	pdmc_addr[32]; /* MAX_ADDR_LEN */
 };
 
 struct packet_diag_ring {
-- 
cgit v1.2.3


From 9f691549f76d488a0c74397b3e51e943865ea01f Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Tue, 7 Mar 2017 20:00:12 -0800
Subject: bpf: fix struct htab_elem layout

when htab_elem is removed from the bucket list the htab_elem.hash_node.next
field should not be overridden too early otherwise we have a tiny race window
between lookup and delete.
The bug was discovered by manual code analysis and reproducible
only with explicit udelay() in lookup_elem_raw().

Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements")
Reported-by: Jonathan Perry <jonperry@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/hashtab.c | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 3ea87fb19a94..63c86a7be2a1 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -45,8 +45,13 @@ enum extra_elem_state {
 struct htab_elem {
 	union {
 		struct hlist_node hash_node;
-		struct bpf_htab *htab;
-		struct pcpu_freelist_node fnode;
+		struct {
+			void *padding;
+			union {
+				struct bpf_htab *htab;
+				struct pcpu_freelist_node fnode;
+			};
+		};
 	};
 	union {
 		struct rcu_head rcu;
@@ -162,7 +167,8 @@ skip_percpu_elems:
 				 offsetof(struct htab_elem, lru_node),
 				 htab->elem_size, htab->map.max_entries);
 	else
-		pcpu_freelist_populate(&htab->freelist, htab->elems,
+		pcpu_freelist_populate(&htab->freelist,
+				       htab->elems + offsetof(struct htab_elem, fnode),
 				       htab->elem_size, htab->map.max_entries);
 
 	return 0;
@@ -217,6 +223,11 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	int err, i;
 	u64 cost;
 
+	BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
+		     offsetof(struct htab_elem, hash_node.pprev));
+	BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
+		     offsetof(struct htab_elem, hash_node.pprev));
+
 	if (lru && !capable(CAP_SYS_ADMIN))
 		/* LRU implementation is much complicated than other
 		 * maps.  Hence, limit to CAP_SYS_ADMIN for now.
@@ -582,9 +593,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 	int err = 0;
 
 	if (prealloc) {
-		l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
-		if (!l_new)
+		struct pcpu_freelist_node *l;
+
+		l = pcpu_freelist_pop(&htab->freelist);
+		if (!l)
 			err = -E2BIG;
+		else
+			l_new = container_of(l, struct htab_elem, fnode);
 	} else {
 		if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
 			atomic_dec(&htab->count);
-- 
cgit v1.2.3


From 4fe8435909fddc97b81472026aa954e06dd192a5 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Tue, 7 Mar 2017 20:00:13 -0800
Subject: bpf: convert htab map to hlist_nulls

when all map elements are pre-allocated one cpu can delete and reuse htab_elem
while another cpu is still walking the hlist. In such case the lookup may
miss the element. Convert hlist to hlist_nulls to avoid such scenario.
When bucket lock is taken there is no need to take such precautions,
so only convert map_lookup and map_get_next to nulls.
The race window is extremely small and only reproducible with explicit
udelay() inside lookup_nulls_elem_raw()

Similar to hlist add hlist_nulls_for_each_entry_safe() and
hlist_nulls_entry_safe() helpers.

Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements")
Reported-by: Jonathan Perry <jonperry@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/list_nulls.h    |  5 +++
 include/linux/rculist_nulls.h | 14 +++++++
 kernel/bpf/hashtab.c          | 94 +++++++++++++++++++++++++++----------------
 3 files changed, 79 insertions(+), 34 deletions(-)

diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h
index b01fe1009084..87ff4f58a2f0 100644
--- a/include/linux/list_nulls.h
+++ b/include/linux/list_nulls.h
@@ -29,6 +29,11 @@ struct hlist_nulls_node {
 	((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls))
 
 #define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member)
+
+#define hlist_nulls_entry_safe(ptr, type, member) \
+	({ typeof(ptr) ____ptr = (ptr); \
+	   !is_a_nulls(____ptr) ? hlist_nulls_entry(____ptr, type, member) : NULL; \
+	})
 /**
  * ptr_is_a_nulls - Test if a ptr is a nulls
  * @ptr: ptr to be tested
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index 4ae95f7e8597..a23a33153180 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -156,5 +156,19 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
 		({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
 		pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)))
 
+/**
+ * hlist_nulls_for_each_entry_safe -
+ *   iterate over list of given type safe against removal of list entry
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_nulls_node to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_nulls_node within the struct.
+ */
+#define hlist_nulls_for_each_entry_safe(tpos, pos, head, member)		\
+	for (({barrier();}),							\
+	     pos = rcu_dereference_raw(hlist_nulls_first_rcu(head));		\
+		(!is_a_nulls(pos)) &&						\
+		({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member);	\
+		   pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)); 1; });)
 #endif
 #endif
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 63c86a7be2a1..afe5bab376c9 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -13,11 +13,12 @@
 #include <linux/bpf.h>
 #include <linux/jhash.h>
 #include <linux/filter.h>
+#include <linux/rculist_nulls.h>
 #include "percpu_freelist.h"
 #include "bpf_lru_list.h"
 
 struct bucket {
-	struct hlist_head head;
+	struct hlist_nulls_head head;
 	raw_spinlock_t lock;
 };
 
@@ -44,7 +45,7 @@ enum extra_elem_state {
 /* each htab element is struct htab_elem + key + value */
 struct htab_elem {
 	union {
-		struct hlist_node hash_node;
+		struct hlist_nulls_node hash_node;
 		struct {
 			void *padding;
 			union {
@@ -337,7 +338,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 		goto free_htab;
 
 	for (i = 0; i < htab->n_buckets; i++) {
-		INIT_HLIST_HEAD(&htab->buckets[i].head);
+		INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
 		raw_spin_lock_init(&htab->buckets[i].lock);
 	}
 
@@ -377,28 +378,52 @@ static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
 	return &htab->buckets[hash & (htab->n_buckets - 1)];
 }
 
-static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
+static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash)
 {
 	return &__select_bucket(htab, hash)->head;
 }
 
-static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
+/* this lookup function can only be called with bucket lock taken */
+static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
 					 void *key, u32 key_size)
 {
+	struct hlist_nulls_node *n;
 	struct htab_elem *l;
 
-	hlist_for_each_entry_rcu(l, head, hash_node)
+	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
 		if (l->hash == hash && !memcmp(&l->key, key, key_size))
 			return l;
 
 	return NULL;
 }
 
+/* can be called without bucket lock. it will repeat the loop in
+ * the unlikely event when elements moved from one bucket into another
+ * while link list is being walked
+ */
+static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
+					       u32 hash, void *key,
+					       u32 key_size, u32 n_buckets)
+{
+	struct hlist_nulls_node *n;
+	struct htab_elem *l;
+
+again:
+	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
+		if (l->hash == hash && !memcmp(&l->key, key, key_size))
+			return l;
+
+	if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1))))
+		goto again;
+
+	return NULL;
+}
+
 /* Called from syscall or from eBPF program */
 static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-	struct hlist_head *head;
+	struct hlist_nulls_head *head;
 	struct htab_elem *l;
 	u32 hash, key_size;
 
@@ -411,7 +436,7 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
 
 	head = select_bucket(htab, hash);
 
-	l = lookup_elem_raw(head, hash, key, key_size);
+	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
 
 	return l;
 }
@@ -444,8 +469,9 @@ static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
 static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
 {
 	struct bpf_htab *htab = (struct bpf_htab *)arg;
-	struct htab_elem *l, *tgt_l;
-	struct hlist_head *head;
+	struct htab_elem *l = NULL, *tgt_l;
+	struct hlist_nulls_head *head;
+	struct hlist_nulls_node *n;
 	unsigned long flags;
 	struct bucket *b;
 
@@ -455,9 +481,9 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
 
 	raw_spin_lock_irqsave(&b->lock, flags);
 
-	hlist_for_each_entry_rcu(l, head, hash_node)
+	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
 		if (l == tgt_l) {
-			hlist_del_rcu(&l->hash_node);
+			hlist_nulls_del_rcu(&l->hash_node);
 			break;
 		}
 
@@ -470,7 +496,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
 static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-	struct hlist_head *head;
+	struct hlist_nulls_head *head;
 	struct htab_elem *l, *next_l;
 	u32 hash, key_size;
 	int i;
@@ -484,7 +510,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	head = select_bucket(htab, hash);
 
 	/* lookup the key */
-	l = lookup_elem_raw(head, hash, key, key_size);
+	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
 
 	if (!l) {
 		i = 0;
@@ -492,7 +518,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	}
 
 	/* key was found, get next key in the same bucket */
-	next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)),
+	next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)),
 				  struct htab_elem, hash_node);
 
 	if (next_l) {
@@ -511,7 +537,7 @@ find_first_elem:
 		head = select_bucket(htab, i);
 
 		/* pick first element in the bucket */
-		next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
+		next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)),
 					  struct htab_elem, hash_node);
 		if (next_l) {
 			/* if it's not empty, just return it */
@@ -676,7 +702,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
 	struct htab_elem *l_new = NULL, *l_old;
-	struct hlist_head *head;
+	struct hlist_nulls_head *head;
 	unsigned long flags;
 	struct bucket *b;
 	u32 key_size, hash;
@@ -715,9 +741,9 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	/* add new element to the head of the list, so that
 	 * concurrent search will find it before old elem
 	 */
-	hlist_add_head_rcu(&l_new->hash_node, head);
+	hlist_nulls_add_head_rcu(&l_new->hash_node, head);
 	if (l_old) {
-		hlist_del_rcu(&l_old->hash_node);
+		hlist_nulls_del_rcu(&l_old->hash_node);
 		free_htab_elem(htab, l_old);
 	}
 	ret = 0;
@@ -731,7 +757,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
 	struct htab_elem *l_new, *l_old = NULL;
-	struct hlist_head *head;
+	struct hlist_nulls_head *head;
 	unsigned long flags;
 	struct bucket *b;
 	u32 key_size, hash;
@@ -772,10 +798,10 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
 	/* add new element to the head of the list, so that
 	 * concurrent search will find it before old elem
 	 */
-	hlist_add_head_rcu(&l_new->hash_node, head);
+	hlist_nulls_add_head_rcu(&l_new->hash_node, head);
 	if (l_old) {
 		bpf_lru_node_set_ref(&l_new->lru_node);
-		hlist_del_rcu(&l_old->hash_node);
+		hlist_nulls_del_rcu(&l_old->hash_node);
 	}
 	ret = 0;
 
@@ -796,7 +822,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
 	struct htab_elem *l_new = NULL, *l_old;
-	struct hlist_head *head;
+	struct hlist_nulls_head *head;
 	unsigned long flags;
 	struct bucket *b;
 	u32 key_size, hash;
@@ -835,7 +861,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 			ret = PTR_ERR(l_new);
 			goto err;
 		}
-		hlist_add_head_rcu(&l_new->hash_node, head);
+		hlist_nulls_add_head_rcu(&l_new->hash_node, head);
 	}
 	ret = 0;
 err:
@@ -849,7 +875,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
 	struct htab_elem *l_new = NULL, *l_old;
-	struct hlist_head *head;
+	struct hlist_nulls_head *head;
 	unsigned long flags;
 	struct bucket *b;
 	u32 key_size, hash;
@@ -897,7 +923,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 	} else {
 		pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size),
 				value, onallcpus);
-		hlist_add_head_rcu(&l_new->hash_node, head);
+		hlist_nulls_add_head_rcu(&l_new->hash_node, head);
 		l_new = NULL;
 	}
 	ret = 0;
@@ -925,7 +951,7 @@ static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 static int htab_map_delete_elem(struct bpf_map *map, void *key)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-	struct hlist_head *head;
+	struct hlist_nulls_head *head;
 	struct bucket *b;
 	struct htab_elem *l;
 	unsigned long flags;
@@ -945,7 +971,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
 	l = lookup_elem_raw(head, hash, key, key_size);
 
 	if (l) {
-		hlist_del_rcu(&l->hash_node);
+		hlist_nulls_del_rcu(&l->hash_node);
 		free_htab_elem(htab, l);
 		ret = 0;
 	}
@@ -957,7 +983,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
 static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-	struct hlist_head *head;
+	struct hlist_nulls_head *head;
 	struct bucket *b;
 	struct htab_elem *l;
 	unsigned long flags;
@@ -977,7 +1003,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
 	l = lookup_elem_raw(head, hash, key, key_size);
 
 	if (l) {
-		hlist_del_rcu(&l->hash_node);
+		hlist_nulls_del_rcu(&l->hash_node);
 		ret = 0;
 	}
 
@@ -992,12 +1018,12 @@ static void delete_all_elements(struct bpf_htab *htab)
 	int i;
 
 	for (i = 0; i < htab->n_buckets; i++) {
-		struct hlist_head *head = select_bucket(htab, i);
-		struct hlist_node *n;
+		struct hlist_nulls_head *head = select_bucket(htab, i);
+		struct hlist_nulls_node *n;
 		struct htab_elem *l;
 
-		hlist_for_each_entry_safe(l, n, head, hash_node) {
-			hlist_del_rcu(&l->hash_node);
+		hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
+			hlist_nulls_del_rcu(&l->hash_node);
 			if (l->state != HTAB_EXTRA_ELEM_USED)
 				htab_elem_free(htab, l);
 		}
-- 
cgit v1.2.3


From 834e0f8ae4e104fa026ffe5cdee58491f3078403 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 8 Mar 2017 17:40:17 -0500
Subject: drm/amdgpu: validate paramaters in the gem ioctl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reject it if there are any invalid flags or domains.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 51d759463384..106cf83c2e6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -202,6 +202,27 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 	bool kernel = false;
 	int r;
 
+	/* reject invalid gem flags */
+	if (args->in.domain_flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+				      AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
+				      AMDGPU_GEM_CREATE_CPU_GTT_USWC |
+				      AMDGPU_GEM_CREATE_VRAM_CLEARED|
+				      AMDGPU_GEM_CREATE_SHADOW |
+				      AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
+		r = -EINVAL;
+		goto error_unlock;
+	}
+	/* reject invalid gem domains */
+	if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU |
+				 AMDGPU_GEM_DOMAIN_GTT |
+				 AMDGPU_GEM_DOMAIN_VRAM |
+				 AMDGPU_GEM_DOMAIN_GDS |
+				 AMDGPU_GEM_DOMAIN_GWS |
+				 AMDGPU_GEM_DOMAIN_OA)) {
+		r = -EINVAL;
+		goto error_unlock;
+	}
+
 	/* create a gem object to contain this object in */
 	if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS |
 	    AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
-- 
cgit v1.2.3


From a5b11dac1f57c4b327c2d6eccb8fdd01499f9e17 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 8 Mar 2017 17:23:21 -0500
Subject: drm/amdgpu: bump driver version for some new features
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We added new gem ioctl flags and the new fences ioctl, but forgot
to bump the version.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 75fc376ba735..f7adbace428a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -59,9 +59,10 @@
  * - 3.7.0 - Add support for VCE clock list packet
  * - 3.8.0 - Add support raster config init in the kernel
  * - 3.9.0 - Add support for memory query info about VRAM and GTT.
+ * - 3.10.0 - Add support for new fences ioctl, new gem ioctl flags
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	9
+#define KMS_DRIVER_MINOR	10
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
-- 
cgit v1.2.3


From 8bd49ac86677ca43d64f08c45864e438283d6a76 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 6 Mar 2017 09:57:17 +0100
Subject: s390: wire up statx system call

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/uapi/asm/unistd.h | 4 +++-
 arch/s390/kernel/compat_wrapper.c   | 1 +
 arch/s390/kernel/syscalls.S         | 2 ++
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 4384bc797a54..152de9b796e1 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -313,7 +313,9 @@
 #define __NR_copy_file_range	375
 #define __NR_preadv2		376
 #define __NR_pwritev2		377
-#define NR_syscalls 378
+/* Number 378 is reserved for guarded storage */
+#define __NR_statx		379
+#define NR_syscalls 380
 
 /* 
  * There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index ae2cda5eee5a..e89cc2e71db1 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -178,3 +178,4 @@ COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
 COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len);
 COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags);
 COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
+COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer);
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 9b59e6212d8f..2659b5cfeddb 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -386,3 +386,5 @@ SYSCALL(sys_mlock2,compat_sys_mlock2)
 SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */
 SYSCALL(sys_preadv2,compat_sys_preadv2)
 SYSCALL(sys_pwritev2,compat_sys_pwritev2)
+NI_SYSCALL
+SYSCALL(sys_statx,compat_sys_statx)
-- 
cgit v1.2.3


From 6bbc4a4144b1a69743022ac68dfaf6e7d993abb9 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 9 Mar 2017 16:16:28 -0800
Subject: userfaultfd: shmem: __do_fault requires VM_FAULT_NOPAGE

__do_fault assumes vmf->page has been initialized and is valid if
VM_FAULT_NOPAGE is not returned by vma->vm_ops->fault(vma, vmf).

handle_userfault() in turn should return VM_FAULT_NOPAGE if it doesn't
return VM_FAULT_SIGBUS or VM_FAULT_RETRY (the other two possibilities).

This VM_FAULT_NOPAGE case is only invoked when signal are pending and it
didn't matter for anonymous memory before.  It only started to matter
since shmem was introduced.  hugetlbfs also takes a different path and
doesn't exercise __do_fault.

Link: http://lkml.kernel.org/r/20170228154201.GH5816@redhat.com
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/userfaultfd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 973607df579d..f62199b90fd0 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -490,7 +490,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 			 * in such case.
 			 */
 			down_read(&mm->mmap_sem);
-			ret = 0;
+			ret = VM_FAULT_NOPAGE;
 		}
 	}
 
-- 
cgit v1.2.3


From 8a1115ff6b6d90cf1066ec3a0c4e51276553eebe Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 9 Mar 2017 16:16:31 -0800
Subject: scripts/spelling.txt: add "disble(d)" pattern and fix typo instances

Fix typos and add the following to the scripts/spelling.txt:

  disble||disable
  disbled||disabled

I kept the TSL2563_INT_DISBLED in /drivers/iio/light/tsl2563.c
untouched.  The macro is not referenced at all, but this commit is
touching only comment blocks just in case.

Link: http://lkml.kernel.org/r/1481573103-11329-20-git-send-email-yamada.masahiro@socionext.com
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/dev-tools/kcov.rst        | 2 +-
 arch/cris/arch-v32/drivers/cryptocop.c  | 2 +-
 arch/x86/kernel/ftrace.c                | 2 +-
 drivers/crypto/ux500/cryp/cryp.c        | 2 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c  | 2 +-
 drivers/hv/channel.c                    | 2 +-
 drivers/isdn/hisax/st5481_b.c           | 2 +-
 drivers/mtd/spi-nor/spi-nor.c           | 2 +-
 drivers/net/ethernet/qlogic/qlge/qlge.h | 2 +-
 drivers/scsi/aic7xxx/aic79xx_core.c     | 2 +-
 drivers/usb/gadget/legacy/inode.c       | 3 +--
 drivers/usb/host/xhci.c                 | 4 ++--
 include/linux/regulator/machine.h       | 2 +-
 kernel/cgroup/cgroup.c                  | 2 +-
 kernel/events/core.c                    | 2 +-
 scripts/spelling.txt                    | 2 ++
 sound/soc/amd/acp-pcm-dma.c             | 2 +-
 17 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst
index 2c41b713841f..44886c91e112 100644
--- a/Documentation/dev-tools/kcov.rst
+++ b/Documentation/dev-tools/kcov.rst
@@ -10,7 +10,7 @@ Note that kcov does not aim to collect as much coverage as possible. It aims
 to collect more or less stable coverage that is function of syscall inputs.
 To achieve this goal it does not collect coverage in soft/hard interrupts
 and instrumentation of some inherently non-deterministic parts of kernel is
-disbled (e.g. scheduler, locking).
+disabled (e.g. scheduler, locking).
 
 Usage
 -----
diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c
index ae6903d7fdbe..14970f11bbf2 100644
--- a/arch/cris/arch-v32/drivers/cryptocop.c
+++ b/arch/cris/arch-v32/drivers/cryptocop.c
@@ -2086,7 +2086,7 @@ static void cryptocop_job_queue_close(void)
 		dma_in_cfg.en = regk_dma_no;
 		REG_WR(dma, IN_DMA_INST, rw_cfg, dma_in_cfg);
 
-		/* Disble the cryptocop. */
+		/* Disable the cryptocop. */
 		rw_cfg = REG_RD(strcop, regi_strcop, rw_cfg);
 		rw_cfg.en = 0;
 		REG_WR(strcop, regi_strcop, rw_cfg, rw_cfg);
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 8639bb2ae058..8f3d9cf26ff9 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -535,7 +535,7 @@ static void run_sync(void)
 {
 	int enable_irqs = irqs_disabled();
 
-	/* We may be called with interrupts disbled (on bootup). */
+	/* We may be called with interrupts disabled (on bootup). */
 	if (enable_irqs)
 		local_irq_enable();
 	on_each_cpu(do_sync_core, NULL, 1);
diff --git a/drivers/crypto/ux500/cryp/cryp.c b/drivers/crypto/ux500/cryp/cryp.c
index 43a0c8a26ab0..00a16ab601cb 100644
--- a/drivers/crypto/ux500/cryp/cryp.c
+++ b/drivers/crypto/ux500/cryp/cryp.c
@@ -82,7 +82,7 @@ void cryp_activity(struct cryp_device_data *device_data,
 void cryp_flush_inoutfifo(struct cryp_device_data *device_data)
 {
 	/*
-	 * We always need to disble the hardware before trying to flush the
+	 * We always need to disable the hardware before trying to flush the
 	 * FIFO. This is something that isn't written in the design
 	 * specification, but we have been informed by the hardware designers
 	 * that this must be done.
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 31375bdde6f1..011800f621c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -788,7 +788,7 @@ static int sdma_v3_0_start(struct amdgpu_device *adev)
 		}
 	}
 
-	/* disble sdma engine before programing it */
+	/* disable sdma engine before programing it */
 	sdma_v3_0_ctx_switch_enable(adev, false);
 	sdma_v3_0_enable(adev, false);
 
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 81a80c82f1bd..bd0d1988feb2 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -543,7 +543,7 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
 	/*
 	 * In case a device driver's probe() fails (e.g.,
 	 * util_probe() -> vmbus_open() returns -ENOMEM) and the device is
-	 * rescinded later (e.g., we dynamically disble an Integrated Service
+	 * rescinded later (e.g., we dynamically disable an Integrated Service
 	 * in Hyper-V Manager), the driver's remove() invokes vmbus_close():
 	 * here we should skip most of the below cleanup work.
 	 */
diff --git a/drivers/isdn/hisax/st5481_b.c b/drivers/isdn/hisax/st5481_b.c
index 409849165838..f64a36007800 100644
--- a/drivers/isdn/hisax/st5481_b.c
+++ b/drivers/isdn/hisax/st5481_b.c
@@ -239,7 +239,7 @@ static void st5481B_mode(struct st5481_bcs *bcs, int mode)
 			}
 		}
 	} else {
-		// Disble B channel interrupts
+		// Disable B channel interrupts
 		st5481_usb_device_ctrl_msg(adapter, FFMSK_B1+(bcs->channel * 2), 0, NULL, NULL);
 
 		// Disable B channel FIFOs
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 1ae872bfc3ba..747645c74134 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -186,7 +186,7 @@ static inline int write_enable(struct spi_nor *nor)
 }
 
 /*
- * Send write disble instruction to the chip.
+ * Send write disable instruction to the chip.
  */
 static inline int write_disable(struct spi_nor *nor)
 {
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h
index 6d31f92ef2b6..90b3b46f85cc 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge.h
+++ b/drivers/net/ethernet/qlogic/qlge/qlge.h
@@ -1163,7 +1163,7 @@ struct ib_mac_iocb_rsp {
 	u8 opcode;		/* 0x20 */
 	u8 flags1;
 #define IB_MAC_IOCB_RSP_OI	0x01	/* Overide intr delay */
-#define IB_MAC_IOCB_RSP_I	0x02	/* Disble Intr Generation */
+#define IB_MAC_IOCB_RSP_I	0x02	/* Disable Intr Generation */
 #define IB_MAC_CSUM_ERR_MASK 0x1c	/* A mask to use for csum errs */
 #define IB_MAC_IOCB_RSP_TE	0x04	/* Checksum error */
 #define IB_MAC_IOCB_RSP_NU	0x08	/* No checksum rcvd */
diff --git a/drivers/scsi/aic7xxx/aic79xx_core.c b/drivers/scsi/aic7xxx/aic79xx_core.c
index 109e2c99e6c1..95d8f25cbcca 100644
--- a/drivers/scsi/aic7xxx/aic79xx_core.c
+++ b/drivers/scsi/aic7xxx/aic79xx_core.c
@@ -6278,7 +6278,7 @@ ahd_reset(struct ahd_softc *ahd, int reinit)
 		 * does not disable its parity logic prior to
 		 * the start of the reset.  This may cause a
 		 * parity error to be detected and thus a
-		 * spurious SERR or PERR assertion.  Disble
+		 * spurious SERR or PERR assertion.  Disable
 		 * PERR and SERR responses during the CHIPRST.
 		 */
 		mod_cmd = cmd & ~(PCIM_CMD_PERRESPEN|PCIM_CMD_SERRESPEN);
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index a2615d64d07c..79a2d8fba6b6 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -84,8 +84,7 @@ static int ep_open(struct inode *, struct file *);
 
 /* /dev/gadget/$CHIP represents ep0 and the whole device */
 enum ep0_state {
-	/* DISBLED is the initial state.
-	 */
+	/* DISABLED is the initial state. */
 	STATE_DEV_DISABLED = 0,
 
 	/* Only one open() of /dev/gadget/$CHIP; only one file tracks
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 6d6c46000e56..50aee8b7718b 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -868,7 +868,7 @@ static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci)
 
 	spin_lock_irqsave(&xhci->lock, flags);
 
-	/* disble usb3 ports Wake bits*/
+	/* disable usb3 ports Wake bits */
 	port_index = xhci->num_usb3_ports;
 	port_array = xhci->usb3_ports;
 	while (port_index--) {
@@ -879,7 +879,7 @@ static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci)
 			writel(t2, port_array[port_index]);
 	}
 
-	/* disble usb2 ports Wake bits*/
+	/* disable usb2 ports Wake bits */
 	port_index = xhci->num_usb2_ports;
 	port_array = xhci->usb2_ports;
 	while (port_index--) {
diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index ad3e5158e586..c9f795e9a2ee 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -65,7 +65,7 @@ struct regulator_state {
 	int uV;	/* suspend voltage */
 	unsigned int mode; /* suspend regulator operating mode */
 	int enabled; /* is regulator enabled in this suspend state */
-	int disabled; /* is the regulator disbled in this suspend state */
+	int disabled; /* is the regulator disabled in this suspend state */
 };
 
 /**
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 0125589c7428..48851327a15e 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2669,7 +2669,7 @@ static bool css_visible(struct cgroup_subsys_state *css)
  *
  * Returns 0 on success, -errno on failure.  On failure, csses which have
  * been processed already aren't cleaned up.  The caller is responsible for
- * cleaning up with cgroup_apply_control_disble().
+ * cleaning up with cgroup_apply_control_disable().
  */
 static int cgroup_apply_control_enable(struct cgroup *cgrp)
 {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6f41548f2e32..a17ed56c8ce1 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -998,7 +998,7 @@ list_update_cgroup_event(struct perf_event *event,
  */
 #define PERF_CPU_HRTIMER (1000 / HZ)
 /*
- * function must be called with interrupts disbled
+ * function must be called with interrupts disabled
  */
 static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr)
 {
diff --git a/scripts/spelling.txt b/scripts/spelling.txt
index 0458b037c8a1..6dae4df472f6 100644
--- a/scripts/spelling.txt
+++ b/scripts/spelling.txt
@@ -372,6 +372,8 @@ disassocation||disassociation
 disapear||disappear
 disapeared||disappeared
 disappared||disappeared
+disble||disable
+disbled||disabled
 disconnet||disconnect
 discontinous||discontinuous
 dispertion||dispersion
diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c
index ec1067a679da..08b1399d1da2 100644
--- a/sound/soc/amd/acp-pcm-dma.c
+++ b/sound/soc/amd/acp-pcm-dma.c
@@ -89,7 +89,7 @@ static void acp_reg_write(u32 val, void __iomem *acp_mmio, u32 reg)
 	writel(val, acp_mmio + (reg * 4));
 }
 
-/* Configure a given dma channel parameters - enable/disble,
+/* Configure a given dma channel parameters - enable/disable,
  * number of descriptors, priority
  */
 static void config_acp_dma_channel(void __iomem *acp_mmio, u8 ch_num,
-- 
cgit v1.2.3


From 505d3085d7120a9f4cd0d6ffaa876968854b3baa Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 9 Mar 2017 16:16:33 -0800
Subject: scripts/spelling.txt: add "overide" pattern and fix typo instances

Fix typos and add the following to the scripts/spelling.txt:

  overide||override

While we are here, fix the doubled "address" in the touched line
Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt.

Also, fix the comment block style in the touched hunks in
drivers/media/dvb-frontends/drx39xyj/drx_driver.h.

Link: http://lkml.kernel.org/r/1481573103-11329-21-git-send-email-yamada.masahiro@socionext.com
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt | 2 +-
 drivers/block/paride/pcd.c                                       | 2 +-
 drivers/block/paride/pd.c                                        | 2 +-
 drivers/block/paride/pf.c                                        | 2 +-
 drivers/block/paride/pg.c                                        | 2 +-
 drivers/block/paride/pt.c                                        | 2 +-
 drivers/media/dvb-frontends/drx39xyj/drx_driver.h                | 8 +++-----
 drivers/net/ethernet/qlogic/qlge/qlge.h                          | 2 +-
 include/dt-bindings/sound/cs42l42.h                              | 2 +-
 include/net/irda/timer.h                                         | 2 +-
 kernel/trace/trace_stack.c                                       | 2 +-
 scripts/spelling.txt                                             | 1 +
 tools/lguest/lguest.c                                            | 2 +-
 tools/lib/bpf/Makefile                                           | 2 +-
 tools/lib/traceevent/Makefile                                    | 2 +-
 tools/lib/traceevent/event-parse.h                               | 2 +-
 16 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt b/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt
index c3f6546ebac7..6a23ad9ac53a 100644
--- a/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt
@@ -45,7 +45,7 @@ Required Properties:
 Optional Properties:
 - reg-names: In addition to the required properties, the following are optional
   - "efuse-address"	- Contains efuse base address used to pick up ABB info.
-  - "ldo-address"	- Contains address of ABB LDO overide register address.
+  - "ldo-address"	- Contains address of ABB LDO override register.
 	"efuse-address" is required for this.
 - ti,ldovbb-vset-mask	- Required if ldo-address is set, mask for LDO override
 	register to provide override vset value.
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 10aed84244f5..939641d6e262 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -50,7 +50,7 @@
                         the slower the port i/o.  In some cases, setting
                         this to zero will speed up the device. (default -1)
                         
-            major       You may use this parameter to overide the
+            major       You may use this parameter to override the
                         default major number (46) that this driver
                         will use.  Be sure to change the device
                         name as well.
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 644ba0888bd4..9cfd2e06a649 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -61,7 +61,7 @@
                         first drive found.
 			
 
-            major       You may use this parameter to overide the
+            major       You may use this parameter to override the
                         default major number (45) that this driver
                         will use.  Be sure to change the device
                         name as well.
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index ed93e8badf56..14c5d32f5d8b 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -59,7 +59,7 @@
                         the slower the port i/o.  In some cases, setting
                         this to zero will speed up the device. (default -1)
 
-	    major	You may use this parameter to overide the
+	    major	You may use this parameter to override the
 			default major number (47) that this driver
 			will use.  Be sure to change the device
 			name as well.
diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c
index 5db955fe3a94..3b5882bfb736 100644
--- a/drivers/block/paride/pg.c
+++ b/drivers/block/paride/pg.c
@@ -84,7 +84,7 @@
 			the slower the port i/o.  In some cases, setting
 			this to zero will speed up the device. (default -1)
 
-	    major	You may use this parameter to overide the
+	    major	You may use this parameter to override the
 			default major number (97) that this driver
 			will use.  Be sure to change the device
 			name as well.
diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c
index 61fc6824299a..e815312a00ad 100644
--- a/drivers/block/paride/pt.c
+++ b/drivers/block/paride/pt.c
@@ -61,7 +61,7 @@
                         the slower the port i/o.  In some cases, setting
                         this to zero will speed up the device. (default -1)
 
-	    major	You may use this parameter to overide the
+	    major	You may use this parameter to override the
 			default major number (96) that this driver
 			will use.  Be sure to change the device
 			name as well.
diff --git a/drivers/media/dvb-frontends/drx39xyj/drx_driver.h b/drivers/media/dvb-frontends/drx39xyj/drx_driver.h
index 7a681d8202c7..4442e478db72 100644
--- a/drivers/media/dvb-frontends/drx39xyj/drx_driver.h
+++ b/drivers/media/dvb-frontends/drx39xyj/drx_driver.h
@@ -256,8 +256,7 @@ int drxbsp_tuner_default_i2c_write_read(struct tuner_instance *tuner,
 *
 * The actual DAP implementation may be restricted to only one of the modes.
 * A compiler warning or error will be generated if the DAP implementation
-* overides or cannot handle the mode defined below.
-*
+* overrides or cannot handle the mode defined below.
 */
 #ifndef DRXDAP_SINGLE_MASTER
 #define DRXDAP_SINGLE_MASTER 1
@@ -272,7 +271,7 @@ int drxbsp_tuner_default_i2c_write_read(struct tuner_instance *tuner,
 *
 * This maximum size may be restricted by the actual DAP implementation.
 * A compiler warning or error will be generated if the DAP implementation
-* overides or cannot handle the chunksize defined below.
+* overrides or cannot handle the chunksize defined below.
 *
 * Beware that the DAP uses  DRXDAP_MAX_WCHUNKSIZE to create a temporary data
 * buffer. Do not undefine or choose too large, unless your system is able to
@@ -292,8 +291,7 @@ int drxbsp_tuner_default_i2c_write_read(struct tuner_instance *tuner,
 *
 * This maximum size may be restricted by the actual DAP implementation.
 * A compiler warning or error will be generated if the DAP implementation
-* overides or cannot handle the chunksize defined below.
-*
+* overrides or cannot handle the chunksize defined below.
 */
 #ifndef DRXDAP_MAX_RCHUNKSIZE
 #define  DRXDAP_MAX_RCHUNKSIZE 60
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h
index 90b3b46f85cc..84ac50f92c9c 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge.h
+++ b/drivers/net/ethernet/qlogic/qlge/qlge.h
@@ -1162,7 +1162,7 @@ struct ob_mac_tso_iocb_rsp {
 struct ib_mac_iocb_rsp {
 	u8 opcode;		/* 0x20 */
 	u8 flags1;
-#define IB_MAC_IOCB_RSP_OI	0x01	/* Overide intr delay */
+#define IB_MAC_IOCB_RSP_OI	0x01	/* Override intr delay */
 #define IB_MAC_IOCB_RSP_I	0x02	/* Disable Intr Generation */
 #define IB_MAC_CSUM_ERR_MASK 0x1c	/* A mask to use for csum errs */
 #define IB_MAC_IOCB_RSP_TE	0x04	/* Checksum error */
diff --git a/include/dt-bindings/sound/cs42l42.h b/include/dt-bindings/sound/cs42l42.h
index 399a123aed58..db69d84ed7d1 100644
--- a/include/dt-bindings/sound/cs42l42.h
+++ b/include/dt-bindings/sound/cs42l42.h
@@ -20,7 +20,7 @@
 #define CS42L42_HPOUT_LOAD_1NF		0
 #define CS42L42_HPOUT_LOAD_10NF		1
 
-/* HPOUT Clamp to GND Overide */
+/* HPOUT Clamp to GND Override */
 #define CS42L42_HPOUT_CLAMP_EN		0
 #define CS42L42_HPOUT_CLAMP_DIS		1
 
diff --git a/include/net/irda/timer.h b/include/net/irda/timer.h
index cb2615ccf761..d784f242cf7b 100644
--- a/include/net/irda/timer.h
+++ b/include/net/irda/timer.h
@@ -59,7 +59,7 @@ struct lap_cb;
  *  Slot timer must never exceed 85 ms, and must always be at least 25 ms, 
  *  suggested to  75-85 msec by IrDA lite. This doesn't work with a lot of
  *  devices, and other stackes uses a lot more, so it's best we do it as well
- *  (Note : this is the default value and sysctl overides it - Jean II)
+ *  (Note : this is the default value and sysctl overrides it - Jean II)
  */
 #define SLOT_TIMEOUT            (90*HZ/1000)
 
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 1d68b5b7ad41..5fb1f2c87e6b 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -65,7 +65,7 @@ void stack_trace_print(void)
 }
 
 /*
- * When arch-specific code overides this function, the following
+ * When arch-specific code overrides this function, the following
  * data should be filled up, assuming stack_trace_max_lock is held to
  * prevent concurrent updates.
  *     stack_trace_index[]
diff --git a/scripts/spelling.txt b/scripts/spelling.txt
index 6dae4df472f6..0545f5a8cabe 100644
--- a/scripts/spelling.txt
+++ b/scripts/spelling.txt
@@ -734,6 +734,7 @@ oustanding||outstanding
 overaall||overall
 overhread||overhead
 overlaping||overlapping
+overide||override
 overrided||overridden
 overriden||overridden
 overun||overrun
diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c
index 11c8d9bc762e..5d19fdf80292 100644
--- a/tools/lguest/lguest.c
+++ b/tools/lguest/lguest.c
@@ -1387,7 +1387,7 @@ static bool pci_data_iowrite(u16 port, u32 mask, u32 val)
 		/* Allow writing to any other BAR, or expansion ROM */
 		iowrite(portoff, val, mask, &d->config_words[reg]);
 		return true;
-		/* We let them overide latency timer and cacheline size */
+		/* We let them override latency timer and cacheline size */
 	} else if (&d->config_words[reg] == (void *)&d->config.cacheline_size) {
 		/* Only let them change the first two fields. */
 		if (mask == 0xFFFFFFFF)
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index e2efddf10231..1f5300e56b44 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -132,7 +132,7 @@ else
   Q = @
 endif
 
-# Disable command line variables (CFLAGS) overide from top
+# Disable command line variables (CFLAGS) override from top
 # level Makefile (perf), otherwise build Makefile will get
 # the same command line setup.
 MAKEOVERRIDES=
diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
index 47076b15eebe..9b8555ea3459 100644
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -135,7 +135,7 @@ else
   Q = @
 endif
 
-# Disable command line variables (CFLAGS) overide from top
+# Disable command line variables (CFLAGS) override from top
 # level Makefile (perf), otherwise build Makefile will get
 # the same command line setup.
 MAKEOVERRIDES=
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index 66342804161c..0c03538df74c 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -140,7 +140,7 @@ struct pevent_plugin_option {
  *   struct pevent_plugin_option PEVENT_PLUGIN_OPTIONS[] = {
  *	{
  *		.name = "option-name",
- *		.plugin_alias = "overide-file-name", (optional)
+ *		.plugin_alias = "override-file-name", (optional)
  *		.description = "description of option to show users",
  *	},
  *	{
-- 
cgit v1.2.3


From 52c50ca75c534c0772b71900a29b3a71439b32ef Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Thu, 9 Mar 2017 16:16:36 -0800
Subject: powerpc/mm: handle protnone ptes on fork

We need to mark pages of parent process read only on fork.  Numa fault
pte needs a protnone ptes variant with saved write flag set.  On fork we
need to make sure we remove the saved write bit.  Instead of adding the
protnone check in the caller update ptep_set_wrprotect variants to clear
savedwrite bit.

Without this we see random segfaults in application on fork.

Fixes: c137a2757b886 ("powerpc/mm/autonuma: switch ppc64 to its own implementation of saved write")
Link: http://lkml.kernel.org/r/1488203787-17849-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 73 ++++++++++++++++------------
 1 file changed, 42 insertions(+), 31 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 1eeeb72c7015..f0b08acda5eb 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -347,23 +347,53 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
 	__r;							\
 })
 
+static inline int pte_write(pte_t pte)
+{
+	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE));
+}
+
+#ifdef CONFIG_NUMA_BALANCING
+#define pte_savedwrite pte_savedwrite
+static inline bool pte_savedwrite(pte_t pte)
+{
+	/*
+	 * Saved write ptes are prot none ptes that doesn't have
+	 * privileged bit sit. We mark prot none as one which has
+	 * present and pviliged bit set and RWX cleared. To mark
+	 * protnone which used to have _PAGE_WRITE set we clear
+	 * the privileged bit.
+	 */
+	return !(pte_raw(pte) & cpu_to_be64(_PAGE_RWX | _PAGE_PRIVILEGED));
+}
+#else
+#define pte_savedwrite pte_savedwrite
+static inline bool pte_savedwrite(pte_t pte)
+{
+	return false;
+}
+#endif
+
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 				      pte_t *ptep)
 {
-	if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_WRITE)) == 0)
-		return;
-
-	pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
+	if (pte_write(*ptep))
+		pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
+	else if (unlikely(pte_savedwrite(*ptep)))
+		pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0);
 }
 
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 					   unsigned long addr, pte_t *ptep)
 {
-	if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_WRITE)) == 0)
-		return;
-
-	pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1);
+	/*
+	 * We should not find protnone for hugetlb, but this complete the
+	 * interface.
+	 */
+	if (pte_write(*ptep))
+		pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1);
+	else if (unlikely(pte_savedwrite(*ptep)))
+		pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 1);
 }
 
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
@@ -397,11 +427,6 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
 	pte_update(mm, addr, ptep, ~0UL, 0, 0);
 }
 
-static inline int pte_write(pte_t pte)
-{
-	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE));
-}
-
 static inline int pte_dirty(pte_t pte)
 {
 	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DIRTY));
@@ -466,19 +491,6 @@ static inline pte_t pte_clear_savedwrite(pte_t pte)
 	return __pte(pte_val(pte) | _PAGE_PRIVILEGED);
 }
 
-#define pte_savedwrite pte_savedwrite
-static inline bool pte_savedwrite(pte_t pte)
-{
-	/*
-	 * Saved write ptes are prot none ptes that doesn't have
-	 * privileged bit sit. We mark prot none as one which has
-	 * present and pviliged bit set and RWX cleared. To mark
-	 * protnone which used to have _PAGE_WRITE set we clear
-	 * the privileged bit.
-	 */
-	VM_BUG_ON(!pte_protnone(pte));
-	return !(pte_raw(pte) & cpu_to_be64(_PAGE_RWX | _PAGE_PRIVILEGED));
-}
 #endif /* CONFIG_NUMA_BALANCING */
 
 static inline int pte_present(pte_t pte)
@@ -982,11 +994,10 @@ static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
 static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 				      pmd_t *pmdp)
 {
-
-	if ((pmd_raw(*pmdp) & cpu_to_be64(_PAGE_WRITE)) == 0)
-		return;
-
-	pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
+	if (pmd_write((*pmdp)))
+		pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
+	else if (unlikely(pmd_savedwrite(*pmdp)))
+		pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED);
 }
 
 static inline int pmd_trans_huge(pmd_t pmd)
-- 
cgit v1.2.3


From d19469e8415813cceaa494b6f538e327b9a95f3b Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Thu, 9 Mar 2017 16:16:39 -0800
Subject: power/mm: update pte_write and pte_wrprotect to handle savedwrite

We use pte_write() to check whethwer the pte entry is writable.  This is
mostly used to later mark the pte read only if it is writable.  The other
use of pte_write() is to check whether the pte_entry is writable so that
hardware page table entry can be marked accordingly.  This is used in kvm
where we look at qemu page table entry and update hardware hash page table
for the guest with correct write enable bit.

With the above, for the first usage we should also check the savedwrite
bit so that we can correctly clear the savedwite bit.  For the later, we
add a new variant __pte_write().

With this we can revert write_protect_page part of 595cd8f256d2 ("mm/ksm:
handle protnone saved writes when making page write protect").  But I left
it as it is as an example code for savedwrite check.

Fixes: c137a2757b886 ("powerpc/mm/autonuma: switch ppc64 to its own implementation of saved write")
Link: http://lkml.kernel.org/r/1488203787-17849-2-git-send-email-aneesh.kumar@linux.vnet.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 24 +++++++++++++++++++-----
 arch/powerpc/kvm/book3s_64_mmu_hv.c          |  2 +-
 arch/powerpc/kvm/book3s_hv_rm_mmu.c          |  2 +-
 3 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index f0b08acda5eb..ec1e731e6a2d 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -347,7 +347,7 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
 	__r;							\
 })
 
-static inline int pte_write(pte_t pte)
+static inline int __pte_write(pte_t pte)
 {
 	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE));
 }
@@ -373,11 +373,16 @@ static inline bool pte_savedwrite(pte_t pte)
 }
 #endif
 
+static inline int pte_write(pte_t pte)
+{
+	return __pte_write(pte) || pte_savedwrite(pte);
+}
+
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 				      pte_t *ptep)
 {
-	if (pte_write(*ptep))
+	if (__pte_write(*ptep))
 		pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
 	else if (unlikely(pte_savedwrite(*ptep)))
 		pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0);
@@ -390,7 +395,7 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 	 * We should not find protnone for hugetlb, but this complete the
 	 * interface.
 	 */
-	if (pte_write(*ptep))
+	if (__pte_write(*ptep))
 		pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1);
 	else if (unlikely(pte_savedwrite(*ptep)))
 		pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 1);
@@ -490,7 +495,13 @@ static inline pte_t pte_clear_savedwrite(pte_t pte)
 	VM_BUG_ON(!pte_protnone(pte));
 	return __pte(pte_val(pte) | _PAGE_PRIVILEGED);
 }
-
+#else
+#define pte_clear_savedwrite pte_clear_savedwrite
+static inline pte_t pte_clear_savedwrite(pte_t pte)
+{
+	VM_WARN_ON(1);
+	return __pte(pte_val(pte) & ~_PAGE_WRITE);
+}
 #endif /* CONFIG_NUMA_BALANCING */
 
 static inline int pte_present(pte_t pte)
@@ -518,6 +529,8 @@ static inline unsigned long pte_pfn(pte_t pte)
 /* Generic modifiers for PTE bits */
 static inline pte_t pte_wrprotect(pte_t pte)
 {
+	if (unlikely(pte_savedwrite(pte)))
+		return pte_clear_savedwrite(pte);
 	return __pte(pte_val(pte) & ~_PAGE_WRITE);
 }
 
@@ -938,6 +951,7 @@ static inline int pmd_protnone(pmd_t pmd)
 
 #define __HAVE_ARCH_PMD_WRITE
 #define pmd_write(pmd)		pte_write(pmd_pte(pmd))
+#define __pmd_write(pmd)	__pte_write(pmd_pte(pmd))
 #define pmd_savedwrite(pmd)	pte_savedwrite(pmd_pte(pmd))
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -994,7 +1008,7 @@ static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
 static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 				      pmd_t *pmdp)
 {
-	if (pmd_write((*pmdp)))
+	if (__pmd_write((*pmdp)))
 		pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
 	else if (unlikely(pmd_savedwrite(*pmdp)))
 		pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index f3158fb16de3..8c68145ba1bd 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -601,7 +601,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 							 hva, NULL, NULL);
 			if (ptep) {
 				pte = kvmppc_read_update_linux_pte(ptep, 1);
-				if (pte_write(pte))
+				if (__pte_write(pte))
 					write_ok = 1;
 			}
 			local_irq_restore(flags);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 6fca970373ee..ce6f2121fffe 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -256,7 +256,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 		}
 		pte = kvmppc_read_update_linux_pte(ptep, writing);
 		if (pte_present(pte) && !pte_protnone(pte)) {
-			if (writing && !pte_write(pte))
+			if (writing && !__pte_write(pte))
 				/* make the actual HPTE be read-only */
 				ptel = hpte_make_readonly(ptel);
 			is_ci = pte_ci(pte);
-- 
cgit v1.2.3


From ef947b2529f918d9606533eb9c32b187ed6a5ede Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 9 Mar 2017 16:16:42 -0800
Subject: x86, mm: fix gup_pte_range() vs DAX mappings

gup_pte_range() fails to check pte_allows_gup() before translating a DAX
pte entry, pte_devmap(), to a page.  This allows writes to read-only
mappings, and bypasses the DAX cacheline dirty tracking due to missed
'mkwrite' faults.  The gup_huge_pmd() path and the gup_huge_pud() path
correctly check pte_allows_gup() before checking for _devmap() entries.

Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings")
Link: http://lkml.kernel.org/r/148804251312.36605.12665024794196605053.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reported-by: Dave Hansen <dave.hansen@linux.intel.com>
Reported-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Xiong Zhou <xzhou@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/gup.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 99c7805a9693..9d32ee608807 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -120,6 +120,11 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 			return 0;
 		}
 
+		if (!pte_allows_gup(pte_val(pte), write)) {
+			pte_unmap(ptep);
+			return 0;
+		}
+
 		if (pte_devmap(pte)) {
 			pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
 			if (unlikely(!pgmap)) {
@@ -127,8 +132,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 				pte_unmap(ptep);
 				return 0;
 			}
-		} else if (!pte_allows_gup(pte_val(pte), write) ||
-			   pte_special(pte)) {
+		} else if (pte_special(pte)) {
 			pte_unmap(ptep);
 			return 0;
 		}
-- 
cgit v1.2.3


From b2e593e271b0760ebc8999e5f9dd068ae2b9d30a Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 9 Mar 2017 16:16:45 -0800
Subject: x86, mm: unify exit paths in gup_pte_range()

All exit paths from gup_pte_range() require pte_unmap() of the original
pte page before returning.  Refactor the code to have a single exit
point to do the unmap.

This mirrors the flow of the generic gup_pte_range() in mm/gup.c.

Link: http://lkml.kernel.org/r/148804251828.36605.14910389618497006945.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/gup.c | 39 ++++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 9d32ee608807..1f3b6ef105cd 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -106,36 +106,35 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 		unsigned long end, int write, struct page **pages, int *nr)
 {
 	struct dev_pagemap *pgmap = NULL;
-	int nr_start = *nr;
-	pte_t *ptep;
+	int nr_start = *nr, ret = 0;
+	pte_t *ptep, *ptem;
 
-	ptep = pte_offset_map(&pmd, addr);
+	/*
+	 * Keep the original mapped PTE value (ptem) around since we
+	 * might increment ptep off the end of the page when finishing
+	 * our loop iteration.
+	 */
+	ptem = ptep = pte_offset_map(&pmd, addr);
 	do {
 		pte_t pte = gup_get_pte(ptep);
 		struct page *page;
 
 		/* Similar to the PMD case, NUMA hinting must take slow path */
-		if (pte_protnone(pte)) {
-			pte_unmap(ptep);
-			return 0;
-		}
+		if (pte_protnone(pte))
+			break;
 
-		if (!pte_allows_gup(pte_val(pte), write)) {
-			pte_unmap(ptep);
-			return 0;
-		}
+		if (!pte_allows_gup(pte_val(pte), write))
+			break;
 
 		if (pte_devmap(pte)) {
 			pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
 			if (unlikely(!pgmap)) {
 				undo_dev_pagemap(nr, nr_start, pages);
-				pte_unmap(ptep);
-				return 0;
+				break;
 			}
-		} else if (pte_special(pte)) {
-			pte_unmap(ptep);
-			return 0;
-		}
+		} else if (pte_special(pte))
+			break;
+
 		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
 		page = pte_page(pte);
 		get_page(page);
@@ -145,9 +144,11 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 		(*nr)++;
 
 	} while (ptep++, addr += PAGE_SIZE, addr != end);
-	pte_unmap(ptep - 1);
+	if (addr == end)
+		ret = 1;
+	pte_unmap(ptem);
 
-	return 1;
+	return ret;
 }
 
 static inline void get_head_page_multiple(struct page *page, int nr)
-- 
cgit v1.2.3


From dd0db88d8094a6d9d4d1fc5fcd56ab619f54ccf8 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 9 Mar 2017 16:16:49 -0800
Subject: userfaultfd: non-cooperative: rollback userfaultfd_exit

Patch series "userfaultfd non-cooperative further update for 4.11 merge
window".

Unfortunately I noticed one relevant bug in userfaultfd_exit while doing
more testing.  I've been doing testing before and this was also tested
by kbuild bot and exercised by the selftest, but this bug never
reproduced before.

I dropped userfaultfd_exit as result.  I dropped it because of
implementation difficulty in receiving signals in __mmput and because I
think -ENOSPC as result from the background UFFDIO_COPY should be enough
already.

Before I decided to remove userfaultfd_exit, I noticed userfaultfd_exit
wasn't exercised by the selftest and when I tried to exercise it, after
moving it to a more correct place in __mmput where it would make more
sense and where the vma list is stable, it resulted in the
event_wait_completion in D state.  So then I added the second patch to
be sure even if we call userfaultfd_event_wait_completion too late
during task exit(), we won't risk to generate tasks in D state.  The
same check exists in handle_userfault() for the same reason, except it
makes a difference there, while here is just a robustness check and it's
run under WARN_ON_ONCE.

While looking at the userfaultfd_event_wait_completion() function I
looked back at its callers too while at it and I think it's not ok to
stop executing dup_fctx on the fcs list because we relay on
userfaultfd_event_wait_completion to execute
userfaultfd_ctx_put(fctx->orig) which is paired against
userfaultfd_ctx_get(fctx->orig) in dup_userfault just before
list_add(fcs).  This change only takes care of fctx->orig but this area
also needs further review looking for similar problems in fctx->new.

The only patch that is urgent is the first because it's an use after
free during a SMP race condition that affects all processes if
CONFIG_USERFAULTFD=y.  Very hard to reproduce though and probably
impossible without SLUB poisoning enabled.

This patch (of 3):

I once reproduced this oops with the userfaultfd selftest, it's not
easily reproducible and it requires SLUB poisoning to reproduce.

    general protection fault: 0000 [#1] SMP
    Modules linked in:
    CPU: 2 PID: 18421 Comm: userfaultfd Tainted: G               ------------ T 3.10.0+ #15
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.1-0-g8891697-prebuilt.qemu-project.org 04/01/2014
    task: ffff8801f83b9440 ti: ffff8801f833c000 task.ti: ffff8801f833c000
    RIP: 0010:[<ffffffff81451299>]  [<ffffffff81451299>] userfaultfd_exit+0x29/0xa0
    RSP: 0018:ffff8801f833fe80  EFLAGS: 00010202
    RAX: ffff8801f833ffd8 RBX: 6b6b6b6b6b6b6b6b RCX: ffff8801f83b9440
    RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8800baf18600
    RBP: ffff8801f833fee8 R08: 0000000000000000 R09: 0000000000000001
    R10: 0000000000000000 R11: ffffffff8127ceb3 R12: 0000000000000000
    R13: ffff8800baf186b0 R14: ffff8801f83b99f8 R15: 00007faed746c700
    FS:  0000000000000000(0000) GS:ffff88023fc80000(0000) knlGS:0000000000000000
    CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
    CR2: 00007faf0966f028 CR3: 0000000001bc6000 CR4: 00000000000006e0
    DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
    DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
    Call Trace:
      do_exit+0x297/0xd10
      SyS_exit+0x17/0x20
      tracesys+0xdd/0xe2
    Code: 00 00 66 66 66 66 90 55 48 89 e5 41 54 53 48 83 ec 58 48 8b 1f 48 85 db 75 11 eb 73 66 0f 1f 44 00 00 48 8b 5b 10 48 85 db 74 64 <4c> 8b a3 b8 00 00 00 4d 85 e4 74 eb 41 f6 84 24 2c 01 00 00 80
    RIP  [<ffffffff81451299>] userfaultfd_exit+0x29/0xa0
     RSP <ffff8801f833fe80>
    ---[ end trace 9fecd6dcb442846a ]---

In the debugger I located the "mm" pointer in the stack and walking
mm->mmap->vm_next through the end shows the vma->vm_next list is fully
consistent and it is null terminated list as expected.  So this has to
be an SMP race condition where userfaultfd_exit was running while the
vma list was being modified by another CPU.

When userfaultfd_exit() run one of the ->vm_next pointers pointed to
SLAB_POISON (RBX is the vma pointer and is 0x6b6b..).

The reason is that it's not running in __mmput but while there are still
other threads running and it's not holding the mmap_sem (it can't as it
has to wait the even to be received by the manager).  So this is an use
after free that was happening for all processes.

One more implementation problem aside from the race condition:
userfaultfd_exit has really to check a flag in mm->flags before walking
the vma or it's going to slowdown the exit() path for regular tasks.

One more implementation problem: at that point signals can't be
delivered so it would also create a task in D state if the manager
doesn't read the event.

The major design issue: it overall looks superfluous as the manager can
check for -ENOSPC in the background transfer:

	if (mmget_not_zero(ctx->mm)) {
[..]
	} else {
		return -ENOSPC;
	}

It's safer to roll it back and re-introduce it later if at all.

[rppt@linux.vnet.ibm.com: documentation fixup after removal of UFFD_EVENT_EXIT]
  Link: http://lkml.kernel.org/r/1488345437-4364-1-git-send-email-rppt@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/20170224181957.19736-2-aarcange@redhat.com
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Acked-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/vm/userfaultfd.txt |  4 ----
 fs/userfaultfd.c                 | 28 ----------------------------
 include/linux/userfaultfd_k.h    |  6 ------
 include/uapi/linux/userfaultfd.h |  5 +----
 kernel/exit.c                    |  1 -
 5 files changed, 1 insertion(+), 43 deletions(-)

diff --git a/Documentation/vm/userfaultfd.txt b/Documentation/vm/userfaultfd.txt
index 0e5543a920e5..bb2f945f87ab 100644
--- a/Documentation/vm/userfaultfd.txt
+++ b/Documentation/vm/userfaultfd.txt
@@ -172,10 +172,6 @@ the same read(2) protocol as for the page fault notifications. The
 manager has to explicitly enable these events by setting appropriate
 bits in uffdio_api.features passed to UFFDIO_API ioctl:
 
-UFFD_FEATURE_EVENT_EXIT - enable notification about exit() of the
-non-cooperative process. When the monitored process exits, the uffd
-manager will get UFFD_EVENT_EXIT.
-
 UFFD_FEATURE_EVENT_FORK - enable userfaultfd hooks for fork(). When
 this feature is enabled, the userfaultfd context of the parent process
 is duplicated into the newly created process. The manager receives
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index f62199b90fd0..16d0cc600fa9 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -775,34 +775,6 @@ void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf)
 	}
 }
 
-void userfaultfd_exit(struct mm_struct *mm)
-{
-	struct vm_area_struct *vma = mm->mmap;
-
-	/*
-	 * We can do the vma walk without locking because the caller
-	 * (exit_mm) knows it now has exclusive access
-	 */
-	while (vma) {
-		struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
-
-		if (ctx && (ctx->features & UFFD_FEATURE_EVENT_EXIT)) {
-			struct userfaultfd_wait_queue ewq;
-
-			userfaultfd_ctx_get(ctx);
-
-			msg_init(&ewq.msg);
-			ewq.msg.event = UFFD_EVENT_EXIT;
-
-			userfaultfd_event_wait_completion(ctx, &ewq);
-
-			ctx->features &= ~UFFD_FEATURE_EVENT_EXIT;
-		}
-
-		vma = vma->vm_next;
-	}
-}
-
 static int userfaultfd_release(struct inode *inode, struct file *file)
 {
 	struct userfaultfd_ctx *ctx = file->private_data;
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 0468548acebf..f2b79bf4c895 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -72,8 +72,6 @@ extern int userfaultfd_unmap_prep(struct vm_area_struct *vma,
 extern void userfaultfd_unmap_complete(struct mm_struct *mm,
 				       struct list_head *uf);
 
-extern void userfaultfd_exit(struct mm_struct *mm);
-
 #else /* CONFIG_USERFAULTFD */
 
 /* mm helpers */
@@ -139,10 +137,6 @@ static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
 {
 }
 
-static inline void userfaultfd_exit(struct mm_struct *mm)
-{
-}
-
 #endif /* CONFIG_USERFAULTFD */
 
 #endif /* _LINUX_USERFAULTFD_K_H */
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index c055947c5c98..3b059530dac9 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -18,8 +18,7 @@
  * means the userland is reading).
  */
 #define UFFD_API ((__u64)0xAA)
-#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_EXIT |		\
-			   UFFD_FEATURE_EVENT_FORK |		\
+#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK |		\
 			   UFFD_FEATURE_EVENT_REMAP |		\
 			   UFFD_FEATURE_EVENT_REMOVE |	\
 			   UFFD_FEATURE_EVENT_UNMAP |		\
@@ -113,7 +112,6 @@ struct uffd_msg {
 #define UFFD_EVENT_REMAP	0x14
 #define UFFD_EVENT_REMOVE	0x15
 #define UFFD_EVENT_UNMAP	0x16
-#define UFFD_EVENT_EXIT		0x17
 
 /* flags for UFFD_EVENT_PAGEFAULT */
 #define UFFD_PAGEFAULT_FLAG_WRITE	(1<<0)	/* If this was a write fault */
@@ -163,7 +161,6 @@ struct uffdio_api {
 #define UFFD_FEATURE_MISSING_HUGETLBFS		(1<<4)
 #define UFFD_FEATURE_MISSING_SHMEM		(1<<5)
 #define UFFD_FEATURE_EVENT_UNMAP		(1<<6)
-#define UFFD_FEATURE_EVENT_EXIT			(1<<7)
 	__u64 features;
 
 	__u64 ioctls;
diff --git a/kernel/exit.c b/kernel/exit.c
index e126ebf2400c..516acdb0e0ec 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -554,7 +554,6 @@ static void exit_mm(void)
 	enter_lazy_tlb(mm, current);
 	task_unlock(current);
 	mm_update_next_owner(mm);
-	userfaultfd_exit(mm);
 	mmput(mm);
 	if (test_thread_flag(TIF_MEMDIE))
 		exit_oom_victim();
-- 
cgit v1.2.3


From 9a69a829f9b656c2a220d65a94ecf7b5887c5da1 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 9 Mar 2017 16:16:52 -0800
Subject: userfaultfd: non-cooperative: robustness check

Similar to the handle_userfault() case, also make sure to never attempt
to send any event past the PF_EXITING point of no return.

This is purely a robustness check.

Link: http://lkml.kernel.org/r/20170224181957.19736-3-aarcange@redhat.com
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/userfaultfd.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 16d0cc600fa9..668bbbd2e04d 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -530,8 +530,13 @@ out:
 static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 					     struct userfaultfd_wait_queue *ewq)
 {
-	int ret = 0;
+	int ret;
+
+	ret = -1;
+	if (WARN_ON_ONCE(current->flags & PF_EXITING))
+		goto out;
 
+	ret = 0;
 	ewq->ctx = ctx;
 	init_waitqueue_entry(&ewq->wq, current);
 
@@ -566,7 +571,7 @@ static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 	 * ctx may go away after this if the userfault pseudo fd is
 	 * already released.
 	 */
-
+out:
 	userfaultfd_ctx_put(ctx);
 	return ret;
 }
-- 
cgit v1.2.3


From 8c9e7bb7a41f2bbd54b2caefb274fb3de239819f Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 9 Mar 2017 16:16:54 -0800
Subject: userfaultfd: non-cooperative: release all ctx in
 dup_userfaultfd_complete

Don't stop running dup_fctx() even if userfaultfd_event_wait_completion
fails as it has to run userfaultfd_ctx_put on all ctx to pair against
the userfaultfd_ctx_get that was run on all fctx->orig in
dup_userfaultfd.

Link: http://lkml.kernel.org/r/20170224181957.19736-4-aarcange@redhat.com
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/userfaultfd.c | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 668bbbd2e04d..dd48052e086f 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -527,16 +527,12 @@ out:
 	return ret;
 }
 
-static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
-					     struct userfaultfd_wait_queue *ewq)
+static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
+					      struct userfaultfd_wait_queue *ewq)
 {
-	int ret;
-
-	ret = -1;
 	if (WARN_ON_ONCE(current->flags & PF_EXITING))
 		goto out;
 
-	ret = 0;
 	ewq->ctx = ctx;
 	init_waitqueue_entry(&ewq->wq, current);
 
@@ -552,7 +548,6 @@ static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 			break;
 		if (ACCESS_ONCE(ctx->released) ||
 		    fatal_signal_pending(current)) {
-			ret = -1;
 			__remove_wait_queue(&ctx->event_wqh, &ewq->wq);
 			break;
 		}
@@ -573,7 +568,6 @@ static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 	 */
 out:
 	userfaultfd_ctx_put(ctx);
-	return ret;
 }
 
 static void userfaultfd_event_complete(struct userfaultfd_ctx *ctx,
@@ -631,7 +625,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
 	return 0;
 }
 
-static int dup_fctx(struct userfaultfd_fork_ctx *fctx)
+static void dup_fctx(struct userfaultfd_fork_ctx *fctx)
 {
 	struct userfaultfd_ctx *ctx = fctx->orig;
 	struct userfaultfd_wait_queue ewq;
@@ -641,17 +635,15 @@ static int dup_fctx(struct userfaultfd_fork_ctx *fctx)
 	ewq.msg.event = UFFD_EVENT_FORK;
 	ewq.msg.arg.reserved.reserved1 = (unsigned long)fctx->new;
 
-	return userfaultfd_event_wait_completion(ctx, &ewq);
+	userfaultfd_event_wait_completion(ctx, &ewq);
 }
 
 void dup_userfaultfd_complete(struct list_head *fcs)
 {
-	int ret = 0;
 	struct userfaultfd_fork_ctx *fctx, *n;
 
 	list_for_each_entry_safe(fctx, n, fcs, list) {
-		if (!ret)
-			ret = dup_fctx(fctx);
+		dup_fctx(fctx);
 		list_del(&fctx->list);
 		kfree(fctx);
 	}
-- 
cgit v1.2.3


From cbfd0c1001bedb4b051cf4a1f5df24f1500381bc Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 9 Mar 2017 16:16:57 -0800
Subject: include/linux/fs.h: fix unsigned enum warning with gcc-4.2

With arm-linux-gcc-4.2, almost every file we build in the kernel ends up
with this warning:

  include/linux/fs.h:2648: warning: comparison of unsigned expression < 0 is always false

Later versions don't have this problem, but it's easy enough to work
around.

Link: http://lkml.kernel.org/r/20161216105634.235457-12-arnd@arndb.de
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Russell King <rmk+kernel@armlinux.org.uk>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index aad3fd0ff5f8..7251f7bb45e8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2678,7 +2678,7 @@ static const char * const kernel_read_file_str[] = {
 
 static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id)
 {
-	if (id < 0 || id >= READING_MAX_ID)
+	if ((unsigned)id >= READING_MAX_ID)
 		return kernel_read_file_str[READING_UNKNOWN];
 
 	return kernel_read_file_str[id];
-- 
cgit v1.2.3


From ce9311cf95ad8baf044a014738d76973d93b739a Mon Sep 17 00:00:00 2001
From: Yisheng Xie <xieyisheng1@huawei.com>
Date: Thu, 9 Mar 2017 16:17:00 -0800
Subject: mm/vmstats: add thp_split_pud event for clarity

We added support for PUD-sized transparent hugepages, however we count
the event "thp split pud" into thp_split_pmd event.

To separate the event count of thp split pud from pmd, add a new event
named thp_split_pud.

Link: http://lkml.kernel.org/r/1488282380-5076-1-git-send-email-xieyisheng1@huawei.com
Signed-off-by: Yisheng Xie <xieyisheng1@huawei.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Hugh Dickins <hughd@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Ebru Akagunduz <ebru.akagunduz@gmail.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vm_event_item.h | 3 +++
 mm/huge_memory.c              | 2 +-
 mm/vmstat.c                   | 3 +++
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 6aa1b6cb5828..a80b7b59cf33 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -79,6 +79,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		THP_SPLIT_PAGE_FAILED,
 		THP_DEFERRED_SPLIT_PAGE,
 		THP_SPLIT_PMD,
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+		THP_SPLIT_PUD,
+#endif
 		THP_ZERO_PAGE_ALLOC,
 		THP_ZERO_PAGE_ALLOC_FAILED,
 #endif
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d36b2af4d1bf..8f037e256c54 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1828,7 +1828,7 @@ static void __split_huge_pud_locked(struct vm_area_struct *vma, pud_t *pud,
 	VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PUD_SIZE, vma);
 	VM_BUG_ON(!pud_trans_huge(*pud) && !pud_devmap(*pud));
 
-	count_vm_event(THP_SPLIT_PMD);
+	count_vm_event(THP_SPLIT_PUD);
 
 	pudp_huge_clear_flush_notify(vma, haddr, pud);
 }
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 69f9aff39a2e..b1947f0cbee2 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1065,6 +1065,9 @@ const char * const vmstat_text[] = {
 	"thp_split_page_failed",
 	"thp_deferred_split_page",
 	"thp_split_pmd",
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+	"thp_split_pud",
+#endif
 	"thp_zero_page_alloc",
 	"thp_zero_page_alloc_failed",
 #endif
-- 
cgit v1.2.3


From f4b7ac68f438fa8521bbbf421f194ff10b0a7577 Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Thu, 9 Mar 2017 16:17:03 -0800
Subject: drivers/md/bcache/util.h: remove duplicate inclusion of blkdev.h

Link: http://lkml.kernel.org/r/20170226060230.11555-1-standby24x7@gmail.com
Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Acked-by: Coly Li <colyli@suse.de>
Cc: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/md/bcache/util.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index a126919ed102..5d13930f0f22 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -4,7 +4,6 @@
 
 #include <linux/blkdev.h>
 #include <linux/errno.h>
-#include <linux/blkdev.h>
 #include <linux/kernel.h>
 #include <linux/sched/clock.h>
 #include <linux/llist.h>
-- 
cgit v1.2.3


From bfc7228b9a9647e1c353e50b40297a2929801759 Mon Sep 17 00:00:00 2001
From: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Date: Thu, 9 Mar 2017 16:17:06 -0800
Subject: mm/cgroup: avoid panic when init with low memory

The system may panic when initialisation is done when almost all the
memory is assigned to the huge pages using the kernel command line
parameter hugepage=xxxx.  Panic may occur like this:

  Unable to handle kernel paging request for data at address 0x00000000
  Faulting instruction address: 0xc000000000302b88
  Oops: Kernel access of bad area, sig: 11 [#1]
  SMP NR_CPUS=2048 [    0.082424] NUMA
  pSeries
  Modules linked in:
  CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.9.0-15-generic #16-Ubuntu
  task: c00000021ed01600 task.stack: c00000010d108000
  NIP: c000000000302b88 LR: c000000000270e04 CTR: c00000000016cfd0
  REGS: c00000010d10b2c0 TRAP: 0300   Not tainted (4.9.0-15-generic)
  MSR: 8000000002009033 <SF,VEC,EE,ME,IR,DR,RI,LE>[ 0.082770]   CR: 28424422  XER: 00000000
  CFAR: c0000000003d28b8 DAR: 0000000000000000 DSISR: 40000000 SOFTE: 1
  GPR00: c000000000270e04 c00000010d10b540 c00000000141a300 c00000010fff6300
  GPR04: 0000000000000000 00000000026012c0 c00000010d10b630 0000000487ab0000
  GPR08: 000000010ee90000 c000000001454fd8 0000000000000000 0000000000000000
  GPR12: 0000000000004400 c00000000fb80000 00000000026012c0 00000000026012c0
  GPR16: 00000000026012c0 0000000000000000 0000000000000000 0000000000000002
  GPR20: 000000000000000c 0000000000000000 0000000000000000 00000000024200c0
  GPR24: c0000000016eef48 0000000000000000 c00000010fff7d00 00000000026012c0
  GPR28: 0000000000000000 c00000010fff7d00 c00000010fff6300 c00000010d10b6d0
  NIP mem_cgroup_soft_limit_reclaim+0xf8/0x4f0
  LR do_try_to_free_pages+0x1b4/0x450
  Call Trace:
    do_try_to_free_pages+0x1b4/0x450
    try_to_free_pages+0xf8/0x270
    __alloc_pages_nodemask+0x7a8/0xff0
    new_slab+0x104/0x8e0
    ___slab_alloc+0x620/0x700
    __slab_alloc+0x34/0x60
    kmem_cache_alloc_node_trace+0xdc/0x310
    mem_cgroup_init+0x158/0x1c8
    do_one_initcall+0x68/0x1d0
    kernel_init_freeable+0x278/0x360
    kernel_init+0x24/0x170
    ret_from_kernel_thread+0x5c/0x74
  Instruction dump:
  eb81ffe0 eba1ffe8 ebc1fff0 ebe1fff8 4e800020 3d230001 e9499a42 3d220004
  3929acd8 794a1f24 7d295214 eac90100 <e9360000> 2fa90000 419eff74 3b200000
  ---[ end trace 342f5208b00d01b6 ]---

This is a chicken and egg issue where the kernel try to get free memory
when allocating per node data in mem_cgroup_init(), but in that path
mem_cgroup_soft_limit_reclaim() is called which assumes that these data
are allocated.

As mem_cgroup_soft_limit_reclaim() is best effort, it should return when
these data are not yet allocated.

This patch also fixes potential null pointer access in
mem_cgroup_remove_from_trees() and mem_cgroup_update_tree().

Link: http://lkml.kernel.org/r/1487856999-16581-2-git-send-email-ldufour@linux.vnet.ibm.com
Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c52ec893e241..76f513cc1b0e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -466,6 +466,8 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
 	struct mem_cgroup_tree_per_node *mctz;
 
 	mctz = soft_limit_tree_from_page(page);
+	if (!mctz)
+		return;
 	/*
 	 * Necessary to update all ancestors when hierarchy is used.
 	 * because their event counter is not touched.
@@ -503,7 +505,8 @@ static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
 	for_each_node(nid) {
 		mz = mem_cgroup_nodeinfo(memcg, nid);
 		mctz = soft_limit_tree_node(nid);
-		mem_cgroup_remove_exceeded(mz, mctz);
+		if (mctz)
+			mem_cgroup_remove_exceeded(mz, mctz);
 	}
 }
 
@@ -2558,7 +2561,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 	 * is empty. Do it lockless to prevent lock bouncing. Races
 	 * are acceptable as soft limit is best effort anyway.
 	 */
-	if (RB_EMPTY_ROOT(&mctz->rb_root))
+	if (!mctz || RB_EMPTY_ROOT(&mctz->rb_root))
 		return 0;
 
 	/*
-- 
cgit v1.2.3


From 7eb76d457fd758d396bc2e65cb0ace5aae614149 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Thu, 9 Mar 2017 16:17:09 -0800
Subject: userfaultfd: non-cooperative: fix fork fctx->new memleak

We have a memleak in the ->new ctx if the uffd of the parent is closed
before the fork event is read, nothing frees the new context.

Link: http://lkml.kernel.org/r/20170302173738.18994-2-aarcange@redhat.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Reported-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/userfaultfd.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index dd48052e086f..2407249998c3 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -549,6 +549,15 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 		if (ACCESS_ONCE(ctx->released) ||
 		    fatal_signal_pending(current)) {
 			__remove_wait_queue(&ctx->event_wqh, &ewq->wq);
+			if (ewq->msg.event == UFFD_EVENT_FORK) {
+				struct userfaultfd_ctx *new;
+
+				new = (struct userfaultfd_ctx *)
+					(unsigned long)
+					ewq->msg.arg.reserved.reserved1;
+
+				userfaultfd_ctx_put(new);
+			}
 			break;
 		}
 
-- 
cgit v1.2.3


From 70ccb92fdd90b35bb6f9200093d4ffd6cb38156b Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 9 Mar 2017 16:17:11 -0800
Subject: userfaultfd: non-cooperative: userfaultfd_remove revalidate vma in
 MADV_DONTNEED

userfaultfd_remove() has to be execute before zapping the pagetables or
UFFDIO_COPY could keep filling pages after zap_page_range returned,
which would result in non zero data after a MADV_DONTNEED.

However userfaultfd_remove() may have to release the mmap_sem.  This was
handled correctly in MADV_REMOVE, but MADV_DONTNEED accessed a
potentially stale vma (the very vma passed to zap_page_range(vma, ...)).

The fix consists in revalidating the vma in case userfaultfd_remove()
had to release the mmap_sem.

This also optimizes away an unnecessary down_read/up_read in the
MADV_REMOVE case if UFFD_EVENT_FORK had to be delivered.

It all remains zero runtime cost in case CONFIG_USERFAULTFD=n as
userfaultfd_remove() will be defined as "true" at build time.

Link: http://lkml.kernel.org/r/20170302173738.18994-3-aarcange@redhat.com
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/userfaultfd.c              |  9 +++------
 include/linux/userfaultfd_k.h |  7 +++----
 mm/madvise.c                  | 44 ++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 47 insertions(+), 13 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 2407249998c3..9fd5e51ffb31 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -695,8 +695,7 @@ void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx,
 	userfaultfd_event_wait_completion(ctx, &ewq);
 }
 
-void userfaultfd_remove(struct vm_area_struct *vma,
-			struct vm_area_struct **prev,
+bool userfaultfd_remove(struct vm_area_struct *vma,
 			unsigned long start, unsigned long end)
 {
 	struct mm_struct *mm = vma->vm_mm;
@@ -705,13 +704,11 @@ void userfaultfd_remove(struct vm_area_struct *vma,
 
 	ctx = vma->vm_userfaultfd_ctx.ctx;
 	if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE))
-		return;
+		return true;
 
 	userfaultfd_ctx_get(ctx);
 	up_read(&mm->mmap_sem);
 
-	*prev = NULL; /* We wait for ACK w/o the mmap semaphore */
-
 	msg_init(&ewq.msg);
 
 	ewq.msg.event = UFFD_EVENT_REMOVE;
@@ -720,7 +717,7 @@ void userfaultfd_remove(struct vm_area_struct *vma,
 
 	userfaultfd_event_wait_completion(ctx, &ewq);
 
-	down_read(&mm->mmap_sem);
+	return false;
 }
 
 static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps,
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index f2b79bf4c895..48a3483dccb1 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -61,8 +61,7 @@ extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *,
 					unsigned long from, unsigned long to,
 					unsigned long len);
 
-extern void userfaultfd_remove(struct vm_area_struct *vma,
-			       struct vm_area_struct **prev,
+extern bool userfaultfd_remove(struct vm_area_struct *vma,
 			       unsigned long start,
 			       unsigned long end);
 
@@ -118,11 +117,11 @@ static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx,
 {
 }
 
-static inline void userfaultfd_remove(struct vm_area_struct *vma,
-				      struct vm_area_struct **prev,
+static inline bool userfaultfd_remove(struct vm_area_struct *vma,
 				      unsigned long start,
 				      unsigned long end)
 {
+	return true;
 }
 
 static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
diff --git a/mm/madvise.c b/mm/madvise.c
index dc5927c812d3..7a2abf0127ae 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -513,7 +513,43 @@ static long madvise_dontneed(struct vm_area_struct *vma,
 	if (!can_madv_dontneed_vma(vma))
 		return -EINVAL;
 
-	userfaultfd_remove(vma, prev, start, end);
+	if (!userfaultfd_remove(vma, start, end)) {
+		*prev = NULL; /* mmap_sem has been dropped, prev is stale */
+
+		down_read(&current->mm->mmap_sem);
+		vma = find_vma(current->mm, start);
+		if (!vma)
+			return -ENOMEM;
+		if (start < vma->vm_start) {
+			/*
+			 * This "vma" under revalidation is the one
+			 * with the lowest vma->vm_start where start
+			 * is also < vma->vm_end. If start <
+			 * vma->vm_start it means an hole materialized
+			 * in the user address space within the
+			 * virtual range passed to MADV_DONTNEED.
+			 */
+			return -ENOMEM;
+		}
+		if (!can_madv_dontneed_vma(vma))
+			return -EINVAL;
+		if (end > vma->vm_end) {
+			/*
+			 * Don't fail if end > vma->vm_end. If the old
+			 * vma was splitted while the mmap_sem was
+			 * released the effect of the concurrent
+			 * operation may not cause MADV_DONTNEED to
+			 * have an undefined result. There may be an
+			 * adjacent next vma that we'll walk
+			 * next. userfaultfd_remove() will generate an
+			 * UFFD_EVENT_REMOVE repetition on the
+			 * end-vma->vm_end range, but the manager can
+			 * handle a repetition fine.
+			 */
+			end = vma->vm_end;
+		}
+		VM_WARN_ON(start >= end);
+	}
 	zap_page_range(vma, start, end - start);
 	return 0;
 }
@@ -554,8 +590,10 @@ static long madvise_remove(struct vm_area_struct *vma,
 	 * mmap_sem.
 	 */
 	get_file(f);
-	userfaultfd_remove(vma, prev, start, end);
-	up_read(&current->mm->mmap_sem);
+	if (userfaultfd_remove(vma, start, end)) {
+		/* mmap_sem was not released by userfaultfd_remove() */
+		up_read(&current->mm->mmap_sem);
+	}
 	error = vfs_fallocate(f,
 				FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
 				offset, end - start);
-- 
cgit v1.2.3


From 46aa6a302b53f543f8e8b8e1714dc5e449ad36a6 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 9 Mar 2017 16:17:14 -0800
Subject: userfaultfd: selftest: vm: allow to build in vm/ directory

linux/tools/testing/selftests/vm $ make

  gcc -Wall -I ../../../../usr/include     compaction_test.c -lrt -o /compaction_test
  /usr/lib/gcc/x86_64-pc-linux-gnu/4.9.4/../../../../x86_64-pc-linux-gnu/bin/ld: cannot open output file /compaction_test: Permission denied
  collect2: error: ld returned 1 exit status
  make: *** [../lib.mk:54: /compaction_test] Error 1

Since commit a8ba798bc8ec ("selftests: enable O and KBUILD_OUTPUT")
selftests/vm build fails if run from the "selftests/vm" directory, but
it works in the selftests/ directory.  It's quicker to be able to do a
local vm-only build after a tree wipe and this patch allows for it
again.

Link: http://lkml.kernel.org/r/20170302173738.18994-4-aarcange@redhat.com
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/Makefile | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 4cff7e7ddcc4..41642ba5e318 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -1,5 +1,9 @@
 # Makefile for vm selftests
 
+ifndef OUTPUT
+  OUTPUT := $(shell pwd)
+endif
+
 CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
 LDLIBS = -lrt
 TEST_GEN_FILES = compaction_test
-- 
cgit v1.2.3


From c9a1b80daeb50e39f21fd7e62f7224f317ac85f0 Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Thu, 9 Mar 2017 16:17:17 -0800
Subject: mm/memblock.c: fix memblock_next_valid_pfn()

Obviously, we should not access memblock.memory.regions[right] if
'right' is outside of [0..memblock.memory.cnt>.

Fixes: b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns where possible")
Link: http://lkml.kernel.org/r/20170303023745.9104-1-takahiro.akashi@linaro.org
Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Paul Burton <paul.burton@imgtec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memblock.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/mm/memblock.c b/mm/memblock.c
index b64b47803e52..696f06d17c4e 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1118,7 +1118,10 @@ unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn,
 		}
 	} while (left < right);
 
-	return min(PHYS_PFN(type->regions[right].base), max_pfn);
+	if (right == type->cnt)
+		return max_pfn;
+	else
+		return min(PHYS_PFN(type->regions[right].base), max_pfn);
 }
 
 /**
-- 
cgit v1.2.3


From 8346242a7e32c4c93316684ad8f45473932586b9 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 9 Mar 2017 16:17:20 -0800
Subject: rmap: fix NULL-pointer dereference on THP munlocking

The following test case triggers NULL-pointer derefernce in
try_to_unmap_one():

	#include <fcntl.h>
	#include <stdlib.h>
	#include <unistd.h>
	#include <sys/mman.h>

	int main(int argc, char *argv[])
	{
		int fd;

		system("mount -t tmpfs -o huge=always none /mnt");
		fd = open("/mnt/test", O_CREAT | O_RDWR);
		ftruncate(fd, 2UL << 20);
		mmap(NULL, 2UL << 20, PROT_READ | PROT_WRITE,
				MAP_SHARED | MAP_FIXED | MAP_LOCKED, fd, 0);
		mmap(NULL, 2UL << 20, PROT_READ | PROT_WRITE,
				MAP_SHARED | MAP_LOCKED, fd, 0);
		munlockall();
		return 0;
	}

Apparently, there's a case when we call try_to_unmap() on huge PMDs:
it's TTU_MUNLOCK.

Let's handle this case correctly.

Fixes: c7ab0d2fdc84 ("mm: convert try_to_unmap_one() to use page_vma_mapped_walk()")
Link: http://lkml.kernel.org/r/20170302151159.30592-1-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/rmap.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/mm/rmap.c b/mm/rmap.c
index 2da487d6cea8..250635dc47b7 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1316,12 +1316,6 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	}
 
 	while (page_vma_mapped_walk(&pvmw)) {
-		subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
-		address = pvmw.address;
-
-		/* Unexpected PMD-mapped THP? */
-		VM_BUG_ON_PAGE(!pvmw.pte, page);
-
 		/*
 		 * If the page is mlock()d, we cannot swap it out.
 		 * If it's recently referenced (perhaps page_referenced
@@ -1345,6 +1339,13 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 				continue;
 		}
 
+		/* Unexpected PMD-mapped THP? */
+		VM_BUG_ON_PAGE(!pvmw.pte, page);
+
+		subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
+		address = pvmw.address;
+
+
 		if (!(flags & TTU_IGNORE_ACCESS)) {
 			if (ptep_clear_flush_young_notify(vma, address,
 						pvmw.pte)) {
-- 
cgit v1.2.3


From 6ebb4a1b848fe75323135f93e72c78f8780fd268 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 9 Mar 2017 16:17:23 -0800
Subject: thp: fix another corner case of munlock() vs. THPs

The following test case triggers BUG() in munlock_vma_pages_range():

	int main(int argc, char *argv[])
	{
		int fd;

		system("mount -t tmpfs -o huge=always none /mnt");
		fd = open("/mnt/test", O_CREAT | O_RDWR);
		ftruncate(fd, 4UL << 20);
		mmap(NULL, 4UL << 20, PROT_READ | PROT_WRITE,
				MAP_SHARED | MAP_FIXED | MAP_LOCKED, fd, 0);
		mmap(NULL, 4096, PROT_READ | PROT_WRITE,
				MAP_SHARED | MAP_LOCKED, fd, 0);
		munlockall();
		return 0;
	}

The second mmap() create PTE-mapping of the first huge page in file.  It
makes kernel munlock the page as we never keep PTE-mapped page mlocked.

On munlockall() when we handle vma created by the first mmap(),
munlock_vma_page() returns page_mask == 0, as the page is not mlocked
anymore.  On next iteration follow_page_mask() return tail page, but
page_mask is HPAGE_NR_PAGES - 1.  It makes us skip to the first tail
page of the next huge page and step on
VM_BUG_ON_PAGE(PageMlocked(page)).

The fix is not use the page_mask from follow_page_mask() at all.  It has
no use for us.

Link: http://lkml.kernel.org/r/20170302150252.34120-1-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>    [4.5+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/mlock.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/mm/mlock.c b/mm/mlock.c
index 1050511f8b2b..02f138244bf5 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -442,7 +442,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
 
 	while (start < end) {
 		struct page *page;
-		unsigned int page_mask;
+		unsigned int page_mask = 0;
 		unsigned long page_increm;
 		struct pagevec pvec;
 		struct zone *zone;
@@ -456,8 +456,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
 		 * suits munlock very well (and if somehow an abnormal page
 		 * has sneaked into the range, we won't oops here: great).
 		 */
-		page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP,
-				&page_mask);
+		page = follow_page(vma, start, FOLL_GET | FOLL_DUMP);
 
 		if (page && !IS_ERR(page)) {
 			if (PageTransTail(page)) {
@@ -468,8 +467,8 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
 				/*
 				 * Any THP page found by follow_page_mask() may
 				 * have gotten split before reaching
-				 * munlock_vma_page(), so we need to recompute
-				 * the page_mask here.
+				 * munlock_vma_page(), so we need to compute
+				 * the page_mask here instead.
 				 */
 				page_mask = munlock_vma_page(page);
 				unlock_page(page);
-- 
cgit v1.2.3


From 40e952f9d687928b32db20226f085ae660a7237c Mon Sep 17 00:00:00 2001
From: Tahsin Erdogan <tahsin@google.com>
Date: Thu, 9 Mar 2017 16:17:26 -0800
Subject: mm: do not call mem_cgroup_free() from within mem_cgroup_alloc()

mem_cgroup_free() indirectly calls wb_domain_exit() which is not
prepared to deal with a struct wb_domain object that hasn't executed
wb_domain_init().  For instance, the following warning message is
printed by lockdep if alloc_percpu() fails in mem_cgroup_alloc():

  INFO: trying to register non-static key.
  the code is fine but needs lockdep annotation.
  turning off the locking correctness validator.
  CPU: 1 PID: 1950 Comm: mkdir Not tainted 4.10.0+ #151
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
  Call Trace:
   dump_stack+0x67/0x99
   register_lock_class+0x36d/0x540
   __lock_acquire+0x7f/0x1a30
   lock_acquire+0xcc/0x200
   del_timer_sync+0x3c/0xc0
   wb_domain_exit+0x14/0x20
   mem_cgroup_free+0x14/0x40
   mem_cgroup_css_alloc+0x3f9/0x620
   cgroup_apply_control_enable+0x190/0x390
   cgroup_mkdir+0x290/0x3d0
   kernfs_iop_mkdir+0x58/0x80
   vfs_mkdir+0x10e/0x1a0
   SyS_mkdirat+0xa8/0xd0
   SyS_mkdir+0x14/0x20
   entry_SYSCALL_64_fastpath+0x18/0xad

Add __mem_cgroup_free() which skips wb_domain_exit().  This is used by
both mem_cgroup_free() and mem_cgroup_alloc() clean up.

Fixes: 0b8f73e104285 ("mm: memcontrol: clean up alloc, online, offline, free functions")
Link: http://lkml.kernel.org/r/20170306192122.24262-1-tahsin@google.com
Signed-off-by: Tahsin Erdogan <tahsin@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 76f513cc1b0e..2bd7541d7c11 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4138,17 +4138,22 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 	kfree(memcg->nodeinfo[node]);
 }
 
-static void mem_cgroup_free(struct mem_cgroup *memcg)
+static void __mem_cgroup_free(struct mem_cgroup *memcg)
 {
 	int node;
 
-	memcg_wb_domain_exit(memcg);
 	for_each_node(node)
 		free_mem_cgroup_per_node_info(memcg, node);
 	free_percpu(memcg->stat);
 	kfree(memcg);
 }
 
+static void mem_cgroup_free(struct mem_cgroup *memcg)
+{
+	memcg_wb_domain_exit(memcg);
+	__mem_cgroup_free(memcg);
+}
+
 static struct mem_cgroup *mem_cgroup_alloc(void)
 {
 	struct mem_cgroup *memcg;
@@ -4199,7 +4204,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 fail:
 	if (memcg->id.id > 0)
 		idr_remove(&mem_cgroup_idr, memcg->id.id);
-	mem_cgroup_free(memcg);
+	__mem_cgroup_free(memcg);
 	return NULL;
 }
 
-- 
cgit v1.2.3


From 68fd814a3391c7e017ae6ace8855788748edd981 Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Thu, 9 Mar 2017 16:17:28 -0800
Subject: kasan: resched in quarantine_remove_cache()

We see reported stalls/lockups in quarantine_remove_cache() on machines
with large amounts of RAM.  quarantine_remove_cache() needs to scan
whole quarantine in order to take out all objects belonging to the
cache.  Quarantine is currently 1/32-th of RAM, e.g.  on a machine with
256GB of memory that will be 8GB.  Moreover quarantine scanning is a
walk over uncached linked list, which is slow.

Add cond_resched() after scanning of each non-empty batch of objects.
Batches are specifically kept of reasonable size for quarantine_put().
On a machine with 256GB of RAM we should have ~512 non-empty batches,
each with 16MB of objects.

Link: http://lkml.kernel.org/r/20170308154239.25440-1-dvyukov@google.com
Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/kasan/quarantine.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
index 6f1ed1630873..4ac39f20757a 100644
--- a/mm/kasan/quarantine.c
+++ b/mm/kasan/quarantine.c
@@ -283,8 +283,15 @@ void quarantine_remove_cache(struct kmem_cache *cache)
 	on_each_cpu(per_cpu_remove_cache, cache, 1);
 
 	spin_lock_irqsave(&quarantine_lock, flags);
-	for (i = 0; i < QUARANTINE_BATCHES; i++)
+	for (i = 0; i < QUARANTINE_BATCHES; i++) {
+		if (qlist_empty(&global_quarantine[i]))
+			continue;
 		qlist_move_cache(&global_quarantine[i], &to_free, cache);
+		/* Scanning whole quarantine can take a while. */
+		spin_unlock_irqrestore(&quarantine_lock, flags);
+		cond_resched();
+		spin_lock_irqsave(&quarantine_lock, flags);
+	}
 	spin_unlock_irqrestore(&quarantine_lock, flags);
 
 	qlist_free_all(&to_free, cache);
-- 
cgit v1.2.3


From ce5bec54bb5debbbe51b40270d8f209a23cadae4 Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Thu, 9 Mar 2017 16:17:32 -0800
Subject: kasan: fix races in quarantine_remove_cache()

quarantine_remove_cache() frees all pending objects that belong to the
cache, before we destroy the cache itself.  However there are currently
two possibilities how it can fail to do so.

First, another thread can hold some of the objects from the cache in
temp list in quarantine_put().  quarantine_put() has a windows of
enabled interrupts, and on_each_cpu() in quarantine_remove_cache() can
finish right in that window.  These objects will be later freed into the
destroyed cache.

Then, quarantine_reduce() has the same problem.  It grabs a batch of
objects from the global quarantine, then unlocks quarantine_lock and
then frees the batch.  quarantine_remove_cache() can finish while some
objects from the cache are still in the local to_free list in
quarantine_reduce().

Fix the race with quarantine_put() by disabling interrupts for the whole
duration of quarantine_put().  In combination with on_each_cpu() in
quarantine_remove_cache() it ensures that quarantine_remove_cache()
either sees the objects in the per-cpu list or in the global list.

Fix the race with quarantine_reduce() by protecting quarantine_reduce()
with srcu critical section and then doing synchronize_srcu() at the end
of quarantine_remove_cache().

I've done some assessment of how good synchronize_srcu() works in this
case.  And on a 4 CPU VM I see that it blocks waiting for pending read
critical sections in about 2-3% of cases.  Which looks good to me.

I suspect that these races are the root cause of some GPFs that I
episodically hit.  Previously I did not have any explanation for them.

  BUG: unable to handle kernel NULL pointer dereference at 00000000000000c8
  IP: qlist_free_all+0x2e/0xc0 mm/kasan/quarantine.c:155
  PGD 6aeea067
  PUD 60ed7067
  PMD 0
  Oops: 0000 [#1] SMP KASAN
  Dumping ftrace buffer:
     (ftrace buffer empty)
  Modules linked in:
  CPU: 0 PID: 13667 Comm: syz-executor2 Not tainted 4.10.0+ #60
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
  task: ffff88005f948040 task.stack: ffff880069818000
  RIP: 0010:qlist_free_all+0x2e/0xc0 mm/kasan/quarantine.c:155
  RSP: 0018:ffff88006981f298 EFLAGS: 00010246
  RAX: ffffea0000ffff00 RBX: 0000000000000000 RCX: ffffea0000ffff1f
  RDX: 0000000000000000 RSI: ffff88003fffc3e0 RDI: 0000000000000000
  RBP: ffff88006981f2c0 R08: ffff88002fed7bd8 R09: 00000001001f000d
  R10: 00000000001f000d R11: ffff88006981f000 R12: ffff88003fffc3e0
  R13: ffff88006981f2d0 R14: ffffffff81877fae R15: 0000000080000000
  FS:  00007fb911a2d700(0000) GS:ffff88003ec00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00000000000000c8 CR3: 0000000060ed6000 CR4: 00000000000006f0
  Call Trace:
   quarantine_reduce+0x10e/0x120 mm/kasan/quarantine.c:239
   kasan_kmalloc+0xca/0xe0 mm/kasan/kasan.c:590
   kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:544
   slab_post_alloc_hook mm/slab.h:456 [inline]
   slab_alloc_node mm/slub.c:2718 [inline]
   kmem_cache_alloc_node+0x1d3/0x280 mm/slub.c:2754
   __alloc_skb+0x10f/0x770 net/core/skbuff.c:219
   alloc_skb include/linux/skbuff.h:932 [inline]
   _sctp_make_chunk+0x3b/0x260 net/sctp/sm_make_chunk.c:1388
   sctp_make_data net/sctp/sm_make_chunk.c:1420 [inline]
   sctp_make_datafrag_empty+0x208/0x360 net/sctp/sm_make_chunk.c:746
   sctp_datamsg_from_user+0x7e8/0x11d0 net/sctp/chunk.c:266
   sctp_sendmsg+0x2611/0x3970 net/sctp/socket.c:1962
   inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:761
   sock_sendmsg_nosec net/socket.c:633 [inline]
   sock_sendmsg+0xca/0x110 net/socket.c:643
   SYSC_sendto+0x660/0x810 net/socket.c:1685
   SyS_sendto+0x40/0x50 net/socket.c:1653

I am not sure about backporting.  The bug is quite hard to trigger, I've
seen it few times during our massive continuous testing (however, it
could be cause of some other episodic stray crashes as it leads to
memory corruption...).  If it is triggered, the consequences are very
bad -- almost definite bad memory corruption.  The fix is non trivial
and has chances of introducing new bugs.  I am also not sure how
actively people use KASAN on older releases.

[dvyukov@google.com: - sorted includes[
  Link: http://lkml.kernel.org/r/20170309094028.51088-1-dvyukov@google.com
Link: http://lkml.kernel.org/r/20170308151532.5070-1-dvyukov@google.com
Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/kasan/quarantine.c | 42 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 36 insertions(+), 6 deletions(-)

diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
index 4ac39f20757a..3a8ddf8baf7d 100644
--- a/mm/kasan/quarantine.c
+++ b/mm/kasan/quarantine.c
@@ -25,6 +25,7 @@
 #include <linux/printk.h>
 #include <linux/shrinker.h>
 #include <linux/slab.h>
+#include <linux/srcu.h>
 #include <linux/string.h>
 #include <linux/types.h>
 
@@ -103,6 +104,7 @@ static int quarantine_tail;
 /* Total size of all objects in global_quarantine across all batches. */
 static unsigned long quarantine_size;
 static DEFINE_SPINLOCK(quarantine_lock);
+DEFINE_STATIC_SRCU(remove_cache_srcu);
 
 /* Maximum size of the global queue. */
 static unsigned long quarantine_max_size;
@@ -173,17 +175,22 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
 	struct qlist_head *q;
 	struct qlist_head temp = QLIST_INIT;
 
+	/*
+	 * Note: irq must be disabled until after we move the batch to the
+	 * global quarantine. Otherwise quarantine_remove_cache() can miss
+	 * some objects belonging to the cache if they are in our local temp
+	 * list. quarantine_remove_cache() executes on_each_cpu() at the
+	 * beginning which ensures that it either sees the objects in per-cpu
+	 * lists or in the global quarantine.
+	 */
 	local_irq_save(flags);
 
 	q = this_cpu_ptr(&cpu_quarantine);
 	qlist_put(q, &info->quarantine_link, cache->size);
-	if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE))
+	if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) {
 		qlist_move_all(q, &temp);
 
-	local_irq_restore(flags);
-
-	if (unlikely(!qlist_empty(&temp))) {
-		spin_lock_irqsave(&quarantine_lock, flags);
+		spin_lock(&quarantine_lock);
 		WRITE_ONCE(quarantine_size, quarantine_size + temp.bytes);
 		qlist_move_all(&temp, &global_quarantine[quarantine_tail]);
 		if (global_quarantine[quarantine_tail].bytes >=
@@ -196,20 +203,33 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
 			if (new_tail != quarantine_head)
 				quarantine_tail = new_tail;
 		}
-		spin_unlock_irqrestore(&quarantine_lock, flags);
+		spin_unlock(&quarantine_lock);
 	}
+
+	local_irq_restore(flags);
 }
 
 void quarantine_reduce(void)
 {
 	size_t total_size, new_quarantine_size, percpu_quarantines;
 	unsigned long flags;
+	int srcu_idx;
 	struct qlist_head to_free = QLIST_INIT;
 
 	if (likely(READ_ONCE(quarantine_size) <=
 		   READ_ONCE(quarantine_max_size)))
 		return;
 
+	/*
+	 * srcu critical section ensures that quarantine_remove_cache()
+	 * will not miss objects belonging to the cache while they are in our
+	 * local to_free list. srcu is chosen because (1) it gives us private
+	 * grace period domain that does not interfere with anything else,
+	 * and (2) it allows synchronize_srcu() to return without waiting
+	 * if there are no pending read critical sections (which is the
+	 * expected case).
+	 */
+	srcu_idx = srcu_read_lock(&remove_cache_srcu);
 	spin_lock_irqsave(&quarantine_lock, flags);
 
 	/*
@@ -237,6 +257,7 @@ void quarantine_reduce(void)
 	spin_unlock_irqrestore(&quarantine_lock, flags);
 
 	qlist_free_all(&to_free, NULL);
+	srcu_read_unlock(&remove_cache_srcu, srcu_idx);
 }
 
 static void qlist_move_cache(struct qlist_head *from,
@@ -280,6 +301,13 @@ void quarantine_remove_cache(struct kmem_cache *cache)
 	unsigned long flags, i;
 	struct qlist_head to_free = QLIST_INIT;
 
+	/*
+	 * Must be careful to not miss any objects that are being moved from
+	 * per-cpu list to the global quarantine in quarantine_put(),
+	 * nor objects being freed in quarantine_reduce(). on_each_cpu()
+	 * achieves the first goal, while synchronize_srcu() achieves the
+	 * second.
+	 */
 	on_each_cpu(per_cpu_remove_cache, cache, 1);
 
 	spin_lock_irqsave(&quarantine_lock, flags);
@@ -295,4 +323,6 @@ void quarantine_remove_cache(struct kmem_cache *cache)
 	spin_unlock_irqrestore(&quarantine_lock, flags);
 
 	qlist_free_all(&to_free, cache);
+
+	synchronize_srcu(&remove_cache_srcu);
 }
-- 
cgit v1.2.3


From ca5b58ea3db88b5e69ba2a1f6bc3cf239cdcc64f Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Date: Thu, 9 Mar 2017 16:17:34 -0800
Subject: sh: cayman: IDE support fix

Remove incorrect CONFIG_IDE ifdef (CONFIG_IDE config option is for
internal drivers/ide/ use) and make IDE hardware interface always
initialized (not only when IDE subsystem is built-in).

This patch allows Cayman board to work with modular IDE subsystem
support and removes the requirement of having the whole core IDE
subsystem built-in when using libata PATA support.

Link: http://lkml.kernel.org/r/1990884.yFoE6lSB9G@amdc3058
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Rich Felker <dalias@libc.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/sh/boards/mach-cayman/setup.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/sh/boards/mach-cayman/setup.c b/arch/sh/boards/mach-cayman/setup.c
index 340fd40b381d..9c292c27e0d7 100644
--- a/arch/sh/boards/mach-cayman/setup.c
+++ b/arch/sh/boards/mach-cayman/setup.c
@@ -128,7 +128,6 @@ static int __init smsc_superio_setup(void)
 	SMSC_SUPERIO_WRITE_INDEXED(1, SMSC_PRIMARY_INT_INDEX);
 	SMSC_SUPERIO_WRITE_INDEXED(12, SMSC_SECONDARY_INT_INDEX);
 
-#ifdef CONFIG_IDE
 	/*
 	 * Only IDE1 exists on the Cayman
 	 */
@@ -158,7 +157,6 @@ static int __init smsc_superio_setup(void)
 	SMSC_SUPERIO_WRITE_INDEXED(0x01, 0xc5); /* GP45 = IDE1_IRQ */
 	SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc6); /* GP46 = nIOROP */
 	SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc7); /* GP47 = nIOWOP */
-#endif
 
 	/* Exit the configuration state */
 	outb(SMSC_EXIT_CONFIG_KEY, SMSC_CONFIG_PORT_ADDR);
-- 
cgit v1.2.3


From c0d0e351285161a515396b7b1ee53ec9ffd97e3c Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Thu, 9 Mar 2017 16:17:37 -0800
Subject: fat: fix using uninitialized fields of fat_inode/fsinfo_inode

Recently fallocate patch was merged and it uses
MSDOS_I(inode)->mmu_private at fat_evict_inode().  However,
fat_inode/fsinfo_inode that was introduced in past didn't initialize
MSDOS_I(inode) properly.

With those combinations, it became the cause of accessing random entry
in FAT area.

Link: http://lkml.kernel.org/r/87pohrj4i8.fsf@mail.parknet.co.jp
Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Reported-by: Moreno Bartalucci <moreno.bartalucci@tecnorama.it>
Tested-by: Moreno Bartalucci <moreno.bartalucci@tecnorama.it>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fat/inode.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 338d2f73eb29..a2c05f2ada6d 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1359,6 +1359,16 @@ out:
 	return 0;
 }
 
+static void fat_dummy_inode_init(struct inode *inode)
+{
+	/* Initialize this dummy inode to work as no-op. */
+	MSDOS_I(inode)->mmu_private = 0;
+	MSDOS_I(inode)->i_start = 0;
+	MSDOS_I(inode)->i_logstart = 0;
+	MSDOS_I(inode)->i_attrs = 0;
+	MSDOS_I(inode)->i_pos = 0;
+}
+
 static int fat_read_root(struct inode *inode)
 {
 	struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
@@ -1803,12 +1813,13 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
 	fat_inode = new_inode(sb);
 	if (!fat_inode)
 		goto out_fail;
-	MSDOS_I(fat_inode)->i_pos = 0;
+	fat_dummy_inode_init(fat_inode);
 	sbi->fat_inode = fat_inode;
 
 	fsinfo_inode = new_inode(sb);
 	if (!fsinfo_inode)
 		goto out_fail;
+	fat_dummy_inode_init(fsinfo_inode);
 	fsinfo_inode->i_ino = MSDOS_FSINFO_INO;
 	sbi->fsinfo_inode = fsinfo_inode;
 	insert_inode_hash(fsinfo_inode);
-- 
cgit v1.2.3


From 2378cd6181edd94748d699448823609977283b11 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Thu, 9 Mar 2017 16:17:40 -0800
Subject: userfaultfd: remove wrong comment from userfaultfd_ctx_get()

It's a void function, so there is no return value;

Link: http://lkml.kernel.org/r/20170309150817.7510-1-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/userfaultfd.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 9fd5e51ffb31..2bb1c72380f2 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -138,8 +138,6 @@ out:
  * userfaultfd_ctx_get - Acquires a reference to the internal userfaultfd
  * context.
  * @ctx: [in] Pointer to the userfaultfd context.
- *
- * Returns: In case of success, returns not zero.
  */
 static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx)
 {
-- 
cgit v1.2.3


From 10f2889ba35aeb251b9945ec4c461af8c124c41f Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Wed, 14 Dec 2016 17:28:41 -0800
Subject: drm: mxsfb: use bus_format to determine LCD bus width

The LCD bus width does not need to align with the pixel format. The
LCDIF controller automatically converts between pixel formats and
bus width by padding or dropping LSBs.

The DRM subsystem has the notion of bus_format which allows to
determine what bus_formats are supported by the display. Choose the
first available or fallback to 24 bit if none are available.

Signed-off-by: Stefan Agner <stefan@agner.ch>
Acked-by: Marek Vasut <marex@denx.de>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/mxsfb/mxsfb_crtc.c | 34 ++++++++++++++++++++++++++++++++--
 drivers/gpu/drm/mxsfb/mxsfb_regs.h |  1 +
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c
index e10a4eda4078..259f71b3a76a 100644
--- a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c
+++ b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c
@@ -65,13 +65,11 @@ static int mxsfb_set_pixel_fmt(struct mxsfb_drm_private *mxsfb)
 	switch (format) {
 	case DRM_FORMAT_RGB565:
 		dev_dbg(drm->dev, "Setting up RGB565 mode\n");
-		ctrl |= CTRL_SET_BUS_WIDTH(STMLCDIF_16BIT);
 		ctrl |= CTRL_SET_WORD_LENGTH(0);
 		ctrl1 |= CTRL1_SET_BYTE_PACKAGING(0xf);
 		break;
 	case DRM_FORMAT_XRGB8888:
 		dev_dbg(drm->dev, "Setting up XRGB8888 mode\n");
-		ctrl |= CTRL_SET_BUS_WIDTH(STMLCDIF_24BIT);
 		ctrl |= CTRL_SET_WORD_LENGTH(3);
 		/* Do not use packed pixels = one pixel per word instead. */
 		ctrl1 |= CTRL1_SET_BYTE_PACKAGING(0x7);
@@ -87,6 +85,36 @@ static int mxsfb_set_pixel_fmt(struct mxsfb_drm_private *mxsfb)
 	return 0;
 }
 
+static void mxsfb_set_bus_fmt(struct mxsfb_drm_private *mxsfb)
+{
+	struct drm_crtc *crtc = &mxsfb->pipe.crtc;
+	struct drm_device *drm = crtc->dev;
+	u32 bus_format = MEDIA_BUS_FMT_RGB888_1X24;
+	u32 reg;
+
+	reg = readl(mxsfb->base + LCDC_CTRL);
+
+	if (mxsfb->connector.display_info.num_bus_formats)
+		bus_format = mxsfb->connector.display_info.bus_formats[0];
+
+	reg &= ~CTRL_BUS_WIDTH_MASK;
+	switch (bus_format) {
+	case MEDIA_BUS_FMT_RGB565_1X16:
+		reg |= CTRL_SET_BUS_WIDTH(STMLCDIF_16BIT);
+		break;
+	case MEDIA_BUS_FMT_RGB666_1X18:
+		reg |= CTRL_SET_BUS_WIDTH(STMLCDIF_18BIT);
+		break;
+	case MEDIA_BUS_FMT_RGB888_1X24:
+		reg |= CTRL_SET_BUS_WIDTH(STMLCDIF_24BIT);
+		break;
+	default:
+		dev_err(drm->dev, "Unknown media bus format %d\n", bus_format);
+		break;
+	}
+	writel(reg, mxsfb->base + LCDC_CTRL);
+}
+
 static void mxsfb_enable_controller(struct mxsfb_drm_private *mxsfb)
 {
 	u32 reg;
@@ -175,6 +203,8 @@ static void mxsfb_crtc_mode_set_nofb(struct mxsfb_drm_private *mxsfb)
 
 	writel(vdctrl0, mxsfb->base + LCDC_VDCTRL0);
 
+	mxsfb_set_bus_fmt(mxsfb);
+
 	/* Frame length in lines. */
 	writel(m->crtc_vtotal, mxsfb->base + LCDC_VDCTRL1);
 
diff --git a/drivers/gpu/drm/mxsfb/mxsfb_regs.h b/drivers/gpu/drm/mxsfb/mxsfb_regs.h
index 31d62cd0d3d7..66a6ba9ec533 100644
--- a/drivers/gpu/drm/mxsfb/mxsfb_regs.h
+++ b/drivers/gpu/drm/mxsfb/mxsfb_regs.h
@@ -44,6 +44,7 @@
 #define CTRL_DATA_SELECT		(1 << 16)
 #define CTRL_SET_BUS_WIDTH(x)		(((x) & 0x3) << 10)
 #define CTRL_GET_BUS_WIDTH(x)		(((x) >> 10) & 0x3)
+#define CTRL_BUS_WIDTH_MASK		(0x3 << 10)
 #define CTRL_SET_WORD_LENGTH(x)		(((x) & 0x3) << 8)
 #define CTRL_GET_WORD_LENGTH(x)		(((x) >> 8) & 0x3)
 #define CTRL_MASTER			(1 << 5)
-- 
cgit v1.2.3


From 53990e416bb7adaa59d045f325a47f31a11b75ee Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Wed, 14 Dec 2016 12:48:09 -0800
Subject: drm: mxsfb: fix pixel clock polarity

The DRM subsystem specifies the pixel clock polarity from a
controllers perspective: DRM_BUS_FLAG_PIXDATA_NEGEDGE means
the controller drives the data on pixel clocks falling edge.
That is the controllers DOTCLK_POL=0 (Default is data launched
at negative edge).

Also change the data enable logic to be high active by default
and only change if explicitly requested via bus_flags. With
that defaults are:
- Data enable: high active
- Pixel clock polarity: controller drives data on negative edge

Signed-off-by: Stefan Agner <stefan@agner.ch>
Acked-by: Marek Vasut <marex@denx.de>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/mxsfb/mxsfb_crtc.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c
index 259f71b3a76a..165abd227436 100644
--- a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c
+++ b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c
@@ -196,9 +196,16 @@ static void mxsfb_crtc_mode_set_nofb(struct mxsfb_drm_private *mxsfb)
 		vdctrl0 |= VDCTRL0_HSYNC_ACT_HIGH;
 	if (m->flags & DRM_MODE_FLAG_PVSYNC)
 		vdctrl0 |= VDCTRL0_VSYNC_ACT_HIGH;
-	if (bus_flags & DRM_BUS_FLAG_DE_HIGH)
+	/* Make sure Data Enable is high active by default */
+	if (!(bus_flags & DRM_BUS_FLAG_DE_LOW))
 		vdctrl0 |= VDCTRL0_ENABLE_ACT_HIGH;
-	if (bus_flags & DRM_BUS_FLAG_PIXDATA_NEGEDGE)
+	/*
+	 * DRM_BUS_FLAG_PIXDATA_ defines are controller centric,
+	 * controllers VDCTRL0_DOTCLK is display centric.
+	 * Drive on positive edge       -> display samples on falling edge
+	 * DRM_BUS_FLAG_PIXDATA_POSEDGE -> VDCTRL0_DOTCLK_ACT_FALLING
+	 */
+	if (bus_flags & DRM_BUS_FLAG_PIXDATA_POSEDGE)
 		vdctrl0 |= VDCTRL0_DOTCLK_ACT_FALLING;
 
 	writel(vdctrl0, mxsfb->base + LCDC_VDCTRL0);
-- 
cgit v1.2.3


From 7ad7a5acfb96215216f46b9848bd2d341663358f Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Sat, 28 Jan 2017 18:01:57 +0100
Subject: drm: mxsfb: Fix crash when provided invalid DT bindings

The mxsfb driver will crash if the mxsfb DT node has a subnode,
but the content of the subnode is not of-graph binding with an
endpoint linking to panel. The crash was triggered by providing
old-style panel bindings to the mxsfb driver instead of the new
of-graph ones.

The problem happens in mxsfb_create_output(), which is invoked
from mxsfb_load(). The mxsfb_create_output() iterates over all
mxsfb DT subnode endpoints and tries to bind a panel on each
endpoint. If there is any problem binding the panel, that is,
mxsfb->panel == NULL, this function will return an error code,
otherwise success 0 is returned.

If the subnodes do not specify of-graph binding with an endpoint,
the iteration over endpoints in mxsfb_create_output() will have
zero cycles and the function will immediatelly return 0, but the
mxsfb->panel will remain NULL. This is propagated back into the
mxsfb_load(), which does not detect any problem and expects that
the mxsfb->panel is valid, thus calls mxsfb_panel_attach(). But
since mxsfb->panel == NULL, mxsfb_panel_attach() is called with
first argument NULL and this crashes the kernel.

This patch fixes the problem by explicitly checking for valid
mxsfb->panel at the end of the iteration in mxsfb_create_output().

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Stefan Agner <stefan@agner.ch>
Cc: Breno Matheus Lima <brenomatheus@gmail.com>
Tested-by: Breno Lima <breno.lima@nxp.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/mxsfb/mxsfb_out.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/mxsfb/mxsfb_out.c b/drivers/gpu/drm/mxsfb/mxsfb_out.c
index fa8d17399407..b8e81422d4e2 100644
--- a/drivers/gpu/drm/mxsfb/mxsfb_out.c
+++ b/drivers/gpu/drm/mxsfb/mxsfb_out.c
@@ -112,6 +112,7 @@ static int mxsfb_attach_endpoint(struct drm_device *drm,
 
 int mxsfb_create_output(struct drm_device *drm)
 {
+	struct mxsfb_drm_private *mxsfb = drm->dev_private;
 	struct device_node *ep_np = NULL;
 	struct of_endpoint ep;
 	int ret;
@@ -127,5 +128,8 @@ int mxsfb_create_output(struct drm_device *drm)
 		}
 	}
 
+	if (!mxsfb->panel)
+		return -EPROBE_DEFER;
+
 	return 0;
 }
-- 
cgit v1.2.3


From d42986b6c600da4215973878f1c889cdabbdd122 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@nxp.com>
Date: Thu, 2 Feb 2017 19:26:38 -0200
Subject: drm: mxsfb_crtc: Fix the framebuffer misplacement

Currently the framebuffer content is displayed with incorrect offsets
in both the vertical and horizontal directions.

The fbdev version of the driver does not show this problem. Breno Lima
dumped the eLCDIF controller registers on both the drm and fbdev drivers
and noticed that the VDCTRL3 register is configured incorrectly in the
drm driver.

The fbdev driver calculates the vertical and horizontal wait counts
of the VDCTRL3 register by doing: back porch + sync length.

Looking at the horizontal and vertical timing diagram from
include/drm/drm_modes.h this value corresponds to:

crtc_[hv]total - crtc_[hv]sync_start

So fix the VDCTRL3 register setting accordingly so that the eLCDIF
controller can properly show the framebuffer content in the correct
position.

Reported-by: Breno Lima <breno.lima@nxp.com>
Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
Tested-by: Breno Lima <breno.lima@nxp.com>
Tested-by: Marek Vasut <marex@denx.de>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/mxsfb/mxsfb_crtc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c
index 165abd227436..1144e0c9e894 100644
--- a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c
+++ b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c
@@ -221,8 +221,8 @@ static void mxsfb_crtc_mode_set_nofb(struct mxsfb_drm_private *mxsfb)
 	       VDCTRL2_SET_HSYNC_PERIOD(m->crtc_htotal),
 	       mxsfb->base + LCDC_VDCTRL2);
 
-	writel(SET_HOR_WAIT_CNT(m->crtc_hblank_end - m->crtc_hsync_end) |
-	       SET_VERT_WAIT_CNT(m->crtc_vblank_end - m->crtc_vsync_end),
+	writel(SET_HOR_WAIT_CNT(m->crtc_htotal - m->crtc_hsync_start) |
+	       SET_VERT_WAIT_CNT(m->crtc_vtotal - m->crtc_vsync_start),
 	       mxsfb->base + LCDC_VDCTRL3);
 
 	writel(SET_DOTCLK_H_VALID_DATA_CNT(m->hdisplay),
-- 
cgit v1.2.3


From 3f81e1340706e9a7f854808e2f580c3106805d0c Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@nxp.com>
Date: Wed, 1 Feb 2017 15:19:47 -0200
Subject: drm: mxsfb: Implement drm_panel handling

Currently when the 'power-supply' regulator is passed via device tree
it does not actually work since drm_panel_prepare()/drm_panel_enable()
are never called.

Quoting Thierry Reding: "It should really call drm_panel_prepare() and
drm_panel_enable() while switching on the display pipeline and
drm_panel_disable(), followed by drm_panel_unprepare() while switching
off the display pipeline."

So do as suggested, so that the 'power-supply' regulator can be functional.

Reported-by: Breno Lima <breno.lima@nxp.com>
Suggested-by: Thierry Reding <thierry.reding@gmail.com>
Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
Tested-by: Marek Vasut <marex@denx.de>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/mxsfb/mxsfb_drv.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/mxsfb/mxsfb_drv.c b/drivers/gpu/drm/mxsfb/mxsfb_drv.c
index cdfbe0284635..ff6d6a6f842e 100644
--- a/drivers/gpu/drm/mxsfb/mxsfb_drv.c
+++ b/drivers/gpu/drm/mxsfb/mxsfb_drv.c
@@ -102,14 +102,18 @@ static void mxsfb_pipe_enable(struct drm_simple_display_pipe *pipe,
 {
 	struct mxsfb_drm_private *mxsfb = drm_pipe_to_mxsfb_drm_private(pipe);
 
+	drm_panel_prepare(mxsfb->panel);
 	mxsfb_crtc_enable(mxsfb);
+	drm_panel_enable(mxsfb->panel);
 }
 
 static void mxsfb_pipe_disable(struct drm_simple_display_pipe *pipe)
 {
 	struct mxsfb_drm_private *mxsfb = drm_pipe_to_mxsfb_drm_private(pipe);
 
+	drm_panel_disable(mxsfb->panel);
 	mxsfb_crtc_disable(mxsfb);
+	drm_panel_unprepare(mxsfb->panel);
 }
 
 static void mxsfb_pipe_update(struct drm_simple_display_pipe *pipe,
-- 
cgit v1.2.3


From 9f138fa609c47403374a862a08a41394be53d461 Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Wed, 8 Mar 2017 18:08:16 +0100
Subject: net: initialize msg.msg_flags in recvfrom

KMSAN reports a use of uninitialized memory in put_cmsg() because
msg.msg_flags in recvfrom haven't been initialized properly.
The flag values don't affect the result on this path, but it's still a
good idea to initialize them explicitly.

Signed-off-by: Alexander Potapenko <glider@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/socket.c b/net/socket.c
index 2c1e8677ff2d..e0757e648c0c 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1731,6 +1731,7 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
 	/* We assume all kernel code knows the size of sockaddr_storage */
 	msg.msg_namelen = 0;
 	msg.msg_iocb = NULL;
+	msg.msg_flags = 0;
 	if (sock->file->f_flags & O_NONBLOCK)
 		flags |= MSG_DONTWAIT;
 	err = sock_recvmsg(sock, &msg, flags);
-- 
cgit v1.2.3


From 6cbac9828627b89487144c1e4448d7e0a6ab22ca Mon Sep 17 00:00:00 2001
From: LABBE Corentin <clabbe.montjoie@gmail.com>
Date: Wed, 8 Mar 2017 16:46:57 +0100
Subject: tun: remove copyright printing

Printing copyright does not give any useful information on the boot
process.
Furthermore, the email address printed is obsolete since
commit ba57b6f20429 ("MAINTAINERS: fix bouncing tun/tap entries")

Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index dc1b1dd9157c..f58b7d850114 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2570,7 +2570,6 @@ static int __init tun_init(void)
 	int ret = 0;
 
 	pr_info("%s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
-	pr_info("%s\n", DRV_COPYRIGHT);
 
 	ret = rtnl_link_register(&tun_link_ops);
 	if (ret) {
-- 
cgit v1.2.3


From 3c2217a675bac22afb149166e0de71809189850d Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Wed, 8 Mar 2017 18:44:32 -0500
Subject: bnxt_en: Perform function reset earlier during probe.

The firmware call to do function reset is done too late.  It is causing
the rings that have been reserved to be freed.  In NPAR mode, this bug
is causing us to run out of rings.

Fixes: 391be5c27364 ("bnxt_en: Implement new scheme to reserve tx rings.")
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 235733e91c79..33293ac32779 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -7444,6 +7444,10 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (rc)
 		goto init_err_pci_clean;
 
+	rc = bnxt_hwrm_func_reset(bp);
+	if (rc)
+		goto init_err_pci_clean;
+
 	bnxt_hwrm_fw_set_time(bp);
 
 	dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG |
@@ -7554,10 +7558,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (rc)
 		goto init_err_pci_clean;
 
-	rc = bnxt_hwrm_func_reset(bp);
-	if (rc)
-		goto init_err_pci_clean;
-
 	rc = bnxt_init_int_mode(bp);
 	if (rc)
 		goto init_err_pci_clean;
-- 
cgit v1.2.3


From b386cd362ffea09d05c56bfa85d104562e860647 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Wed, 8 Mar 2017 18:44:33 -0500
Subject: bnxt_en: Call bnxt_ulp_stop() during tx timeout.

If we call bnxt_reset_task() due to tx timeout, we should call
bnxt_ulp_stop() to inform the RDMA driver about the error and the
impending reset.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 33293ac32779..bbe93713e226 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -6495,8 +6495,14 @@ static void bnxt_reset_task(struct bnxt *bp, bool silent)
 	if (!silent)
 		bnxt_dbg_dump_states(bp);
 	if (netif_running(bp->dev)) {
+		int rc;
+
+		if (!silent)
+			bnxt_ulp_stop(bp);
 		bnxt_close_nic(bp, false, false);
-		bnxt_open_nic(bp, false, false);
+		rc = bnxt_open_nic(bp, false, false);
+		if (!silent && !rc)
+			bnxt_ulp_start(bp);
 	}
 }
 
-- 
cgit v1.2.3


From bc39f885a9c3bdbff0a96ecaf07b162a78eff6e4 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Wed, 8 Mar 2017 18:44:34 -0500
Subject: bnxt_en: Check if firmware LLDP agent is running.

Set DCB_CAP_DCBX_HOST capability flag only if the firmware LLDP agent
is not running.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     | 4 ++++
 drivers/net/ethernet/broadcom/bnxt/bnxt.h     | 1 +
 drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c | 2 +-
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index bbe93713e226..869d4c97a9e8 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -4465,6 +4465,10 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp)
 		vf->vlan = le16_to_cpu(resp->vlan) & VLAN_VID_MASK;
 	}
 #endif
+	if (BNXT_PF(bp) && (le16_to_cpu(resp->flags) &
+			    FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED))
+		bp->flags |= BNXT_FLAG_FW_LLDP_AGENT;
+
 	switch (resp->port_partition_type) {
 	case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0:
 	case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5:
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index faf26a2f726b..c7a5b84a5cb2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -993,6 +993,7 @@ struct bnxt {
 					 BNXT_FLAG_ROCEV2_CAP)
 	#define BNXT_FLAG_NO_AGG_RINGS	0x20000
 	#define BNXT_FLAG_RX_PAGE_MODE	0x40000
+	#define BNXT_FLAG_FW_LLDP_AGENT	0x80000
 	#define BNXT_FLAG_CHIP_NITRO_A0	0x1000000
 
 	#define BNXT_FLAG_ALL_CONFIG_FEATS (BNXT_FLAG_TPA |		\
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
index fdf2d8caf7bf..03532061d211 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
@@ -474,7 +474,7 @@ void bnxt_dcb_init(struct bnxt *bp)
 		return;
 
 	bp->dcbx_cap = DCB_CAP_DCBX_VER_IEEE;
-	if (BNXT_PF(bp))
+	if (BNXT_PF(bp) && !(bp->flags & BNXT_FLAG_FW_LLDP_AGENT))
 		bp->dcbx_cap |= DCB_CAP_DCBX_HOST;
 	else
 		bp->dcbx_cap |= DCB_CAP_DCBX_LLD_MANAGED;
-- 
cgit v1.2.3


From 520ad89a54edea84496695d528f73ddcf4a52ea4 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Wed, 8 Mar 2017 18:44:35 -0500
Subject: bnxt_en: Ignore 0 value in autoneg supported speed from firmware.

In some situations, the firmware will return 0 for autoneg supported
speed.  This may happen if the firmware detects no SFP module, for
example.  The driver should ignore this so that we don't end up with
an invalid autoneg setting with nothing advertised.  When SFP module
is inserted, we'll get the updated settings from firmware at that time.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 869d4c97a9e8..32de4589d16a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -5511,8 +5511,9 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp)
 		bp->lpi_tmr_hi = le32_to_cpu(resp->valid_tx_lpi_timer_high) &
 				 PORT_PHY_QCAPS_RESP_TX_LPI_TIMER_HIGH_MASK;
 	}
-	link_info->support_auto_speeds =
-		le16_to_cpu(resp->supported_speeds_auto_mode);
+	if (resp->supported_speeds_auto_mode)
+		link_info->support_auto_speeds =
+			le16_to_cpu(resp->supported_speeds_auto_mode);
 
 hwrm_phy_qcaps_exit:
 	mutex_unlock(&bp->hwrm_cmd_lock);
-- 
cgit v1.2.3


From cdfbabfb2f0ce983fdaa42f20e5f7842178fc01e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 9 Mar 2017 08:09:05 +0000
Subject: net: Work around lockdep limitation in sockets that use sockets

Lockdep issues a circular dependency warning when AFS issues an operation
through AF_RXRPC from a context in which the VFS/VM holds the mmap_sem.

The theory lockdep comes up with is as follows:

 (1) If the pagefault handler decides it needs to read pages from AFS, it
     calls AFS with mmap_sem held and AFS begins an AF_RXRPC call, but
     creating a call requires the socket lock:

	mmap_sem must be taken before sk_lock-AF_RXRPC

 (2) afs_open_socket() opens an AF_RXRPC socket and binds it.  rxrpc_bind()
     binds the underlying UDP socket whilst holding its socket lock.
     inet_bind() takes its own socket lock:

	sk_lock-AF_RXRPC must be taken before sk_lock-AF_INET

 (3) Reading from a TCP socket into a userspace buffer might cause a fault
     and thus cause the kernel to take the mmap_sem, but the TCP socket is
     locked whilst doing this:

	sk_lock-AF_INET must be taken before mmap_sem

However, lockdep's theory is wrong in this instance because it deals only
with lock classes and not individual locks.  The AF_INET lock in (2) isn't
really equivalent to the AF_INET lock in (3) as the former deals with a
socket entirely internal to the kernel that never sees userspace.  This is
a limitation in the design of lockdep.

Fix the general case by:

 (1) Double up all the locking keys used in sockets so that one set are
     used if the socket is created by userspace and the other set is used
     if the socket is created by the kernel.

 (2) Store the kern parameter passed to sk_alloc() in a variable in the
     sock struct (sk_kern_sock).  This informs sock_lock_init(),
     sock_init_data() and sk_clone_lock() as to the lock keys to be used.

     Note that the child created by sk_clone_lock() inherits the parent's
     kern setting.

 (3) Add a 'kern' parameter to ->accept() that is analogous to the one
     passed in to ->create() that distinguishes whether kernel_accept() or
     sys_accept4() was the caller and can be passed to sk_alloc().

     Note that a lot of accept functions merely dequeue an already
     allocated socket.  I haven't touched these as the new socket already
     exists before we get the parameter.

     Note also that there are a couple of places where I've made the accepted
     socket unconditionally kernel-based:

	irda_accept()
	rds_rcp_accept_one()
	tcp_accept_from_sock()

     because they follow a sock_create_kern() and accept off of that.

Whilst creating this, I noticed that lustre and ocfs don't create sockets
through sock_create_kern() and thus they aren't marked as for-kernel,
though they appear to be internal.  I wonder if these should do that so
that they use the new set of lock keys.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 crypto/af_alg.c                               |   9 ++-
 crypto/algif_hash.c                           |   9 ++-
 drivers/staging/lustre/lnet/lnet/lib-socket.c |   4 +-
 fs/dlm/lowcomms.c                             |   2 +-
 fs/ocfs2/cluster/tcp.c                        |   2 +-
 include/crypto/if_alg.h                       |   2 +-
 include/linux/net.h                           |   2 +-
 include/net/inet_common.h                     |   3 +-
 include/net/inet_connection_sock.h            |   2 +-
 include/net/sctp/structs.h                    |   3 +-
 include/net/sock.h                            |   9 ++-
 net/atm/svc.c                                 |   5 +-
 net/ax25/af_ax25.c                            |   3 +-
 net/bluetooth/l2cap_sock.c                    |   2 +-
 net/bluetooth/rfcomm/sock.c                   |   3 +-
 net/bluetooth/sco.c                           |   2 +-
 net/core/sock.c                               | 106 ++++++++++++++------------
 net/decnet/af_decnet.c                        |   5 +-
 net/ipv4/af_inet.c                            |   5 +-
 net/ipv4/inet_connection_sock.c               |   2 +-
 net/irda/af_irda.c                            |   5 +-
 net/iucv/af_iucv.c                            |   2 +-
 net/llc/af_llc.c                              |   4 +-
 net/netrom/af_netrom.c                        |   3 +-
 net/nfc/llcp_sock.c                           |   2 +-
 net/phonet/pep.c                              |   6 +-
 net/phonet/socket.c                           |   4 +-
 net/rds/tcp_listen.c                          |   2 +-
 net/rose/af_rose.c                            |   3 +-
 net/sctp/ipv6.c                               |   5 +-
 net/sctp/protocol.c                           |   5 +-
 net/sctp/socket.c                             |   4 +-
 net/smc/af_smc.c                              |   2 +-
 net/socket.c                                  |   4 +-
 net/tipc/socket.c                             |   8 +-
 net/unix/af_unix.c                            |   5 +-
 net/vmw_vsock/af_vsock.c                      |   3 +-
 net/x25/af_x25.c                              |   3 +-
 38 files changed, 142 insertions(+), 108 deletions(-)

diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index f5e18c2a4852..690deca17c35 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -266,7 +266,7 @@ unlock:
 	return err;
 }
 
-int af_alg_accept(struct sock *sk, struct socket *newsock)
+int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern)
 {
 	struct alg_sock *ask = alg_sk(sk);
 	const struct af_alg_type *type;
@@ -281,7 +281,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock)
 	if (!type)
 		goto unlock;
 
-	sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, 0);
+	sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, kern);
 	err = -ENOMEM;
 	if (!sk2)
 		goto unlock;
@@ -323,9 +323,10 @@ unlock:
 }
 EXPORT_SYMBOL_GPL(af_alg_accept);
 
-static int alg_accept(struct socket *sock, struct socket *newsock, int flags)
+static int alg_accept(struct socket *sock, struct socket *newsock, int flags,
+		      bool kern)
 {
-	return af_alg_accept(sock->sk, newsock);
+	return af_alg_accept(sock->sk, newsock, kern);
 }
 
 static const struct proto_ops alg_proto_ops = {
diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
index 54fc90e8339c..5e92bd275ef3 100644
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -239,7 +239,8 @@ unlock:
 	return err ?: len;
 }
 
-static int hash_accept(struct socket *sock, struct socket *newsock, int flags)
+static int hash_accept(struct socket *sock, struct socket *newsock, int flags,
+		       bool kern)
 {
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
@@ -260,7 +261,7 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags)
 	if (err)
 		return err;
 
-	err = af_alg_accept(ask->parent, newsock);
+	err = af_alg_accept(ask->parent, newsock, kern);
 	if (err)
 		return err;
 
@@ -378,7 +379,7 @@ static int hash_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
 }
 
 static int hash_accept_nokey(struct socket *sock, struct socket *newsock,
-			     int flags)
+			     int flags, bool kern)
 {
 	int err;
 
@@ -386,7 +387,7 @@ static int hash_accept_nokey(struct socket *sock, struct socket *newsock,
 	if (err)
 		return err;
 
-	return hash_accept(sock, newsock, flags);
+	return hash_accept(sock, newsock, flags, kern);
 }
 
 static struct proto_ops algif_hash_ops_nokey = {
diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c
index b7b87ecefcdf..9fca8d225ee0 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-socket.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-socket.c
@@ -532,7 +532,7 @@ lnet_sock_accept(struct socket **newsockp, struct socket *sock)
 
 	newsock->ops = sock->ops;
 
-	rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
+	rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false);
 	if (rc == -EAGAIN) {
 		/* Nothing ready, so wait for activity */
 		init_waitqueue_entry(&wait, current);
@@ -540,7 +540,7 @@ lnet_sock_accept(struct socket **newsockp, struct socket *sock)
 		set_current_state(TASK_INTERRUPTIBLE);
 		schedule();
 		remove_wait_queue(sk_sleep(sock->sk), &wait);
-		rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
+		rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false);
 	}
 
 	if (rc)
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 7d398d300e97..9382db998ec9 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -743,7 +743,7 @@ static int tcp_accept_from_sock(struct connection *con)
 	newsock->type = con->sock->type;
 	newsock->ops = con->sock->ops;
 
-	result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK);
+	result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK, true);
 	if (result < 0)
 		goto accept_err;
 
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 4348027384f5..d0ab7e56d0b4 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1863,7 +1863,7 @@ static int o2net_accept_one(struct socket *sock, int *more)
 
 	new_sock->type = sock->type;
 	new_sock->ops = sock->ops;
-	ret = sock->ops->accept(sock, new_sock, O_NONBLOCK);
+	ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, false);
 	if (ret < 0)
 		goto out;
 
diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h
index a2bfd7843f18..e2b9c6fe2714 100644
--- a/include/crypto/if_alg.h
+++ b/include/crypto/if_alg.h
@@ -73,7 +73,7 @@ int af_alg_unregister_type(const struct af_alg_type *type);
 
 int af_alg_release(struct socket *sock);
 void af_alg_release_parent(struct sock *sk);
-int af_alg_accept(struct sock *sk, struct socket *newsock);
+int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern);
 
 int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len);
 void af_alg_free_sg(struct af_alg_sgl *sgl);
diff --git a/include/linux/net.h b/include/linux/net.h
index cd0c8bd0a1de..0620f5e18c96 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -146,7 +146,7 @@ struct proto_ops {
 	int		(*socketpair)(struct socket *sock1,
 				      struct socket *sock2);
 	int		(*accept)    (struct socket *sock,
-				      struct socket *newsock, int flags);
+				      struct socket *newsock, int flags, bool kern);
 	int		(*getname)   (struct socket *sock,
 				      struct sockaddr *addr,
 				      int *sockaddr_len, int peer);
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index b7952d55b9c0..f39ae697347f 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -20,7 +20,8 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 			  int addr_len, int flags, int is_sendmsg);
 int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
 		       int addr_len, int flags);
-int inet_accept(struct socket *sock, struct socket *newsock, int flags);
+int inet_accept(struct socket *sock, struct socket *newsock, int flags,
+		bool kern);
 int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size);
 ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
 		      size_t size, int flags);
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 826f198374f8..c7a577976bec 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -258,7 +258,7 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk,
         return (unsigned long)min_t(u64, when, max_when);
 }
 
-struct sock *inet_csk_accept(struct sock *sk, int flags, int *err);
+struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern);
 
 int inet_csk_get_port(struct sock *sk, unsigned short snum);
 
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index a244db5e5ff7..07a0b128625a 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -476,7 +476,8 @@ struct sctp_pf {
 	int  (*send_verify) (struct sctp_sock *, union sctp_addr *);
 	int  (*supported_addrs)(const struct sctp_sock *, __be16 *);
 	struct sock *(*create_accept_sk) (struct sock *sk,
-					  struct sctp_association *asoc);
+					  struct sctp_association *asoc,
+					  bool kern);
 	int (*addr_to_user)(struct sctp_sock *sk, union sctp_addr *addr);
 	void (*to_sk_saddr)(union sctp_addr *, struct sock *sk);
 	void (*to_sk_daddr)(union sctp_addr *, struct sock *sk);
diff --git a/include/net/sock.h b/include/net/sock.h
index 5e5997654db6..03252d53975d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -236,6 +236,7 @@ struct sock_common {
   *	@sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN
   *	@sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
   *	@sk_lock:	synchronizer
+  *	@sk_kern_sock: True if sock is using kernel lock classes
   *	@sk_rcvbuf: size of receive buffer in bytes
   *	@sk_wq: sock wait queue and async head
   *	@sk_rx_dst: receive input route used by early demux
@@ -430,7 +431,8 @@ struct sock {
 #endif
 
 	kmemcheck_bitfield_begin(flags);
-	unsigned int		sk_padding : 2,
+	unsigned int		sk_padding : 1,
+				sk_kern_sock : 1,
 				sk_no_check_tx : 1,
 				sk_no_check_rx : 1,
 				sk_userlocks : 4,
@@ -1015,7 +1017,8 @@ struct proto {
 					int addr_len);
 	int			(*disconnect)(struct sock *sk, int flags);
 
-	struct sock *		(*accept)(struct sock *sk, int flags, int *err);
+	struct sock *		(*accept)(struct sock *sk, int flags, int *err,
+					  bool kern);
 
 	int			(*ioctl)(struct sock *sk, int cmd,
 					 unsigned long arg);
@@ -1573,7 +1576,7 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
 int sock_no_bind(struct socket *, struct sockaddr *, int);
 int sock_no_connect(struct socket *, struct sockaddr *, int, int);
 int sock_no_socketpair(struct socket *, struct socket *);
-int sock_no_accept(struct socket *, struct socket *, int);
+int sock_no_accept(struct socket *, struct socket *, int, bool);
 int sock_no_getname(struct socket *, struct sockaddr *, int *, int);
 unsigned int sock_no_poll(struct file *, struct socket *,
 			  struct poll_table_struct *);
diff --git a/net/atm/svc.c b/net/atm/svc.c
index db9794ec61d8..5589de7086af 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -318,7 +318,8 @@ out:
 	return error;
 }
 
-static int svc_accept(struct socket *sock, struct socket *newsock, int flags)
+static int svc_accept(struct socket *sock, struct socket *newsock, int flags,
+		      bool kern)
 {
 	struct sock *sk = sock->sk;
 	struct sk_buff *skb;
@@ -329,7 +330,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags)
 
 	lock_sock(sk);
 
-	error = svc_create(sock_net(sk), newsock, 0, 0);
+	error = svc_create(sock_net(sk), newsock, 0, kern);
 	if (error)
 		goto out;
 
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index a8e42cedf1db..b7c486752b3a 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1320,7 +1320,8 @@ out_release:
 	return err;
 }
 
-static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
+static int ax25_accept(struct socket *sock, struct socket *newsock, int flags,
+		       bool kern)
 {
 	struct sk_buff *skb;
 	struct sock *newsk;
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index f307b145ea54..507b80d59dec 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -301,7 +301,7 @@ done:
 }
 
 static int l2cap_sock_accept(struct socket *sock, struct socket *newsock,
-			     int flags)
+			     int flags, bool kern)
 {
 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 	struct sock *sk = sock->sk, *nsk;
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index aa1a814ceddc..ac3c650cb234 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -471,7 +471,8 @@ done:
 	return err;
 }
 
-static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags)
+static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags,
+			      bool kern)
 {
 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 	struct sock *sk = sock->sk, *nsk;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index e4e9a2da1e7e..728e0c8dc8e7 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -627,7 +627,7 @@ done:
 }
 
 static int sco_sock_accept(struct socket *sock, struct socket *newsock,
-			   int flags)
+			   int flags, bool kern)
 {
 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 	struct sock *sk = sock->sk, *ch;
diff --git a/net/core/sock.c b/net/core/sock.c
index f6fd79f33097..a96d5f7a5734 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -197,66 +197,55 @@ EXPORT_SYMBOL(sk_net_capable);
 
 /*
  * Each address family might have different locking rules, so we have
- * one slock key per address family:
+ * one slock key per address family and separate keys for internal and
+ * userspace sockets.
  */
 static struct lock_class_key af_family_keys[AF_MAX];
+static struct lock_class_key af_family_kern_keys[AF_MAX];
 static struct lock_class_key af_family_slock_keys[AF_MAX];
+static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
 
 /*
  * Make lock validator output more readable. (we pre-construct these
  * strings build-time, so that runtime initialization of socket
  * locks is fast):
  */
+
+#define _sock_locks(x)						  \
+  x "AF_UNSPEC",	x "AF_UNIX"     ,	x "AF_INET"     , \
+  x "AF_AX25"  ,	x "AF_IPX"      ,	x "AF_APPLETALK", \
+  x "AF_NETROM",	x "AF_BRIDGE"   ,	x "AF_ATMPVC"   , \
+  x "AF_X25"   ,	x "AF_INET6"    ,	x "AF_ROSE"     , \
+  x "AF_DECnet",	x "AF_NETBEUI"  ,	x "AF_SECURITY" , \
+  x "AF_KEY"   ,	x "AF_NETLINK"  ,	x "AF_PACKET"   , \
+  x "AF_ASH"   ,	x "AF_ECONET"   ,	x "AF_ATMSVC"   , \
+  x "AF_RDS"   ,	x "AF_SNA"      ,	x "AF_IRDA"     , \
+  x "AF_PPPOX" ,	x "AF_WANPIPE"  ,	x "AF_LLC"      , \
+  x "27"       ,	x "28"          ,	x "AF_CAN"      , \
+  x "AF_TIPC"  ,	x "AF_BLUETOOTH",	x "IUCV"        , \
+  x "AF_RXRPC" ,	x "AF_ISDN"     ,	x "AF_PHONET"   , \
+  x "AF_IEEE802154",	x "AF_CAIF"	,	x "AF_ALG"      , \
+  x "AF_NFC"   ,	x "AF_VSOCK"    ,	x "AF_KCM"      , \
+  x "AF_QIPCRTR",	x "AF_SMC"	,	x "AF_MAX"
+
 static const char *const af_family_key_strings[AF_MAX+1] = {
-  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
-  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
-  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
-  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
-  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
-  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
-  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
-  "sk_lock-AF_RDS"   , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
-  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
-  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
-  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
-  "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
-  "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG"      ,
-  "sk_lock-AF_NFC"   , "sk_lock-AF_VSOCK"    , "sk_lock-AF_KCM"      ,
-  "sk_lock-AF_QIPCRTR", "sk_lock-AF_SMC"     , "sk_lock-AF_MAX"
+	_sock_locks("sk_lock-")
 };
 static const char *const af_family_slock_key_strings[AF_MAX+1] = {
-  "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
-  "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
-  "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
-  "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
-  "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
-  "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
-  "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
-  "slock-AF_RDS"   , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
-  "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
-  "slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
-  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
-  "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
-  "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG"      ,
-  "slock-AF_NFC"   , "slock-AF_VSOCK"    ,"slock-AF_KCM"       ,
-  "slock-AF_QIPCRTR", "slock-AF_SMC"     , "slock-AF_MAX"
+	_sock_locks("slock-")
 };
 static const char *const af_family_clock_key_strings[AF_MAX+1] = {
-  "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
-  "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
-  "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
-  "clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
-  "clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
-  "clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
-  "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
-  "clock-AF_RDS"   , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
-  "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
-  "clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
-  "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
-  "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
-  "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG"      ,
-  "clock-AF_NFC"   , "clock-AF_VSOCK"    , "clock-AF_KCM"      ,
-  "clock-AF_QIPCRTR", "clock-AF_SMC"     , "clock-AF_MAX"
+	_sock_locks("clock-")
+};
+
+static const char *const af_family_kern_key_strings[AF_MAX+1] = {
+	_sock_locks("k-sk_lock-")
+};
+static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
+	_sock_locks("k-slock-")
+};
+static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
+	_sock_locks("k-clock-")
 };
 
 /*
@@ -264,6 +253,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
  * so split the lock classes by using a per-AF key:
  */
 static struct lock_class_key af_callback_keys[AF_MAX];
+static struct lock_class_key af_kern_callback_keys[AF_MAX];
 
 /* Take into consideration the size of the struct sk_buff overhead in the
  * determination of these values, since that is non-constant across
@@ -1293,7 +1283,16 @@ lenout:
  */
 static inline void sock_lock_init(struct sock *sk)
 {
-	sock_lock_init_class_and_name(sk,
+	if (sk->sk_kern_sock)
+		sock_lock_init_class_and_name(
+			sk,
+			af_family_kern_slock_key_strings[sk->sk_family],
+			af_family_kern_slock_keys + sk->sk_family,
+			af_family_kern_key_strings[sk->sk_family],
+			af_family_kern_keys + sk->sk_family);
+	else
+		sock_lock_init_class_and_name(
+			sk,
 			af_family_slock_key_strings[sk->sk_family],
 			af_family_slock_keys + sk->sk_family,
 			af_family_key_strings[sk->sk_family],
@@ -1399,6 +1398,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		 * why we need sk_prot_creator -acme
 		 */
 		sk->sk_prot = sk->sk_prot_creator = prot;
+		sk->sk_kern_sock = kern;
 		sock_lock_init(sk);
 		sk->sk_net_refcnt = kern ? 0 : 1;
 		if (likely(sk->sk_net_refcnt))
@@ -2277,7 +2277,8 @@ int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
 }
 EXPORT_SYMBOL(sock_no_socketpair);
 
-int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
+int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
+		   bool kern)
 {
 	return -EOPNOTSUPP;
 }
@@ -2481,7 +2482,14 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	}
 
 	rwlock_init(&sk->sk_callback_lock);
-	lockdep_set_class_and_name(&sk->sk_callback_lock,
+	if (sk->sk_kern_sock)
+		lockdep_set_class_and_name(
+			&sk->sk_callback_lock,
+			af_kern_callback_keys + sk->sk_family,
+			af_family_kern_clock_key_strings[sk->sk_family]);
+	else
+		lockdep_set_class_and_name(
+			&sk->sk_callback_lock,
 			af_callback_keys + sk->sk_family,
 			af_family_clock_key_strings[sk->sk_family]);
 
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index e6e79eda9763..7de5b40a5d0d 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1070,7 +1070,8 @@ static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo)
 	return skb == NULL ? ERR_PTR(err) : skb;
 }
 
-static int dn_accept(struct socket *sock, struct socket *newsock, int flags)
+static int dn_accept(struct socket *sock, struct socket *newsock, int flags,
+		     bool kern)
 {
 	struct sock *sk = sock->sk, *newsk;
 	struct sk_buff *skb = NULL;
@@ -1099,7 +1100,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags)
 
 	cb = DN_SKB_CB(skb);
 	sk->sk_ack_backlog--;
-	newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, 0);
+	newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, kern);
 	if (newsk == NULL) {
 		release_sock(sk);
 		kfree_skb(skb);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 5091f46826fa..6b1fc6e4278e 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -689,11 +689,12 @@ EXPORT_SYMBOL(inet_stream_connect);
  *	Accept a pending connection. The TCP layer now gives BSD semantics.
  */
 
-int inet_accept(struct socket *sock, struct socket *newsock, int flags)
+int inet_accept(struct socket *sock, struct socket *newsock, int flags,
+		bool kern)
 {
 	struct sock *sk1 = sock->sk;
 	int err = -EINVAL;
-	struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err);
+	struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err, kern);
 
 	if (!sk2)
 		goto do_err;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index b4d5980ade3b..5e313c1ac94f 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -424,7 +424,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
 /*
  * This will accept the next outstanding connection.
  */
-struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
+struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 81adc29a448d..8d77ad5cadaf 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -828,7 +828,8 @@ out:
  *    Wait for incoming connection
  *
  */
-static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
+static int irda_accept(struct socket *sock, struct socket *newsock, int flags,
+		       bool kern)
 {
 	struct sock *sk = sock->sk;
 	struct irda_sock *new, *self = irda_sk(sk);
@@ -836,7 +837,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
 	struct sk_buff *skb = NULL;
 	int err;
 
-	err = irda_create(sock_net(sk), newsock, sk->sk_protocol, 0);
+	err = irda_create(sock_net(sk), newsock, sk->sk_protocol, kern);
 	if (err)
 		return err;
 
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 89bbde1081ce..84de7b6326dc 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -938,7 +938,7 @@ done:
 
 /* Accept a pending connection */
 static int iucv_sock_accept(struct socket *sock, struct socket *newsock,
-			    int flags)
+			    int flags, bool kern)
 {
 	DECLARE_WAITQUEUE(wait, current);
 	struct sock *sk = sock->sk, *nsk;
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 06186d608a27..cb4fff785cbf 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -641,11 +641,13 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb)
  *	@sock: Socket which connections arrive on.
  *	@newsock: Socket to move incoming connection to.
  *	@flags: User specified operational flags.
+ *	@kern: If the socket is kernel internal
  *
  *	Accept a new incoming connection.
  *	Returns 0 upon success, negative otherwise.
  */
-static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags)
+static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags,
+			 bool kern)
 {
 	struct sock *sk = sock->sk, *newsk;
 	struct llc_sock *llc, *newllc;
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 4bbf4526b885..ebf16f7f9089 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -765,7 +765,8 @@ out_release:
 	return err;
 }
 
-static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
+static int nr_accept(struct socket *sock, struct socket *newsock, int flags,
+		     bool kern)
 {
 	struct sk_buff *skb;
 	struct sock *newsk;
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 879885b31cce..2ffb18e73df6 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -441,7 +441,7 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent,
 }
 
 static int llcp_sock_accept(struct socket *sock, struct socket *newsock,
-			    int flags)
+			    int flags, bool kern)
 {
 	DECLARE_WAITQUEUE(wait, current);
 	struct sock *sk = sock->sk, *new_sk;
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 222bedcd9575..e81537991ddf 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -772,7 +772,8 @@ static void pep_sock_close(struct sock *sk, long timeout)
 	sock_put(sk);
 }
 
-static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
+static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
+				    bool kern)
 {
 	struct pep_sock *pn = pep_sk(sk), *newpn;
 	struct sock *newsk = NULL;
@@ -846,7 +847,8 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
 	}
 
 	/* Create a new to-be-accepted sock */
-	newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot, 0);
+	newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot,
+			 kern);
 	if (!newsk) {
 		pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL);
 		err = -ENOBUFS;
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index a6c8da3ee893..64634e3ec2fc 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -305,7 +305,7 @@ out:
 }
 
 static int pn_socket_accept(struct socket *sock, struct socket *newsock,
-				int flags)
+			    int flags, bool kern)
 {
 	struct sock *sk = sock->sk;
 	struct sock *newsk;
@@ -314,7 +314,7 @@ static int pn_socket_accept(struct socket *sock, struct socket *newsock,
 	if (unlikely(sk->sk_state != TCP_LISTEN))
 		return -EINVAL;
 
-	newsk = sk->sk_prot->accept(sk, flags, &err);
+	newsk = sk->sk_prot->accept(sk, flags, &err, kern);
 	if (!newsk)
 		return err;
 
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 2c69a508a693..507678853e6c 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -133,7 +133,7 @@ int rds_tcp_accept_one(struct socket *sock)
 
 	new_sock->type = sock->type;
 	new_sock->ops = sock->ops;
-	ret = sock->ops->accept(sock, new_sock, O_NONBLOCK);
+	ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, true);
 	if (ret < 0)
 		goto out;
 
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index b8a1df2c9785..4a9729257023 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -871,7 +871,8 @@ out_release:
 	return err;
 }
 
-static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
+static int rose_accept(struct socket *sock, struct socket *newsock, int flags,
+		       bool kern)
 {
 	struct sk_buff *skb;
 	struct sock *newsk;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 063baac5b9fe..961ee59f696a 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -640,14 +640,15 @@ static sctp_scope_t sctp_v6_scope(union sctp_addr *addr)
 
 /* Create and initialize a new sk for the socket to be returned by accept(). */
 static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
-					     struct sctp_association *asoc)
+					     struct sctp_association *asoc,
+					     bool kern)
 {
 	struct sock *newsk;
 	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
 	struct sctp6_sock *newsctp6sk;
 	struct ipv6_txoptions *opt;
 
-	newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, 0);
+	newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, kern);
 	if (!newsk)
 		goto out;
 
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 1b6d4574d2b0..989a900383b5 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -575,10 +575,11 @@ static int sctp_v4_is_ce(const struct sk_buff *skb)
 
 /* Create and initialize a new sk for the socket returned by accept(). */
 static struct sock *sctp_v4_create_accept_sk(struct sock *sk,
-					     struct sctp_association *asoc)
+					     struct sctp_association *asoc,
+					     bool kern)
 {
 	struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL,
-			sk->sk_prot, 0);
+			sk->sk_prot, kern);
 	struct inet_sock *newinet;
 
 	if (!newsk)
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 6f0a9be50f50..0f378ea2ae38 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4116,7 +4116,7 @@ static int sctp_disconnect(struct sock *sk, int flags)
  * descriptor will be returned from accept() to represent the newly
  * formed association.
  */
-static struct sock *sctp_accept(struct sock *sk, int flags, int *err)
+static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern)
 {
 	struct sctp_sock *sp;
 	struct sctp_endpoint *ep;
@@ -4151,7 +4151,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err)
 	 */
 	asoc = list_entry(ep->asocs.next, struct sctp_association, asocs);
 
-	newsk = sp->pf->create_accept_sk(sk, asoc);
+	newsk = sp->pf->create_accept_sk(sk, asoc, kern);
 	if (!newsk) {
 		error = -ENOMEM;
 		goto out;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 85837ab90e89..093803786eac 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -944,7 +944,7 @@ out:
 }
 
 static int smc_accept(struct socket *sock, struct socket *new_sock,
-		      int flags)
+		      int flags, bool kern)
 {
 	struct sock *sk = sock->sk, *nsk;
 	DECLARE_WAITQUEUE(wait, current);
diff --git a/net/socket.c b/net/socket.c
index e0757e648c0c..e034fe4164be 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1506,7 +1506,7 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
 	if (err)
 		goto out_fd;
 
-	err = sock->ops->accept(sock, newsock, sock->file->f_flags);
+	err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
 	if (err < 0)
 		goto out_fd;
 
@@ -3239,7 +3239,7 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
 	if (err < 0)
 		goto done;
 
-	err = sock->ops->accept(sock, *newsock, flags);
+	err = sock->ops->accept(sock, *newsock, flags, true);
 	if (err < 0) {
 		sock_release(*newsock);
 		*newsock = NULL;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 43e4045e72bc..7130e73bd42c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -115,7 +115,8 @@ static void tipc_data_ready(struct sock *sk);
 static void tipc_write_space(struct sock *sk);
 static void tipc_sock_destruct(struct sock *sk);
 static int tipc_release(struct socket *sock);
-static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags);
+static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
+		       bool kern);
 static void tipc_sk_timeout(unsigned long data);
 static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
 			   struct tipc_name_seq const *seq);
@@ -2029,7 +2030,8 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
  *
  * Returns 0 on success, errno otherwise
  */
-static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
+static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
+		       bool kern)
 {
 	struct sock *new_sk, *sk = sock->sk;
 	struct sk_buff *buf;
@@ -2051,7 +2053,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
 
 	buf = skb_peek(&sk->sk_receive_queue);
 
-	res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 0);
+	res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, kern);
 	if (res)
 		goto exit;
 	security_sk_clone(sock->sk, new_sock->sk);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index ee37b390260a..928691c43408 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -636,7 +636,7 @@ static int unix_bind(struct socket *, struct sockaddr *, int);
 static int unix_stream_connect(struct socket *, struct sockaddr *,
 			       int addr_len, int flags);
 static int unix_socketpair(struct socket *, struct socket *);
-static int unix_accept(struct socket *, struct socket *, int);
+static int unix_accept(struct socket *, struct socket *, int, bool);
 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
 static unsigned int unix_dgram_poll(struct file *, struct socket *,
@@ -1402,7 +1402,8 @@ static void unix_sock_inherit_flags(const struct socket *old,
 		set_bit(SOCK_PASSSEC, &new->flags);
 }
 
-static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
+static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
+		       bool kern)
 {
 	struct sock *sk = sock->sk;
 	struct sock *tsk;
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 9192ead66751..9f770f33c100 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1250,7 +1250,8 @@ out:
 	return err;
 }
 
-static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
+static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
+			bool kern)
 {
 	struct sock *listener;
 	int err;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index fd28a49dbe8f..8b911c29860e 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -852,7 +852,8 @@ static int x25_wait_for_data(struct sock *sk, long timeout)
 	return rc;
 }
 
-static int x25_accept(struct socket *sock, struct socket *newsock, int flags)
+static int x25_accept(struct socket *sock, struct socket *newsock, int flags,
+		      bool kern)
 {
 	struct sock *sk = sock->sk;
 	struct sock *newsk;
-- 
cgit v1.2.3


From 4b3b45edba9222e518a1ec72df841eba3609fe34 Mon Sep 17 00:00:00 2001
From: Alexey Kodanev <alexey.kodanev@oracle.com>
Date: Thu, 9 Mar 2017 13:56:46 +0300
Subject: udp: avoid ufo handling on IP payload compression packets

commit c146066ab802 ("ipv4: Don't use ufo handling on later transformed
packets") and commit f89c56ce710a ("ipv6: Don't use ufo handling on
later transformed packets") added a check that 'rt->dst.header_len' isn't
zero in order to skip UFO, but it doesn't include IPcomp in transport mode
where it equals zero.

Packets, after payload compression, may not require further fragmentation,
and if original length exceeds MTU, later compressed packets will be
transmitted incorrectly. This can be reproduced with LTP udp_ipsec.sh test
on veth device with enabled UFO, MTU is 1500 and UDP payload is 2000:

* IPv4 case, offset is wrong + unnecessary fragmentation
    udp_ipsec.sh -p comp -m transport -s 2000 &
    tcpdump -ni ltp_ns_veth2
    ...
    IP (tos 0x0, ttl 64, id 45203, offset 0, flags [+],
      proto Compressed IP (108), length 49)
      10.0.0.2 > 10.0.0.1: IPComp(cpi=0x1000)
    IP (tos 0x0, ttl 64, id 45203, offset 1480, flags [none],
      proto UDP (17), length 21) 10.0.0.2 > 10.0.0.1: ip-proto-17

* IPv6 case, sending small fragments
    udp_ipsec.sh -6 -p comp -m transport -s 2000 &
    tcpdump -ni ltp_ns_veth2
    ...
    IP6 (flowlabel 0x6b9ba, hlim 64, next-header Compressed IP (108)
      payload length: 37) fd00::2 > fd00::1: IPComp(cpi=0x1000)
    IP6 (flowlabel 0x6b9ba, hlim 64, next-header Compressed IP (108)
      payload length: 21) fd00::2 > fd00::1: IPComp(cpi=0x1000)

Fix it by checking 'rt->dst.xfrm' pointer to 'xfrm_state' struct, skip UFO
if xfrm is set. So the new check will include both cases: IPcomp and IPsec.

Fixes: c146066ab802 ("ipv4: Don't use ufo handling on later transformed packets")
Fixes: f89c56ce710a ("ipv6: Don't use ufo handling on later transformed packets")
Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c  | 2 +-
 net/ipv6/ip6_output.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 737ce826d7ec..7a3fd25e8913 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -966,7 +966,7 @@ static int __ip_append_data(struct sock *sk,
 	cork->length += length;
 	if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
-	    (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
+	    (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
 	    (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
 		err = ip_ufo_append_data(sk, queue, getfrag, from, length,
 					 hh_len, fragheaderlen, transhdrlen,
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 528b3c1f3fde..df42096e1f04 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1385,7 +1385,7 @@ emsgsize:
 	if ((((length + fragheaderlen) > mtu) ||
 	     (skb && skb_is_gso(skb))) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
-	    (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
+	    (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
 	    (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
 		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
 					  hh_len, fragheaderlen, exthdrlen,
-- 
cgit v1.2.3


From 6fc166d62c6f9f6eda5ea300f671d34e89bdd3c8 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 9 Mar 2017 08:10:32 +0000
Subject: rxrpc: rxrpc_kernel_send_data() needs to handle failed call better

If rxrpc_kernel_send_data() is asked to send data through a call that has
already failed (due to a remote abort, received protocol error or network
error), then return the associated error code saved in the call rather than
ESHUTDOWN.

This allows the caller to work out whether to ask for the abort code or not
based on this.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/sendmsg.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 47ccfeacc1c6..97ab214ca411 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -618,8 +618,9 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
 		ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len);
 		break;
 	case RXRPC_CALL_COMPLETE:
-		/* It's too late for this call */
-		ret = -ESHUTDOWN;
+		read_lock_bh(&call->state_lock);
+		ret = -call->error;
+		read_unlock_bh(&call->state_lock);
 		break;
 	default:
 		 /* Request phase complete for this client call */
-- 
cgit v1.2.3


From d7aba644ffdebf756e51e26a2229055211838e89 Mon Sep 17 00:00:00 2001
From: "Lendacky, Thomas" <Thomas.Lendacky@amd.com>
Date: Thu, 9 Mar 2017 17:48:23 -0600
Subject: amd-xgbe: Enable IRQs only if napi_complete_done() is true

Depending on the hardware, the amd-xgbe driver may use disable_irq_nosync()
and enable_irq() when an interrupt is received to process Rx packets. If
the napi_complete_done() return value isn't checked an unbalanced enable
for the IRQ could result, generating a warning stack trace.

Update the driver to only enable interrupts if napi_complete_done() returns
true.

Reported-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 248f60d171a5..ffea9859f5a7 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -2272,10 +2272,7 @@ static int xgbe_one_poll(struct napi_struct *napi, int budget)
 	processed = xgbe_rx_poll(channel, budget);
 
 	/* If we processed everything, we are done */
-	if (processed < budget) {
-		/* Turn off polling */
-		napi_complete_done(napi, processed);
-
+	if ((processed < budget) && napi_complete_done(napi, processed)) {
 		/* Enable Tx and Rx interrupts */
 		if (pdata->channel_irq_mode)
 			xgbe_enable_rx_tx_int(pdata, channel);
@@ -2317,10 +2314,7 @@ static int xgbe_all_poll(struct napi_struct *napi, int budget)
 	} while ((processed < budget) && (processed != last_processed));
 
 	/* If we processed everything, we are done */
-	if (processed < budget) {
-		/* Turn off polling */
-		napi_complete_done(napi, processed);
-
+	if ((processed < budget) && napi_complete_done(napi, processed)) {
 		/* Enable Tx and Rx interrupts */
 		xgbe_enable_rx_tx_ints(pdata);
 	}
-- 
cgit v1.2.3


From ffff71328a3c321f7c14cc1edd33577717037744 Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Thu, 9 Mar 2017 16:58:43 -0800
Subject: net: bcmgenet: correct the RBUF_OVFL_CNT and RBUF_ERR_CNT MIB values

The location of the RBUF overflow and error counters has moved between
different version of the GENET MAC.  This commit corrects the driver to
read from the correct locations depending on the version of the GENET
MAC.

Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file")
Signed-off-by: Doug Berger <opendmb@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 60 +++++++++++++++++++++++---
 drivers/net/ethernet/broadcom/genet/bcmgenet.h | 10 +++--
 2 files changed, 60 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index f92896835d2a..f0fb7eca8eb4 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -1,7 +1,7 @@
 /*
  * Broadcom GENET (Gigabit Ethernet) controller driver
  *
- * Copyright (c) 2014 Broadcom Corporation
+ * Copyright (c) 2014-2017 Broadcom
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -778,8 +778,9 @@ static const struct bcmgenet_stats bcmgenet_gstrings_stats[] = {
 	STAT_GENET_RUNT("rx_runt_bytes", mib.rx_runt_bytes),
 	/* Misc UniMAC counters */
 	STAT_GENET_MISC("rbuf_ovflow_cnt", mib.rbuf_ovflow_cnt,
-			UMAC_RBUF_OVFL_CNT),
-	STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt, UMAC_RBUF_ERR_CNT),
+			UMAC_RBUF_OVFL_CNT_V1),
+	STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt,
+			UMAC_RBUF_ERR_CNT_V1),
 	STAT_GENET_MISC("mdf_err_cnt", mib.mdf_err_cnt, UMAC_MDF_ERR_CNT),
 	STAT_GENET_SOFT_MIB("alloc_rx_buff_failed", mib.alloc_rx_buff_failed),
 	STAT_GENET_SOFT_MIB("rx_dma_failed", mib.rx_dma_failed),
@@ -821,6 +822,45 @@ static void bcmgenet_get_strings(struct net_device *dev, u32 stringset,
 	}
 }
 
+static u32 bcmgenet_update_stat_misc(struct bcmgenet_priv *priv, u16 offset)
+{
+	u16 new_offset;
+	u32 val;
+
+	switch (offset) {
+	case UMAC_RBUF_OVFL_CNT_V1:
+		if (GENET_IS_V2(priv))
+			new_offset = RBUF_OVFL_CNT_V2;
+		else
+			new_offset = RBUF_OVFL_CNT_V3PLUS;
+
+		val = bcmgenet_rbuf_readl(priv,	new_offset);
+		/* clear if overflowed */
+		if (val == ~0)
+			bcmgenet_rbuf_writel(priv, 0, new_offset);
+		break;
+	case UMAC_RBUF_ERR_CNT_V1:
+		if (GENET_IS_V2(priv))
+			new_offset = RBUF_ERR_CNT_V2;
+		else
+			new_offset = RBUF_ERR_CNT_V3PLUS;
+
+		val = bcmgenet_rbuf_readl(priv,	new_offset);
+		/* clear if overflowed */
+		if (val == ~0)
+			bcmgenet_rbuf_writel(priv, 0, new_offset);
+		break;
+	default:
+		val = bcmgenet_umac_readl(priv, offset);
+		/* clear if overflowed */
+		if (val == ~0)
+			bcmgenet_umac_writel(priv, 0, offset);
+		break;
+	}
+
+	return val;
+}
+
 static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv)
 {
 	int i, j = 0;
@@ -845,10 +885,16 @@ static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv)
 						  UMAC_MIB_START + j + offset);
 			break;
 		case BCMGENET_STAT_MISC:
-			val = bcmgenet_umac_readl(priv, s->reg_offset);
-			/* clear if overflowed */
-			if (val == ~0)
-				bcmgenet_umac_writel(priv, 0, s->reg_offset);
+			if (GENET_IS_V1(priv)) {
+				val = bcmgenet_umac_readl(priv, s->reg_offset);
+				/* clear if overflowed */
+				if (val == ~0)
+					bcmgenet_umac_writel(priv, 0,
+							     s->reg_offset);
+			} else {
+				val = bcmgenet_update_stat_misc(priv,
+								s->reg_offset);
+			}
 			break;
 		}
 
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 1e2dc34d331a..d5fb0d772dcd 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Broadcom Corporation
+ * Copyright (c) 2014-2017 Broadcom
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -214,7 +214,9 @@ struct bcmgenet_mib_counters {
 #define  MDIO_REG_SHIFT			16
 #define  MDIO_REG_MASK			0x1F
 
-#define UMAC_RBUF_OVFL_CNT		0x61C
+#define UMAC_RBUF_OVFL_CNT_V1		0x61C
+#define RBUF_OVFL_CNT_V2		0x80
+#define RBUF_OVFL_CNT_V3PLUS		0x94
 
 #define UMAC_MPD_CTRL			0x620
 #define  MPD_EN				(1 << 0)
@@ -224,7 +226,9 @@ struct bcmgenet_mib_counters {
 
 #define UMAC_MPD_PW_MS			0x624
 #define UMAC_MPD_PW_LS			0x628
-#define UMAC_RBUF_ERR_CNT		0x634
+#define UMAC_RBUF_ERR_CNT_V1		0x634
+#define RBUF_ERR_CNT_V2			0x84
+#define RBUF_ERR_CNT_V3PLUS		0x98
 #define UMAC_MDF_ERR_CNT		0x638
 #define UMAC_MDF_CTRL			0x650
 #define UMAC_MDF_ADDR			0x654
-- 
cgit v1.2.3


From 1ad3d225e5a40ca6c586989b4baaca710544c15a Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Thu, 9 Mar 2017 16:58:44 -0800
Subject: net: bcmgenet: correct MIB access of UniMAC RUNT counters

The gap between the Tx status counters and the Rx RUNT counters is now
being added to allow correct reporting of the registers.

Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file")
Signed-off-by: Doug Berger <opendmb@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index f0fb7eca8eb4..01a172d95328 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -876,13 +876,16 @@ static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv)
 		case BCMGENET_STAT_NETDEV:
 		case BCMGENET_STAT_SOFT:
 			continue;
-		case BCMGENET_STAT_MIB_RX:
-		case BCMGENET_STAT_MIB_TX:
 		case BCMGENET_STAT_RUNT:
-			if (s->type != BCMGENET_STAT_MIB_RX)
-				offset = BCMGENET_STAT_OFFSET;
+			offset += BCMGENET_STAT_OFFSET;
+			/* fall through */
+		case BCMGENET_STAT_MIB_TX:
+			offset += BCMGENET_STAT_OFFSET;
+			/* fall through */
+		case BCMGENET_STAT_MIB_RX:
 			val = bcmgenet_umac_readl(priv,
 						  UMAC_MIB_START + j + offset);
+			offset = 0;	/* Reset Offset */
 			break;
 		case BCMGENET_STAT_MISC:
 			if (GENET_IS_V1(priv)) {
-- 
cgit v1.2.3


From eca4bad73409aedc6ff22f823c18b67a4f08c851 Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Thu, 9 Mar 2017 16:58:45 -0800
Subject: net: bcmgenet: reserved phy revisions must be checked first

The reserved gphy_rev value of 0x01ff must be tested before the old
or new scheme for GPHY major versioning are tested, otherwise it will
be treated as 0xff00 according to the old scheme.

Fixes: b04a2f5b9ff5 ("net: bcmgenet: add support for new GENET PHY revision scheme")
Signed-off-by: Doug Berger <opendmb@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 01a172d95328..99f8d9024633 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -3226,6 +3226,12 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv)
 	 */
 	gphy_rev = reg & 0xffff;
 
+	/* This is reserved so should require special treatment */
+	if (gphy_rev == 0 || gphy_rev == 0x01ff) {
+		pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev);
+		return;
+	}
+
 	/* This is the good old scheme, just GPHY major, no minor nor patch */
 	if ((gphy_rev & 0xf0) != 0)
 		priv->gphy_rev = gphy_rev << 8;
@@ -3234,12 +3240,6 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv)
 	else if ((gphy_rev & 0xff00) != 0)
 		priv->gphy_rev = gphy_rev;
 
-	/* This is reserved so should require special treatment */
-	else if (gphy_rev == 0 || gphy_rev == 0x01ff) {
-		pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev);
-		return;
-	}
-
 #ifdef CONFIG_PHYS_ADDR_T_64BIT
 	if (!(params->flags & GENET_HAS_40BITS))
 		pr_warn("GENET does not support 40-bits PA\n");
-- 
cgit v1.2.3


From 7627409cc4970e8c8b9de6945ad86a575290a94e Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Thu, 9 Mar 2017 16:58:46 -0800
Subject: net: bcmgenet: power down internal phy if open or resume fails

Since the internal PHY is powered up during the open and resume
functions it should be powered back down if the functions fail.

Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file")
Signed-off-by: Doug Berger <opendmb@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 99f8d9024633..475dc14931af 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -2850,6 +2850,8 @@ err_irq0:
 err_fini_dma:
 	bcmgenet_fini_dma(priv);
 err_clk_disable:
+	if (priv->internal_phy)
+		bcmgenet_power_down(priv, GENET_POWER_PASSIVE);
 	clk_disable_unprepare(priv->clk);
 	return ret;
 }
@@ -3551,6 +3553,8 @@ static int bcmgenet_resume(struct device *d)
 	return 0;
 
 out_clk_disable:
+	if (priv->internal_phy)
+		bcmgenet_power_down(priv, GENET_POWER_PASSIVE);
 	clk_disable_unprepare(priv->clk);
 	return ret;
 }
-- 
cgit v1.2.3


From 07c52d6a0b955a8a28834f9354793cfc4b81d0e9 Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Thu, 9 Mar 2017 16:58:47 -0800
Subject: net: bcmgenet: synchronize irq0 status between the isr and task

Add a spinlock to ensure that irq0_stat is not unintentionally altered
as the result of preemption.  Also removed unserviced irq0 interrupts
and removed irq1_stat since there is no bottom half service for those
interrupts.

Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file")
Signed-off-by: Doug Berger <opendmb@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 73 ++++++++++++++------------
 drivers/net/ethernet/broadcom/genet/bcmgenet.h |  6 ++-
 2 files changed, 44 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 475dc14931af..527cecaf12c1 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -2506,24 +2506,28 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv)
 /* Interrupt bottom half */
 static void bcmgenet_irq_task(struct work_struct *work)
 {
+	unsigned long flags;
+	unsigned int status;
 	struct bcmgenet_priv *priv = container_of(
 			work, struct bcmgenet_priv, bcmgenet_irq_work);
 
 	netif_dbg(priv, intr, priv->dev, "%s\n", __func__);
 
-	if (priv->irq0_stat & UMAC_IRQ_MPD_R) {
-		priv->irq0_stat &= ~UMAC_IRQ_MPD_R;
+	spin_lock_irqsave(&priv->lock, flags);
+	status = priv->irq0_stat;
+	priv->irq0_stat = 0;
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	if (status & UMAC_IRQ_MPD_R) {
 		netif_dbg(priv, wol, priv->dev,
 			  "magic packet detected, waking up\n");
 		bcmgenet_power_up(priv, GENET_POWER_WOL_MAGIC);
 	}
 
 	/* Link UP/DOWN event */
-	if (priv->irq0_stat & UMAC_IRQ_LINK_EVENT) {
+	if (status & UMAC_IRQ_LINK_EVENT)
 		phy_mac_interrupt(priv->phydev,
-				  !!(priv->irq0_stat & UMAC_IRQ_LINK_UP));
-		priv->irq0_stat &= ~UMAC_IRQ_LINK_EVENT;
-	}
+				  !!(status & UMAC_IRQ_LINK_UP));
 }
 
 /* bcmgenet_isr1: handle Rx and Tx priority queues */
@@ -2532,22 +2536,21 @@ static irqreturn_t bcmgenet_isr1(int irq, void *dev_id)
 	struct bcmgenet_priv *priv = dev_id;
 	struct bcmgenet_rx_ring *rx_ring;
 	struct bcmgenet_tx_ring *tx_ring;
-	unsigned int index;
+	unsigned int index, status;
 
-	/* Save irq status for bottom-half processing. */
-	priv->irq1_stat =
-		bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) &
+	/* Read irq status */
+	status = bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) &
 		~bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_MASK_STATUS);
 
 	/* clear interrupts */
-	bcmgenet_intrl2_1_writel(priv, priv->irq1_stat, INTRL2_CPU_CLEAR);
+	bcmgenet_intrl2_1_writel(priv, status, INTRL2_CPU_CLEAR);
 
 	netif_dbg(priv, intr, priv->dev,
-		  "%s: IRQ=0x%x\n", __func__, priv->irq1_stat);
+		  "%s: IRQ=0x%x\n", __func__, status);
 
 	/* Check Rx priority queue interrupts */
 	for (index = 0; index < priv->hw_params->rx_queues; index++) {
-		if (!(priv->irq1_stat & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index)))
+		if (!(status & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index)))
 			continue;
 
 		rx_ring = &priv->rx_rings[index];
@@ -2560,7 +2563,7 @@ static irqreturn_t bcmgenet_isr1(int irq, void *dev_id)
 
 	/* Check Tx priority queue interrupts */
 	for (index = 0; index < priv->hw_params->tx_queues; index++) {
-		if (!(priv->irq1_stat & BIT(index)))
+		if (!(status & BIT(index)))
 			continue;
 
 		tx_ring = &priv->tx_rings[index];
@@ -2580,19 +2583,20 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
 	struct bcmgenet_priv *priv = dev_id;
 	struct bcmgenet_rx_ring *rx_ring;
 	struct bcmgenet_tx_ring *tx_ring;
+	unsigned int status;
+	unsigned long flags;
 
-	/* Save irq status for bottom-half processing. */
-	priv->irq0_stat =
-		bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) &
+	/* Read irq status */
+	status = bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) &
 		~bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_MASK_STATUS);
 
 	/* clear interrupts */
-	bcmgenet_intrl2_0_writel(priv, priv->irq0_stat, INTRL2_CPU_CLEAR);
+	bcmgenet_intrl2_0_writel(priv, status, INTRL2_CPU_CLEAR);
 
 	netif_dbg(priv, intr, priv->dev,
-		  "IRQ=0x%x\n", priv->irq0_stat);
+		  "IRQ=0x%x\n", status);
 
-	if (priv->irq0_stat & UMAC_IRQ_RXDMA_DONE) {
+	if (status & UMAC_IRQ_RXDMA_DONE) {
 		rx_ring = &priv->rx_rings[DESC_INDEX];
 
 		if (likely(napi_schedule_prep(&rx_ring->napi))) {
@@ -2601,7 +2605,7 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
 		}
 	}
 
-	if (priv->irq0_stat & UMAC_IRQ_TXDMA_DONE) {
+	if (status & UMAC_IRQ_TXDMA_DONE) {
 		tx_ring = &priv->tx_rings[DESC_INDEX];
 
 		if (likely(napi_schedule_prep(&tx_ring->napi))) {
@@ -2610,22 +2614,23 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
 		}
 	}
 
-	if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R |
-				UMAC_IRQ_PHY_DET_F |
-				UMAC_IRQ_LINK_EVENT |
-				UMAC_IRQ_HFB_SM |
-				UMAC_IRQ_HFB_MM |
-				UMAC_IRQ_MPD_R)) {
-		/* all other interested interrupts handled in bottom half */
-		schedule_work(&priv->bcmgenet_irq_work);
-	}
-
 	if ((priv->hw_params->flags & GENET_HAS_MDIO_INTR) &&
-	    priv->irq0_stat & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) {
-		priv->irq0_stat &= ~(UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR);
+		status & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) {
 		wake_up(&priv->wq);
 	}
 
+	/* all other interested interrupts handled in bottom half */
+	status &= (UMAC_IRQ_LINK_EVENT |
+		   UMAC_IRQ_MPD_R);
+	if (status) {
+		/* Save irq status for bottom-half processing. */
+		spin_lock_irqsave(&priv->lock, flags);
+		priv->irq0_stat |= status;
+		spin_unlock_irqrestore(&priv->lock, flags);
+
+		schedule_work(&priv->bcmgenet_irq_work);
+	}
+
 	return IRQ_HANDLED;
 }
 
@@ -3327,6 +3332,8 @@ static int bcmgenet_probe(struct platform_device *pdev)
 		goto err;
 	}
 
+	spin_lock_init(&priv->lock);
+
 	SET_NETDEV_DEV(dev, &pdev->dev);
 	dev_set_drvdata(&pdev->dev, dev);
 	ether_addr_copy(dev->dev_addr, macaddr);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index d5fb0d772dcd..db7f289d65ae 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -623,11 +623,13 @@ struct bcmgenet_priv {
 	struct work_struct bcmgenet_irq_work;
 	int irq0;
 	int irq1;
-	unsigned int irq0_stat;
-	unsigned int irq1_stat;
 	int wol_irq;
 	bool wol_irq_disabled;
 
+	/* shared status */
+	spinlock_t lock;
+	unsigned int irq0_stat;
+
 	/* HW descriptors/checksum variables */
 	bool desc_64b_en;
 	bool desc_rxchk_en;
-- 
cgit v1.2.3


From 6be371b053dc86f11465cc1abce2e99bda0a0574 Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Thu, 9 Mar 2017 16:58:48 -0800
Subject: net: bcmgenet: Power up the internal PHY before probing the MII

When using the internal PHY it must be powered up when the MII is probed
or the PHY will not be detected.  Since the PHY is powered up at reset
this has not been a problem.  However, when the kernel is restarted with
kexec the PHY will likely be powered down when the kernel starts so it
will not be detected and the Ethernet link will not be established.

This commit explicitly powers up the internal PHY when the GENET driver
is probed to correct this behavior.

Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file")
Signed-off-by: Doug Berger <opendmb@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 527cecaf12c1..f93098840775 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -3289,6 +3289,7 @@ static int bcmgenet_probe(struct platform_device *pdev)
 	const void *macaddr;
 	struct resource *r;
 	int err = -EIO;
+	const char *phy_mode_str;
 
 	/* Up to GENET_MAX_MQ_CNT + 1 TX queues and RX queues */
 	dev = alloc_etherdev_mqs(sizeof(*priv), GENET_MAX_MQ_CNT + 1,
@@ -3396,6 +3397,13 @@ static int bcmgenet_probe(struct platform_device *pdev)
 		priv->clk_eee = NULL;
 	}
 
+	/* If this is an internal GPHY, power it on now, before UniMAC is
+	 * brought out of reset as absolutely no UniMAC activity is allowed
+	 */
+	if (dn && !of_property_read_string(dn, "phy-mode", &phy_mode_str) &&
+	    !strcasecmp(phy_mode_str, "internal"))
+		bcmgenet_power_up(priv, GENET_POWER_PASSIVE);
+
 	err = reset_umac(priv);
 	if (err)
 		goto err_clk_disable;
-- 
cgit v1.2.3


From 89316fa34ab8afac8d693f41a5bc268673f1da15 Mon Sep 17 00:00:00 2001
From: Edwin Chan <edwin.chan@broadcom.com>
Date: Thu, 9 Mar 2017 16:58:49 -0800
Subject: net: bcmgenet: add begin/complete ethtool ops

Make sure clock is enabled for ethtool ops.

Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file")
Signed-off-by: Edwin Chan <edwin.chan@broadcom.com>
Signed-off-by: Doug Berger <opendmb@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index f93098840775..ec1f3014e410 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -450,6 +450,22 @@ static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv,
 			genet_dma_ring_regs[r]);
 }
 
+static int bcmgenet_begin(struct net_device *dev)
+{
+	struct bcmgenet_priv *priv = netdev_priv(dev);
+
+	/* Turn on the clock */
+	return clk_prepare_enable(priv->clk);
+}
+
+static void bcmgenet_complete(struct net_device *dev)
+{
+	struct bcmgenet_priv *priv = netdev_priv(dev);
+
+	/* Turn off the clock */
+	clk_disable_unprepare(priv->clk);
+}
+
 static int bcmgenet_get_link_ksettings(struct net_device *dev,
 				       struct ethtool_link_ksettings *cmd)
 {
@@ -1022,6 +1038,8 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
 
 /* standard ethtool support functions. */
 static const struct ethtool_ops bcmgenet_ethtool_ops = {
+	.begin			= bcmgenet_begin,
+	.complete		= bcmgenet_complete,
 	.get_strings		= bcmgenet_get_strings,
 	.get_sset_count		= bcmgenet_get_sset_count,
 	.get_ethtool_stats	= bcmgenet_get_ethtool_stats,
-- 
cgit v1.2.3


From 6d22fe14005ce66d1a120495ac16499b944feb95 Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Thu, 9 Mar 2017 16:58:50 -0800
Subject: net: bcmgenet: decouple flow control from bcmgenet_tx_reclaim

The bcmgenet_tx_reclaim() function is used to reclaim transmit
resources in different places within the driver.  Most of them
should not affect the state of the transmit flow control.

This commit relocates the logic for waking tx queues based on
freed resources to the napi polling function where it is more
appropriate.

Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file")
Signed-off-by: Doug Berger <opendmb@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index ec1f3014e410..69015fa50f20 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -1234,7 +1234,6 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev,
 	struct bcmgenet_priv *priv = netdev_priv(dev);
 	struct device *kdev = &priv->pdev->dev;
 	struct enet_cb *tx_cb_ptr;
-	struct netdev_queue *txq;
 	unsigned int pkts_compl = 0;
 	unsigned int bytes_compl = 0;
 	unsigned int c_index;
@@ -1286,13 +1285,8 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev,
 	dev->stats.tx_packets += pkts_compl;
 	dev->stats.tx_bytes += bytes_compl;
 
-	txq = netdev_get_tx_queue(dev, ring->queue);
-	netdev_tx_completed_queue(txq, pkts_compl, bytes_compl);
-
-	if (ring->free_bds > (MAX_SKB_FRAGS + 1)) {
-		if (netif_tx_queue_stopped(txq))
-			netif_tx_wake_queue(txq);
-	}
+	netdev_tx_completed_queue(netdev_get_tx_queue(dev, ring->queue),
+				  pkts_compl, bytes_compl);
 
 	return pkts_compl;
 }
@@ -1315,8 +1309,16 @@ static int bcmgenet_tx_poll(struct napi_struct *napi, int budget)
 	struct bcmgenet_tx_ring *ring =
 		container_of(napi, struct bcmgenet_tx_ring, napi);
 	unsigned int work_done = 0;
+	struct netdev_queue *txq;
+	unsigned long flags;
 
-	work_done = bcmgenet_tx_reclaim(ring->priv->dev, ring);
+	spin_lock_irqsave(&ring->lock, flags);
+	work_done = __bcmgenet_tx_reclaim(ring->priv->dev, ring);
+	if (ring->free_bds > (MAX_SKB_FRAGS + 1)) {
+		txq = netdev_get_tx_queue(ring->priv->dev, ring->queue);
+		netif_tx_wake_queue(txq);
+	}
+	spin_unlock_irqrestore(&ring->lock, flags);
 
 	if (work_done == 0) {
 		napi_complete(napi);
-- 
cgit v1.2.3


From 46f401c4297a2232a037ad8801b6c83c90414cf7 Mon Sep 17 00:00:00 2001
From: Larry Finger <Larry.Finger@lwfinger.net>
Date: Thu, 9 Mar 2017 20:33:51 -0600
Subject: powerpc/pmac: Fix crash in dma-mapping.h with NULL dma_ops

Commit 5657933dbb6e ("treewide: Move dma_ops from struct dev_archdata
into struct device") introduced a crash for macio devices, an example
backtrace being:

  kernel BUG at ./include/linux/dma-mapping.h:465!
  Oops: Exception in kernel mode, sig: 5 [#1]
  ...
  NIP [c031ddb0] dmam_alloc_coherent+0x74/0x140
  LR [c031de70] dmam_alloc_coherent+0x134/0x140
  Call Trace:
   dmam_alloc_coherent+0x134/0x140 (unreliable)
   pata_macio_port_start+0x3c/0x8c
   ata_host_start.part.5+0xfc/0x208
   ata_host_activate+0x128/0x154
   pata_macio_common_init+0x2f0/0x538
   pata_macio_attach+0xd8/0x180
   macio_device_probe+0x5c/0xec
   driver_probe_device+0x21c/0x314
   __driver_attach+0xcc/0xd0
   bus_for_each_dev+0x68/0xb4
   bus_add_driver+0x1dc/0x244
   driver_register+0x88/0x130
   pata_macio_init+0x5c/0x88
   do_one_initcall+0x40/0x170
   kernel_init_freeable+0x134/0x1d0
   kernel_init+0x18/0x110
   ret_from_kernel_thread+0x5c/0x64

This was caused by the device having NULL dma_ops, triggering the
BUG_ON(). Previously the device inherited its dma_ops via the assignment
to dev->ofdev.dev.archdata. However after commit 5657933dbb6e the
dma_ops are moved into dev->ofdev.dev, and so they need to be explicitly
copied.

Fixes: 5657933dbb6e ("treewide: Move dma_ops from struct dev_archdata into struct device")
Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Suggested-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
[mpe: Rewrite change log, add backtrace]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 drivers/macintosh/macio_asic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c
index 3f041b187033..f757cef293f8 100644
--- a/drivers/macintosh/macio_asic.c
+++ b/drivers/macintosh/macio_asic.c
@@ -392,6 +392,7 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip,
 	 * To get all the fields, copy all archdata
 	 */
 	dev->ofdev.dev.archdata = chip->lbus.pdev->dev.archdata;
+	dev->ofdev.dev.dma_ops = chip->lbus.pdev->dev.dma_ops;
 #endif /* CONFIG_PCI */
 
 #ifdef DEBUG
-- 
cgit v1.2.3


From 1363875bdb6317a2d0798284d7aaf320f0782f6d Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Tue, 28 Feb 2017 12:00:46 +1000
Subject: powerpc/64s: fix handling of non-synchronous machine checks

A synchronous machine check is an exception raised by the attempt to
execute the current instruction. If the error can't be corrected, it
can make sense to SIGBUS the currently running process.

In other cases, the error condition is not related to the current
instruction, so killing the current process is not the right thing to
do.

Today, all machine checks are MCE_SEV_ERROR_SYNC, so this has no
practical change. It will be used to handle POWER9 asynchronous
machine checks.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/opal.c | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 86d9fde93c17..e0f856bfbfe8 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -395,7 +395,6 @@ static int opal_recover_mce(struct pt_regs *regs,
 					struct machine_check_event *evt)
 {
 	int recovered = 0;
-	uint64_t ea = get_mce_fault_addr(evt);
 
 	if (!(regs->msr & MSR_RI)) {
 		/* If MSR_RI isn't set, we cannot recover */
@@ -404,26 +403,18 @@ static int opal_recover_mce(struct pt_regs *regs,
 	} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
 		/* Platform corrected itself */
 		recovered = 1;
-	} else if (ea && !is_kernel_addr(ea)) {
+	} else if (evt->severity == MCE_SEV_FATAL) {
+		/* Fatal machine check */
+		pr_err("Machine check interrupt is fatal\n");
+		recovered = 0;
+	} else if ((evt->severity == MCE_SEV_ERROR_SYNC) &&
+			(user_mode(regs) && !is_global_init(current))) {
 		/*
-		 * Faulting address is not in kernel text. We should be fine.
-		 * We need to find which process uses this address.
 		 * For now, kill the task if we have received exception when
 		 * in userspace.
 		 *
 		 * TODO: Queue up this address for hwpoisioning later.
 		 */
-		if (user_mode(regs) && !is_global_init(current)) {
-			_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
-			recovered = 1;
-		} else
-			recovered = 0;
-	} else if (user_mode(regs) && !is_global_init(current) &&
-		evt->severity == MCE_SEV_ERROR_SYNC) {
-		/*
-		 * If we have received a synchronous error when in userspace
-		 * kill the task.
-		 */
 		_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
 		recovered = 1;
 	}
-- 
cgit v1.2.3


From c1bbf387d6191e6e18f3adc4db45b922822c2ba4 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Tue, 28 Feb 2017 12:00:47 +1000
Subject: powerpc/64s: allow machine check handler to set severity and
 initiator

Currently severity and initiator are always set to MCE_SEV_ERROR_SYNC and
MCE_INITIATOR_CPU in the core mce code. Allow them to be set by the
machine specific mce handlers.

No functional change for existing handlers.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/mce.h  | 3 ++-
 arch/powerpc/kernel/mce.c       | 5 +++--
 arch/powerpc/kernel/mce_power.c | 6 ++++++
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index f97d8cb6bdf6..b2a5865ccd87 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -177,7 +177,8 @@ struct mce_error_info {
 		enum MCE_EratErrorType erat_error_type:8;
 		enum MCE_TlbErrorType tlb_error_type:8;
 	} u;
-	uint8_t		reserved[2];
+	enum MCE_Severity	severity:8;
+	enum MCE_Initiator	initiator:8;
 };
 
 #define MAX_MC_EVT	100
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index c6923ff45131..949507277436 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -90,13 +90,14 @@ void save_mce_event(struct pt_regs *regs, long handled,
 	mce->gpr3 = regs->gpr[3];
 	mce->in_use = 1;
 
-	mce->initiator = MCE_INITIATOR_CPU;
 	/* Mark it recovered if we have handled it and MSR(RI=1). */
 	if (handled && (regs->msr & MSR_RI))
 		mce->disposition = MCE_DISPOSITION_RECOVERED;
 	else
 		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
-	mce->severity = MCE_SEV_ERROR_SYNC;
+
+	mce->initiator = mce_err->initiator;
+	mce->severity = mce_err->severity;
 
 	/*
 	 * Populate the mce error_type and type-specific error_type.
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 7353991c4ece..c37fc5fdd433 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -281,6 +281,9 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs)
 	long handled = 1;
 	struct mce_error_info mce_error_info = { 0 };
 
+	mce_error_info.severity = MCE_SEV_ERROR_SYNC;
+	mce_error_info.initiator = MCE_INITIATOR_CPU;
+
 	srr1 = regs->msr;
 	nip = regs->nip;
 
@@ -352,6 +355,9 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs)
 	long handled = 1;
 	struct mce_error_info mce_error_info = { 0 };
 
+	mce_error_info.severity = MCE_SEV_ERROR_SYNC;
+	mce_error_info.initiator = MCE_INITIATOR_CPU;
+
 	srr1 = regs->msr;
 	nip = regs->nip;
 
-- 
cgit v1.2.3


From 7b9f71f974a12740e79e918cfd58c2fce0b5b580 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Tue, 28 Feb 2017 12:00:48 +1000
Subject: powerpc/64s: POWER9 machine check handler

Add POWER9 machine check handler. There are several new types of errors
added, so logging messages for those are also added.

This doesn't attempt to reuse any of the P7/8 defines or functions,
because that becomes too complex. The better option in future is to use
a table driven approach.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/bitops.h |   4 +
 arch/powerpc/include/asm/mce.h    | 105 +++++++++++++++++
 arch/powerpc/kernel/cputable.c    |   3 +
 arch/powerpc/kernel/mce.c         |  83 ++++++++++++++
 arch/powerpc/kernel/mce_power.c   | 231 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 426 insertions(+)

diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index 73eb794d6163..bc5fdfd22788 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -51,6 +51,10 @@
 #define PPC_BIT(bit)		(1UL << PPC_BITLSHIFT(bit))
 #define PPC_BITMASK(bs, be)	((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))
 
+/* Put a PPC bit into a "normal" bit position */
+#define PPC_BITEXTRACT(bits, ppc_bit, dst_bit)			\
+	((((bits) >> PPC_BITLSHIFT(ppc_bit)) & 1) << (dst_bit))
+
 #include <asm/barrier.h>
 
 /* Macro for generating the ***_bits() functions */
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index b2a5865ccd87..ed62efe01e49 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -66,6 +66,55 @@
 
 #define P8_DSISR_MC_SLB_ERRORS		(P7_DSISR_MC_SLB_ERRORS | \
 					 P8_DSISR_MC_ERAT_MULTIHIT_SEC)
+
+/*
+ * Machine Check bits on power9
+ */
+#define P9_SRR1_MC_LOADSTORE(srr1)	(((srr1) >> PPC_BITLSHIFT(42)) & 1)
+
+#define P9_SRR1_MC_IFETCH(srr1)	(	\
+	PPC_BITEXTRACT(srr1, 45, 0) |	\
+	PPC_BITEXTRACT(srr1, 44, 1) |	\
+	PPC_BITEXTRACT(srr1, 43, 2) |	\
+	PPC_BITEXTRACT(srr1, 36, 3) )
+
+/* 0 is reserved */
+#define P9_SRR1_MC_IFETCH_UE				1
+#define P9_SRR1_MC_IFETCH_SLB_PARITY			2
+#define P9_SRR1_MC_IFETCH_SLB_MULTIHIT			3
+#define P9_SRR1_MC_IFETCH_ERAT_MULTIHIT			4
+#define P9_SRR1_MC_IFETCH_TLB_MULTIHIT			5
+#define P9_SRR1_MC_IFETCH_UE_TLB_RELOAD			6
+/* 7 is reserved */
+#define P9_SRR1_MC_IFETCH_LINK_TIMEOUT			8
+#define P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT	9
+/* 10 ? */
+#define P9_SRR1_MC_IFETCH_RA			11
+#define P9_SRR1_MC_IFETCH_RA_TABLEWALK		12
+#define P9_SRR1_MC_IFETCH_RA_ASYNC_STORE		13
+#define P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT	14
+#define P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN	15
+
+/* DSISR bits for machine check (On Power9) */
+#define P9_DSISR_MC_UE					(PPC_BIT(48))
+#define P9_DSISR_MC_UE_TABLEWALK			(PPC_BIT(49))
+#define P9_DSISR_MC_LINK_LOAD_TIMEOUT			(PPC_BIT(50))
+#define P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT		(PPC_BIT(51))
+#define P9_DSISR_MC_ERAT_MULTIHIT			(PPC_BIT(52))
+#define P9_DSISR_MC_TLB_MULTIHIT_MFTLB			(PPC_BIT(53))
+#define P9_DSISR_MC_USER_TLBIE				(PPC_BIT(54))
+#define P9_DSISR_MC_SLB_PARITY_MFSLB			(PPC_BIT(55))
+#define P9_DSISR_MC_SLB_MULTIHIT_MFSLB			(PPC_BIT(56))
+#define P9_DSISR_MC_RA_LOAD				(PPC_BIT(57))
+#define P9_DSISR_MC_RA_TABLEWALK			(PPC_BIT(58))
+#define P9_DSISR_MC_RA_TABLEWALK_FOREIGN		(PPC_BIT(59))
+#define P9_DSISR_MC_RA_FOREIGN				(PPC_BIT(60))
+
+/* SLB error bits */
+#define P9_DSISR_MC_SLB_ERRORS		(P9_DSISR_MC_ERAT_MULTIHIT | \
+					 P9_DSISR_MC_SLB_PARITY_MFSLB | \
+					 P9_DSISR_MC_SLB_MULTIHIT_MFSLB)
+
 enum MCE_Version {
 	MCE_V1 = 1,
 };
@@ -93,6 +142,9 @@ enum MCE_ErrorType {
 	MCE_ERROR_TYPE_SLB = 2,
 	MCE_ERROR_TYPE_ERAT = 3,
 	MCE_ERROR_TYPE_TLB = 4,
+	MCE_ERROR_TYPE_USER = 5,
+	MCE_ERROR_TYPE_RA = 6,
+	MCE_ERROR_TYPE_LINK = 7,
 };
 
 enum MCE_UeErrorType {
@@ -121,6 +173,32 @@ enum MCE_TlbErrorType {
 	MCE_TLB_ERROR_MULTIHIT = 2,
 };
 
+enum MCE_UserErrorType {
+	MCE_USER_ERROR_INDETERMINATE = 0,
+	MCE_USER_ERROR_TLBIE = 1,
+};
+
+enum MCE_RaErrorType {
+	MCE_RA_ERROR_INDETERMINATE = 0,
+	MCE_RA_ERROR_IFETCH = 1,
+	MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH = 2,
+	MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN = 3,
+	MCE_RA_ERROR_LOAD = 4,
+	MCE_RA_ERROR_STORE = 5,
+	MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 6,
+	MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN = 7,
+	MCE_RA_ERROR_LOAD_STORE_FOREIGN = 8,
+};
+
+enum MCE_LinkErrorType {
+	MCE_LINK_ERROR_INDETERMINATE = 0,
+	MCE_LINK_ERROR_IFETCH_TIMEOUT = 1,
+	MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT = 2,
+	MCE_LINK_ERROR_LOAD_TIMEOUT = 3,
+	MCE_LINK_ERROR_STORE_TIMEOUT = 4,
+	MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT = 5,
+};
+
 struct machine_check_event {
 	enum MCE_Version	version:8;	/* 0x00 */
 	uint8_t			in_use;		/* 0x01 */
@@ -166,6 +244,30 @@ struct machine_check_event {
 			uint64_t	effective_address;
 			uint8_t		reserved_2[16];
 		} tlb_error;
+
+		struct {
+			enum MCE_UserErrorType user_error_type:8;
+			uint8_t		effective_address_provided;
+			uint8_t		reserved_1[6];
+			uint64_t	effective_address;
+			uint8_t		reserved_2[16];
+		} user_error;
+
+		struct {
+			enum MCE_RaErrorType ra_error_type:8;
+			uint8_t		effective_address_provided;
+			uint8_t		reserved_1[6];
+			uint64_t	effective_address;
+			uint8_t		reserved_2[16];
+		} ra_error;
+
+		struct {
+			enum MCE_LinkErrorType link_error_type:8;
+			uint8_t		effective_address_provided;
+			uint8_t		reserved_1[6];
+			uint64_t	effective_address;
+			uint8_t		reserved_2[16];
+		} link_error;
 	} u;
 };
 
@@ -176,6 +278,9 @@ struct mce_error_info {
 		enum MCE_SlbErrorType slb_error_type:8;
 		enum MCE_EratErrorType erat_error_type:8;
 		enum MCE_TlbErrorType tlb_error_type:8;
+		enum MCE_UserErrorType user_error_type:8;
+		enum MCE_RaErrorType ra_error_type:8;
+		enum MCE_LinkErrorType link_error_type:8;
 	} u;
 	enum MCE_Severity	severity:8;
 	enum MCE_Initiator	initiator:8;
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index bb7a1890aeb7..e79b9daa873c 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -77,6 +77,7 @@ extern void __flush_tlb_power8(unsigned int action);
 extern void __flush_tlb_power9(unsigned int action);
 extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
 extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
+extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
 #endif /* CONFIG_PPC64 */
 #if defined(CONFIG_E500)
 extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
@@ -540,6 +541,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_setup		= __setup_cpu_power9,
 		.cpu_restore		= __restore_cpu_power9,
 		.flush_tlb		= __flush_tlb_power9,
+		.machine_check_early	= __machine_check_early_realmode_p9,
 		.platform		= "power9",
 	},
 	{	/* Power9 */
@@ -559,6 +561,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_setup		= __setup_cpu_power9,
 		.cpu_restore		= __restore_cpu_power9,
 		.flush_tlb		= __flush_tlb_power9,
+		.machine_check_early	= __machine_check_early_realmode_p9,
 		.platform		= "power9",
 	},
 	{	/* Cell Broadband Engine */
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 949507277436..a1475e6aef3a 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -58,6 +58,15 @@ static void mce_set_error_info(struct machine_check_event *mce,
 	case MCE_ERROR_TYPE_TLB:
 		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
 		break;
+	case MCE_ERROR_TYPE_USER:
+		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
+		break;
+	case MCE_ERROR_TYPE_RA:
+		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
+		break;
+	case MCE_ERROR_TYPE_LINK:
+		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
+		break;
 	case MCE_ERROR_TYPE_UNKNOWN:
 	default:
 		break;
@@ -116,6 +125,15 @@ void save_mce_event(struct pt_regs *regs, long handled,
 	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
 		mce->u.erat_error.effective_address_provided = true;
 		mce->u.erat_error.effective_address = addr;
+	} else if (mce->error_type == MCE_ERROR_TYPE_USER) {
+		mce->u.user_error.effective_address_provided = true;
+		mce->u.user_error.effective_address = addr;
+	} else if (mce->error_type == MCE_ERROR_TYPE_RA) {
+		mce->u.ra_error.effective_address_provided = true;
+		mce->u.ra_error.effective_address = addr;
+	} else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
+		mce->u.link_error.effective_address_provided = true;
+		mce->u.link_error.effective_address = addr;
 	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
 		mce->u.ue_error.effective_address_provided = true;
 		mce->u.ue_error.effective_address = addr;
@@ -240,6 +258,29 @@ void machine_check_print_event_info(struct machine_check_event *evt)
 		"Parity",
 		"Multihit",
 	};
+	static const char *mc_user_types[] = {
+		"Indeterminate",
+		"tlbie(l) invalid",
+	};
+	static const char *mc_ra_types[] = {
+		"Indeterminate",
+		"Instruction fetch (bad)",
+		"Page table walk ifetch (bad)",
+		"Page table walk ifetch (foreign)",
+		"Load (bad)",
+		"Store (bad)",
+		"Page table walk Load/Store (bad)",
+		"Page table walk Load/Store (foreign)",
+		"Load/Store (foreign)",
+	};
+	static const char *mc_link_types[] = {
+		"Indeterminate",
+		"Instruction fetch (timeout)",
+		"Page table walk ifetch (timeout)",
+		"Load (timeout)",
+		"Store (timeout)",
+		"Page table walk Load/Store (timeout)",
+	};
 
 	/* Print things out */
 	if (evt->version != MCE_V1) {
@@ -316,6 +357,36 @@ void machine_check_print_event_info(struct machine_check_event *evt)
 			printk("%s    Effective address: %016llx\n",
 			       level, evt->u.tlb_error.effective_address);
 		break;
+	case MCE_ERROR_TYPE_USER:
+		subtype = evt->u.user_error.user_error_type <
+			ARRAY_SIZE(mc_user_types) ?
+			mc_user_types[evt->u.user_error.user_error_type]
+			: "Unknown";
+		printk("%s  Error type: User [%s]\n", level, subtype);
+		if (evt->u.user_error.effective_address_provided)
+			printk("%s    Effective address: %016llx\n",
+			       level, evt->u.user_error.effective_address);
+		break;
+	case MCE_ERROR_TYPE_RA:
+		subtype = evt->u.ra_error.ra_error_type <
+			ARRAY_SIZE(mc_ra_types) ?
+			mc_ra_types[evt->u.ra_error.ra_error_type]
+			: "Unknown";
+		printk("%s  Error type: Real address [%s]\n", level, subtype);
+		if (evt->u.ra_error.effective_address_provided)
+			printk("%s    Effective address: %016llx\n",
+			       level, evt->u.ra_error.effective_address);
+		break;
+	case MCE_ERROR_TYPE_LINK:
+		subtype = evt->u.link_error.link_error_type <
+			ARRAY_SIZE(mc_link_types) ?
+			mc_link_types[evt->u.link_error.link_error_type]
+			: "Unknown";
+		printk("%s  Error type: Link [%s]\n", level, subtype);
+		if (evt->u.link_error.effective_address_provided)
+			printk("%s    Effective address: %016llx\n",
+			       level, evt->u.link_error.effective_address);
+		break;
 	default:
 	case MCE_ERROR_TYPE_UNKNOWN:
 		printk("%s  Error type: Unknown\n", level);
@@ -342,6 +413,18 @@ uint64_t get_mce_fault_addr(struct machine_check_event *evt)
 		if (evt->u.tlb_error.effective_address_provided)
 			return evt->u.tlb_error.effective_address;
 		break;
+	case MCE_ERROR_TYPE_USER:
+		if (evt->u.user_error.effective_address_provided)
+			return evt->u.user_error.effective_address;
+		break;
+	case MCE_ERROR_TYPE_RA:
+		if (evt->u.ra_error.effective_address_provided)
+			return evt->u.ra_error.effective_address;
+		break;
+	case MCE_ERROR_TYPE_LINK:
+		if (evt->u.link_error.effective_address_provided)
+			return evt->u.link_error.effective_address;
+		break;
 	default:
 	case MCE_ERROR_TYPE_UNKNOWN:
 		break;
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index c37fc5fdd433..763d6f58caa8 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -116,6 +116,51 @@ static void flush_and_reload_slb(void)
 }
 #endif
 
+static void flush_erat(void)
+{
+	asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
+}
+
+#define MCE_FLUSH_SLB 1
+#define MCE_FLUSH_TLB 2
+#define MCE_FLUSH_ERAT 3
+
+static int mce_flush(int what)
+{
+#ifdef CONFIG_PPC_STD_MMU_64
+	if (what == MCE_FLUSH_SLB) {
+		flush_and_reload_slb();
+		return 1;
+	}
+#endif
+	if (what == MCE_FLUSH_ERAT) {
+		flush_erat();
+		return 1;
+	}
+	if (what == MCE_FLUSH_TLB) {
+		if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
+			cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static int mce_handle_flush_derrors(uint64_t dsisr, uint64_t slb, uint64_t tlb, uint64_t erat)
+{
+	if ((dsisr & slb) && mce_flush(MCE_FLUSH_SLB))
+		dsisr &= ~slb;
+	if ((dsisr & erat) && mce_flush(MCE_FLUSH_ERAT))
+		dsisr &= ~erat;
+	if ((dsisr & tlb) && mce_flush(MCE_FLUSH_TLB))
+		dsisr &= ~tlb;
+	/* Any other errors we don't understand? */
+	if (dsisr)
+		return 0;
+	return 1;
+}
+
 static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
 {
 	long handled = 1;
@@ -378,3 +423,189 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs)
 	save_mce_event(regs, handled, &mce_error_info, nip, addr);
 	return handled;
 }
+
+static int mce_handle_derror_p9(struct pt_regs *regs)
+{
+	uint64_t dsisr = regs->dsisr;
+
+	return mce_handle_flush_derrors(dsisr,
+			P9_DSISR_MC_SLB_PARITY_MFSLB |
+			P9_DSISR_MC_SLB_MULTIHIT_MFSLB,
+
+			P9_DSISR_MC_TLB_MULTIHIT_MFTLB,
+
+			P9_DSISR_MC_ERAT_MULTIHIT);
+}
+
+static int mce_handle_ierror_p9(struct pt_regs *regs)
+{
+	uint64_t srr1 = regs->msr;
+
+	switch (P9_SRR1_MC_IFETCH(srr1)) {
+	case P9_SRR1_MC_IFETCH_SLB_PARITY:
+	case P9_SRR1_MC_IFETCH_SLB_MULTIHIT:
+		return mce_flush(MCE_FLUSH_SLB);
+	case P9_SRR1_MC_IFETCH_TLB_MULTIHIT:
+		return mce_flush(MCE_FLUSH_TLB);
+	case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT:
+		return mce_flush(MCE_FLUSH_ERAT);
+	default:
+		return 0;
+	}
+}
+
+static void mce_get_derror_p9(struct pt_regs *regs,
+		struct mce_error_info *mce_err, uint64_t *addr)
+{
+	uint64_t dsisr = regs->dsisr;
+
+	mce_err->severity = MCE_SEV_ERROR_SYNC;
+	mce_err->initiator = MCE_INITIATOR_CPU;
+
+	if (dsisr & P9_DSISR_MC_USER_TLBIE)
+		*addr = regs->nip;
+	else
+		*addr = regs->dar;
+
+	if (dsisr & P9_DSISR_MC_UE) {
+		mce_err->error_type = MCE_ERROR_TYPE_UE;
+		mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
+	} else if (dsisr & P9_DSISR_MC_UE_TABLEWALK) {
+		mce_err->error_type = MCE_ERROR_TYPE_UE;
+		mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
+	} else if (dsisr & P9_DSISR_MC_LINK_LOAD_TIMEOUT) {
+		mce_err->error_type = MCE_ERROR_TYPE_LINK;
+		mce_err->u.link_error_type = MCE_LINK_ERROR_LOAD_TIMEOUT;
+	} else if (dsisr & P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT) {
+		mce_err->error_type = MCE_ERROR_TYPE_LINK;
+		mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT;
+	} else if (dsisr & P9_DSISR_MC_ERAT_MULTIHIT) {
+		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+	} else if (dsisr & P9_DSISR_MC_TLB_MULTIHIT_MFTLB) {
+		mce_err->error_type = MCE_ERROR_TYPE_TLB;
+		mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+	} else if (dsisr & P9_DSISR_MC_USER_TLBIE) {
+		mce_err->error_type = MCE_ERROR_TYPE_USER;
+		mce_err->u.user_error_type = MCE_USER_ERROR_TLBIE;
+	} else if (dsisr & P9_DSISR_MC_SLB_PARITY_MFSLB) {
+		mce_err->error_type = MCE_ERROR_TYPE_SLB;
+		mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
+	} else if (dsisr & P9_DSISR_MC_SLB_MULTIHIT_MFSLB) {
+		mce_err->error_type = MCE_ERROR_TYPE_SLB;
+		mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+	} else if (dsisr & P9_DSISR_MC_RA_LOAD) {
+		mce_err->error_type = MCE_ERROR_TYPE_RA;
+		mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD;
+	} else if (dsisr & P9_DSISR_MC_RA_TABLEWALK) {
+		mce_err->error_type = MCE_ERROR_TYPE_RA;
+		mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
+	} else if (dsisr & P9_DSISR_MC_RA_TABLEWALK_FOREIGN) {
+		mce_err->error_type = MCE_ERROR_TYPE_RA;
+		mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN;
+	} else if (dsisr & P9_DSISR_MC_RA_FOREIGN) {
+		mce_err->error_type = MCE_ERROR_TYPE_RA;
+		mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD_STORE_FOREIGN;
+	}
+}
+
+static void mce_get_ierror_p9(struct pt_regs *regs,
+		struct mce_error_info *mce_err, uint64_t *addr)
+{
+	uint64_t srr1 = regs->msr;
+
+	switch (P9_SRR1_MC_IFETCH(srr1)) {
+	case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE:
+	case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT:
+		mce_err->severity = MCE_SEV_FATAL;
+		break;
+	default:
+		mce_err->severity = MCE_SEV_ERROR_SYNC;
+		break;
+	}
+
+	mce_err->initiator = MCE_INITIATOR_CPU;
+
+	*addr = regs->nip;
+
+	switch (P9_SRR1_MC_IFETCH(srr1)) {
+	case P9_SRR1_MC_IFETCH_UE:
+		mce_err->error_type = MCE_ERROR_TYPE_UE;
+		mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
+		break;
+	case P9_SRR1_MC_IFETCH_SLB_PARITY:
+		mce_err->error_type = MCE_ERROR_TYPE_SLB;
+		mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
+		break;
+	case P9_SRR1_MC_IFETCH_SLB_MULTIHIT:
+		mce_err->error_type = MCE_ERROR_TYPE_SLB;
+		mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+		break;
+	case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT:
+		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+		break;
+	case P9_SRR1_MC_IFETCH_TLB_MULTIHIT:
+		mce_err->error_type = MCE_ERROR_TYPE_TLB;
+		mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+		break;
+	case P9_SRR1_MC_IFETCH_UE_TLB_RELOAD:
+		mce_err->error_type = MCE_ERROR_TYPE_UE;
+		mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
+		break;
+	case P9_SRR1_MC_IFETCH_LINK_TIMEOUT:
+		mce_err->error_type = MCE_ERROR_TYPE_LINK;
+		mce_err->u.link_error_type = MCE_LINK_ERROR_IFETCH_TIMEOUT;
+		break;
+	case P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT:
+		mce_err->error_type = MCE_ERROR_TYPE_LINK;
+		mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT;
+		break;
+	case P9_SRR1_MC_IFETCH_RA:
+		mce_err->error_type = MCE_ERROR_TYPE_RA;
+		mce_err->u.ra_error_type = MCE_RA_ERROR_IFETCH;
+		break;
+	case P9_SRR1_MC_IFETCH_RA_TABLEWALK:
+		mce_err->error_type = MCE_ERROR_TYPE_RA;
+		mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH;
+		break;
+	case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE:
+		mce_err->error_type = MCE_ERROR_TYPE_RA;
+		mce_err->u.ra_error_type = MCE_RA_ERROR_STORE;
+		break;
+	case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT:
+		mce_err->error_type = MCE_ERROR_TYPE_LINK;
+		mce_err->u.link_error_type = MCE_LINK_ERROR_STORE_TIMEOUT;
+		break;
+	case P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN:
+		mce_err->error_type = MCE_ERROR_TYPE_RA;
+		mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN;
+		break;
+	default:
+		break;
+	}
+}
+
+long __machine_check_early_realmode_p9(struct pt_regs *regs)
+{
+	uint64_t nip, addr;
+	long handled;
+	struct mce_error_info mce_error_info = { 0 };
+
+	nip = regs->nip;
+
+	if (P9_SRR1_MC_LOADSTORE(regs->msr)) {
+		handled = mce_handle_derror_p9(regs);
+		mce_get_derror_p9(regs, &mce_error_info, &addr);
+	} else {
+		handled = mce_handle_ierror_p9(regs);
+		mce_get_ierror_p9(regs, &mce_error_info, &addr);
+	}
+
+	/* Handle UE error. */
+	if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
+		handled = mce_handle_ue_error(regs);
+
+	save_mce_event(regs, handled, &mce_error_info, nip, addr);
+	return handled;
+}
-- 
cgit v1.2.3


From 296739839fa2851e6badc77dcfc45050094cb102 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Thu, 9 Mar 2017 20:53:31 +0100
Subject: net: phy: marvell: Fix double free of hwmon device

The hwmon temperature sensor devices is registered using a devm_hwmon
API call.  The marvell_release() would then manually free the device,
not using a devm_hmon API, resulting in the device being removed
twice, leading to a crash in kernfs_find_ns() during the second
removal.

Remove the manual removal, which makes marvell_release() empty, so
remove it as well.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Fixes: 0b04680fdae4 ("phy: marvell: Add support for temperature sensor")
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell.c | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index f9d0fa315a47..272b051a0199 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -1883,17 +1883,6 @@ static int m88e1510_probe(struct phy_device *phydev)
 	return m88e1510_hwmon_probe(phydev);
 }
 
-static void marvell_remove(struct phy_device *phydev)
-{
-#ifdef CONFIG_HWMON
-
-	struct marvell_priv *priv = phydev->priv;
-
-	if (priv && priv->hwmon_dev)
-		hwmon_device_unregister(priv->hwmon_dev);
-#endif
-}
-
 static struct phy_driver marvell_drivers[] = {
 	{
 		.phy_id = MARVELL_PHY_ID_88E1101,
@@ -1974,7 +1963,6 @@ static struct phy_driver marvell_drivers[] = {
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = &m88e1121_probe,
-		.remove = &marvell_remove,
 		.config_init = &m88e1121_config_init,
 		.config_aneg = &m88e1121_config_aneg,
 		.read_status = &marvell_read_status,
@@ -2087,7 +2075,6 @@ static struct phy_driver marvell_drivers[] = {
 		.features = PHY_GBIT_FEATURES | SUPPORTED_FIBRE,
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = &m88e1510_probe,
-		.remove = &marvell_remove,
 		.config_init = &m88e1510_config_init,
 		.config_aneg = &m88e1510_config_aneg,
 		.read_status = &marvell_read_status,
@@ -2109,7 +2096,6 @@ static struct phy_driver marvell_drivers[] = {
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = m88e1510_probe,
-		.remove = &marvell_remove,
 		.config_init = &marvell_config_init,
 		.config_aneg = &m88e1510_config_aneg,
 		.read_status = &marvell_read_status,
@@ -2127,7 +2113,6 @@ static struct phy_driver marvell_drivers[] = {
 		.phy_id_mask = MARVELL_PHY_ID_MASK,
 		.name = "Marvell 88E1545",
 		.probe = m88e1510_probe,
-		.remove = &marvell_remove,
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.config_init = &marvell_config_init,
-- 
cgit v1.2.3


From a1016e94cce9fb6ea56d7602263783e2d95d6e92 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Thu, 9 Mar 2017 17:14:32 +0000
Subject: ARM: wire up statx syscall

Wire up the new statx syscall for ARM.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/tools/syscall.tbl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 3c2cb5d5adfa..0bb0e9c6376c 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -411,3 +411,4 @@
 394	common	pkey_mprotect		sys_pkey_mprotect
 395	common	pkey_alloc		sys_pkey_alloc
 396	common	pkey_free		sys_pkey_free
+397	common	statx			sys_statx
-- 
cgit v1.2.3


From bba8376aea1dcbbe22bbda118c52abee317c7609 Mon Sep 17 00:00:00 2001
From: Matjaz Hegedic <matjaz.hegedic@gmail.com>
Date: Thu, 9 Mar 2017 14:00:17 +0100
Subject: x86/reboot/quirks: Fix typo in ASUS EeeBook X205TA reboot quirk

The reboot quirk for ASUS EeeBook X205TA contains a typo in
DMI_PRODUCT_NAME, improperly referring to X205TAW instead of
X205TA, which prevents the quirk from being triggered. The
model X205TAW already has a reboot quirk of its own.

This fix simply removes the inappropriate final letter W.

Fixes: 90b28ded88dd ("x86/reboot/quirks: Add ASUS EeeBook X205TA reboot quirk")
Signed-off-by: Matjaz Hegedic <matjaz.hegedic@gmail.com>
Link: http://lkml.kernel.org/r/1489064417-7445-1-git-send-email-matjaz.hegedic@gmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/reboot.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 4194d6f9bb29..067f9813fd2c 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -228,7 +228,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 		.ident = "ASUS EeeBook X205TA",
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X205TA"),
 		},
 	},
 	{	/* Handle problems with rebooting on ASUS EeeBook X205TAW */
-- 
cgit v1.2.3


From 9a8b0a230aca55ee142fd76f4765f1da1799da93 Mon Sep 17 00:00:00 2001
From: Mihail Atanassov <mihail.atanassov@arm.com>
Date: Wed, 15 Feb 2017 14:00:15 +0000
Subject: drm: mali-dp: Remove mclk rate management

The rate of mclk depends on the use-case. If no downscaling is required,
then mclk == pxlclk is a valid option; with downscaling however, the
rate at which mclk runs determines how much a plane can be downscaled
before composition. This is a system integration + power management
issue that is more suited to firmware rather than this driver.

Signed-off-by: Mihail Atanassov <mihail.atanassov@arm.com>
Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
---
 drivers/gpu/drm/arm/malidp_crtc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c
index 08e6a71f5d05..294b53697334 100644
--- a/drivers/gpu/drm/arm/malidp_crtc.c
+++ b/drivers/gpu/drm/arm/malidp_crtc.c
@@ -63,8 +63,7 @@ static void malidp_crtc_enable(struct drm_crtc *crtc)
 
 	clk_prepare_enable(hwdev->pxlclk);
 
-	/* mclk needs to be set to the same or higher rate than pxlclk */
-	clk_set_rate(hwdev->mclk, crtc->state->adjusted_mode.crtc_clock * 1000);
+	/* We rely on firmware to set mclk to a sensible level. */
 	clk_set_rate(hwdev->pxlclk, crtc->state->adjusted_mode.crtc_clock * 1000);
 
 	hwdev->modeset(hwdev, &vm);
-- 
cgit v1.2.3


From d1479f6108006555fe33d7cfe8db4f95ad614b9a Mon Sep 17 00:00:00 2001
From: Mihail Atanassov <mihail.atanassov@arm.com>
Date: Thu, 9 Feb 2017 11:32:00 +0000
Subject: drm: mali-dp: Fix smart layer not going to composition

Use rectangle 1 as a generic plane. Existing code already sets the smart
layer bounding box size + offset. The rectangles' offsets are relative
to the bounding box, so there is no need to set R1's offset (reset value
is 0), just its size which is the same as the bounding box.

Signed-off-by: Mihail Atanassov <mihail.atanassov@arm.com>
Signed-off-by: Liviu Dudau <liviu.dudau@arm.com>
---
 drivers/gpu/drm/arm/malidp_hw.c     |  2 +-
 drivers/gpu/drm/arm/malidp_planes.c | 18 ++++++++++++++++--
 drivers/gpu/drm/arm/malidp_regs.h   |  1 +
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/arm/malidp_hw.c b/drivers/gpu/drm/arm/malidp_hw.c
index 488aedf5b58d..9f5513006eee 100644
--- a/drivers/gpu/drm/arm/malidp_hw.c
+++ b/drivers/gpu/drm/arm/malidp_hw.c
@@ -83,7 +83,7 @@ static const struct malidp_layer malidp550_layers[] = {
 	{ DE_VIDEO1, MALIDP550_DE_LV1_BASE, MALIDP550_DE_LV1_PTR_BASE, MALIDP_DE_LV_STRIDE0 },
 	{ DE_GRAPHICS1, MALIDP550_DE_LG_BASE, MALIDP550_DE_LG_PTR_BASE, MALIDP_DE_LG_STRIDE },
 	{ DE_VIDEO2, MALIDP550_DE_LV2_BASE, MALIDP550_DE_LV2_PTR_BASE, MALIDP_DE_LV_STRIDE0 },
-	{ DE_SMART, MALIDP550_DE_LS_BASE, MALIDP550_DE_LS_PTR_BASE, 0 },
+	{ DE_SMART, MALIDP550_DE_LS_BASE, MALIDP550_DE_LS_PTR_BASE, MALIDP550_DE_LS_R1_STRIDE },
 };
 
 #define MALIDP_DE_DEFAULT_PREFETCH_START	5
diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c
index 414aada10fe5..d5aec082294c 100644
--- a/drivers/gpu/drm/arm/malidp_planes.c
+++ b/drivers/gpu/drm/arm/malidp_planes.c
@@ -37,6 +37,8 @@
 #define   LAYER_V_VAL(x)		(((x) & 0x1fff) << 16)
 #define MALIDP_LAYER_COMP_SIZE		0x010
 #define MALIDP_LAYER_OFFSET		0x014
+#define MALIDP550_LS_ENABLE		0x01c
+#define MALIDP550_LS_R1_IN_SIZE		0x020
 
 /*
  * This 4-entry look-up-table is used to determine the full 8-bit alpha value
@@ -242,6 +244,11 @@ static void malidp_de_plane_update(struct drm_plane *plane,
 			LAYER_V_VAL(plane->state->crtc_y),
 			mp->layer->base + MALIDP_LAYER_OFFSET);
 
+	if (mp->layer->id == DE_SMART)
+		malidp_hw_write(mp->hwdev,
+				LAYER_H_VAL(src_w) | LAYER_V_VAL(src_h),
+				mp->layer->base + MALIDP550_LS_R1_IN_SIZE);
+
 	/* first clear the rotation bits */
 	val = malidp_hw_read(mp->hwdev, mp->layer->base + MALIDP_LAYER_CONTROL);
 	val &= ~LAYER_ROT_MASK;
@@ -330,9 +337,16 @@ int malidp_de_planes_init(struct drm_device *drm)
 		plane->hwdev = malidp->dev;
 		plane->layer = &map->layers[i];
 
-		/* Skip the features which the SMART layer doesn't have */
-		if (id == DE_SMART)
+		if (id == DE_SMART) {
+			/*
+			 * Enable the first rectangle in the SMART layer to be
+			 * able to use it as a drm plane.
+			 */
+			malidp_hw_write(malidp->dev, 1,
+					plane->layer->base + MALIDP550_LS_ENABLE);
+			/* Skip the features which the SMART layer doesn't have. */
 			continue;
+		}
 
 		drm_plane_create_rotation_property(&plane->base, DRM_ROTATE_0, flags);
 		malidp_hw_write(malidp->dev, MALIDP_ALPHA_LUT,
diff --git a/drivers/gpu/drm/arm/malidp_regs.h b/drivers/gpu/drm/arm/malidp_regs.h
index aff6d4a84e99..b816067a65c5 100644
--- a/drivers/gpu/drm/arm/malidp_regs.h
+++ b/drivers/gpu/drm/arm/malidp_regs.h
@@ -84,6 +84,7 @@
 /* Stride register offsets relative to Lx_BASE */
 #define MALIDP_DE_LG_STRIDE		0x18
 #define MALIDP_DE_LV_STRIDE0		0x18
+#define MALIDP550_DE_LS_R1_STRIDE	0x28
 
 /* macros to set values into registers */
 #define MALIDP_DE_H_FRONTPORCH(x)	(((x) & 0xfff) << 0)
-- 
cgit v1.2.3


From 702f2ac87a9a8da23bf8506466bc70175fc970b2 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 10 Mar 2017 07:48:49 +0000
Subject: rxrpc: Wake up the transmitter if Rx window size increases on the
 peer

The RxRPC ACK packet may contain an extension that includes the peer's
current Rx window size for this call.  We adjust the local Tx window size
to match.  However, the transmitter can stall if the receive window is
reduced to 0 by the peer and then reopened.

This is because the normal way that the transmitter is re-energised is by
dropping something out of our Tx queue and thus making space.  When a
single gap is made, the transmitter is woken up.  However, because there's
nothing in the Tx queue at this point, this doesn't happen.

To fix this, perform a wake_up() any time we see the peer's Rx window size
increasing.

The observable symptom is that calls start failing on ETIMEDOUT and the
following:

	kAFS: SERVER DEAD state=-62

appears in dmesg.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/input.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index d74921c4969b..18b2ad8be8e2 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -652,6 +652,7 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	struct rxrpc_peer *peer;
 	unsigned int mtu;
+	bool wake = false;
 	u32 rwind = ntohl(ackinfo->rwind);
 
 	_proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }",
@@ -659,9 +660,14 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
 	       ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU),
 	       rwind, ntohl(ackinfo->jumbo_max));
 
-	if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
-		rwind = RXRPC_RXTX_BUFF_SIZE - 1;
-	call->tx_winsize = rwind;
+	if (call->tx_winsize != rwind) {
+		if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
+			rwind = RXRPC_RXTX_BUFF_SIZE - 1;
+		if (rwind > call->tx_winsize)
+			wake = true;
+		call->tx_winsize = rwind;
+	}
+
 	if (call->cong_ssthresh > rwind)
 		call->cong_ssthresh = rwind;
 
@@ -675,6 +681,9 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
 		spin_unlock_bh(&peer->lock);
 		_net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata);
 	}
+
+	if (wake)
+		wake_up(&call->waitq);
 }
 
 /*
-- 
cgit v1.2.3


From cb6950b7152fb3760942f9cb16bd2a35e5a1bfd1 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Tue, 7 Mar 2017 00:34:57 +0530
Subject: arm64: kprobes: remove kprobe_exceptions_notify

Commit fc62d0207ae0 ("kprobes: Introduce weak variant of
kprobe_exceptions_notify()") introduces a generic empty version of the
function for architectures that don't need special handling, like arm64.
As such, remove the arch/arm64/ specific handler.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/kernel/probes/kprobes.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 2a07aae5b8a2..c5c45942fb6e 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -372,12 +372,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
 	return 0;
 }
 
-int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
-				       unsigned long val, void *data)
-{
-	return NOTIFY_DONE;
-}
-
 static void __kprobes kprobe_handler(struct pt_regs *regs)
 {
 	struct kprobe *p, *cur_kprobe;
-- 
cgit v1.2.3


From b0de0ccc8b9edd8846828e0ecdc35deacdf186b0 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 6 Mar 2017 19:06:40 +0000
Subject: arm64: kasan: avoid bad virt_to_pfn()

Booting a v4.11-rc1 kernel with DEBUG_VIRTUAL and KASAN enabled produces
the following splat (trimmed for brevity):

[    0.000000] virt_to_phys used for non-linear address: ffff200008080000 (0xffff200008080000)
[    0.000000] WARNING: CPU: 0 PID: 0 at arch/arm64/mm/physaddr.c:14 __virt_to_phys+0x48/0x70
[    0.000000] PC is at __virt_to_phys+0x48/0x70
[    0.000000] LR is at __virt_to_phys+0x48/0x70
[    0.000000] Call trace:
[    0.000000] [<ffff2000080b1ac0>] __virt_to_phys+0x48/0x70
[    0.000000] [<ffff20000a03b86c>] kasan_init+0x1c0/0x498
[    0.000000] [<ffff20000a034018>] setup_arch+0x2fc/0x948
[    0.000000] [<ffff20000a030c68>] start_kernel+0xb8/0x570
[    0.000000] [<ffff20000a0301e8>] __primary_switched+0x6c/0x74

This is because we use virt_to_pfn() on a kernel image address when
trying to figure out its nid, so that we can allocate its shadow from
the same node.

As with other recent changes, this patch uses lm_alias() to solve this.

We could instead use NUMA_NO_NODE, as x86 does for all shadow
allocations, though we'll likely want the "real" memory shadow to be
backed from its corresponding nid anyway, so we may as well be
consistent and find the nid for the image shadow.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Acked-by: Laura Abbott <labbott@redhat.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/mm/kasan_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 55d1e9205543..687a358a3733 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -162,7 +162,7 @@ void __init kasan_init(void)
 	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
 
 	vmemmap_populate(kimg_shadow_start, kimg_shadow_end,
-			 pfn_to_nid(virt_to_pfn(_text)));
+			 pfn_to_nid(virt_to_pfn(lm_alias(_text))));
 
 	/*
 	 * vmemmap_populate() has populated the shadow region that covers the
-- 
cgit v1.2.3


From 5c2a625937ba49bc691089370638223d310cda9a Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Wed, 8 Mar 2017 16:27:04 -0800
Subject: arm64: support keyctl() system call in 32-bit mode

As is the case for a number of other architectures that have a 32-bit
compat mode, enable KEYS_COMPAT if both COMPAT and KEYS are enabled.
This allows AArch32 programs to use the keyctl() system call when
running on an AArch64 kernel.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/Kconfig | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a39029b5414e..f21e9a76ff67 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1063,6 +1063,10 @@ config SYSVIPC_COMPAT
 	def_bool y
 	depends on COMPAT && SYSVIPC
 
+config KEYS_COMPAT
+	def_bool y
+	depends on COMPAT && KEYS
+
 endmenu
 
 menu "Power management options"
-- 
cgit v1.2.3


From 14088540ad63c648e5cdf490412033f792d16b6b Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 10 Mar 2017 17:44:18 +0000
Subject: arm64: use const cap for system_uses_ttbr0_pan()

Since commit 4b65a5db362783ab ("arm64: Introduce
uaccess_{disable,enable} functionality based on TTBR0_EL1"),
system_uses_ttbr0_pan() has used cpus_have_cap() to determine whether
PAN is present.

Since commit a4023f682739439b ("arm64: Add hypervisor safe helper for
checking constant capabilities"), which was introduced around the same
time, cpus_have_cap() doesn't try to use a static key, and must always
perform a load, test, and consitional branch (likely a tbnz for the
latter two).

Elsewhere, we moved to using cpus_have_const_cap(), which can use a
static key (i.e. a non-conditional branch), which is patched at runtime
when the feature is detected.

This patch makes system_uses_ttbr0_pan() use cpus_have_const_cap(). The
static key is likely a win for hot-paths like the uacccess primitives,
and this makes our usage consistent regardless.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/cpufeature.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 05310ad8c5ab..f31c48d0cd68 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -251,7 +251,7 @@ static inline bool system_supports_fpsimd(void)
 static inline bool system_uses_ttbr0_pan(void)
 {
 	return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) &&
-		!cpus_have_cap(ARM64_HAS_PAN);
+		!cpus_have_const_cap(ARM64_HAS_PAN);
 }
 
 #endif /* __ASSEMBLY__ */
-- 
cgit v1.2.3


From af36370569eb37420e1e78a2e60c277b781fcd00 Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@mellanox.com>
Date: Fri, 10 Mar 2017 14:33:01 +0200
Subject: net/mlx5: Fix create autogroup prev initializer

The autogroups list is a list of non overlapping group boundaries
sorted by their start index. If the autogroups list wasn't empty
and an empty group slot was found at the start of the list,
the new group was added to the end of the list instead of the
beginning, as the prev initializer was incorrect.
When this was repeated, it caused multiple groups to have
overlapping boundaries.

Fixed that by correctly initializing the prev pointer to the
start of the list.

Fixes: eccec8da3b4e ('net/mlx5: Keep autogroups list ordered')
Signed-off-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 2478516a61e2..ded27bb9a3b6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1136,7 +1136,7 @@ static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft,
 						u32 *match_criteria)
 {
 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
-	struct list_head *prev = ft->node.children.prev;
+	struct list_head *prev = &ft->node.children;
 	unsigned int candidate_index = 0;
 	struct mlx5_flow_group *fg;
 	void *match_criteria_addr;
-- 
cgit v1.2.3


From 5d47f6c89d568ab61712d8c40676fbb020b68752 Mon Sep 17 00:00:00 2001
From: Daniel Jurgens <danielj@mellanox.com>
Date: Fri, 10 Mar 2017 14:33:02 +0200
Subject: net/mlx5: Don't save PCI state when PCI error is detected

When a PCI error is detected the PCI state could be corrupt, don't save
it in that flow. Save the state after initialization. After restoring the
PCI state during slot reset save it again, restoring the state destroys
the previously saved state info.

Fixes: 05ac2c0b7438 ('net/mlx5: Fix race between PCI error handlers and
health work')
Signed-off-by: Daniel Jurgens <danielj@mellanox.com>

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index c4242a4e8130..e2bd600d19de 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1352,6 +1352,7 @@ static int init_one(struct pci_dev *pdev,
 	if (err)
 		goto clean_load;
 
+	pci_save_state(pdev);
 	return 0;
 
 clean_load:
@@ -1407,9 +1408,8 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
 
 	mlx5_enter_error_state(dev);
 	mlx5_unload_one(dev, priv, false);
-	/* In case of kernel call save the pci state and drain the health wq */
+	/* In case of kernel call drain the health wq */
 	if (state) {
-		pci_save_state(pdev);
 		mlx5_drain_health_wq(dev);
 		mlx5_pci_disable_device(dev);
 	}
@@ -1461,6 +1461,7 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
 
 	pci_set_master(pdev);
 	pci_restore_state(pdev);
+	pci_save_state(pdev);
 
 	if (wait_vital(pdev)) {
 		dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__);
-- 
cgit v1.2.3


From 33e21c59526e9147d7c68913995298f10c35cd6f Mon Sep 17 00:00:00 2001
From: Huy Nguyen <huyn@mellanox.com>
Date: Fri, 10 Mar 2017 14:33:03 +0200
Subject: net/mlx5e: remove IEEE/CEE mode check when setting DCBX mode

Currently, the function setdcbx fails if the request dcbx mode
is either IEEE or CEE. We remove the IEEE/CEE mode check because
we support both IEEE and CEE interfaces.

Fixes: 3a6a931dfb8e ("net/mlx5e: Support DCBX CEE API")
Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index 0523ed47f597..8fa23f6a1f67 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -302,6 +302,9 @@ static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode)
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	struct mlx5e_dcbx *dcbx = &priv->dcbx;
 
+	if (mode & DCB_CAP_DCBX_LLD_MANAGED)
+		return 1;
+
 	if ((!mode) && MLX5_CAP_GEN(priv->mdev, dcbx)) {
 		if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_AUTO)
 			return 0;
@@ -315,13 +318,10 @@ static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode)
 		return 1;
 	}
 
-	if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev)))
+	if (!(mode & DCB_CAP_DCBX_HOST))
 		return 1;
 
-	if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
-	    !(mode & DCB_CAP_DCBX_VER_CEE) ||
-	    !(mode & DCB_CAP_DCBX_VER_IEEE) ||
-	    !(mode & DCB_CAP_DCBX_HOST))
+	if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev)))
 		return 1;
 
 	return 0;
-- 
cgit v1.2.3


From 65ba8fb7d5c6803ec236bb8d6650465fed7f9769 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Fri, 10 Mar 2017 14:33:04 +0200
Subject: net/mlx5e: Avoid wrong identification of rules on deletion

When deleting offloaded TC flows, we must correctly identify E-switch
rules. The current check could get us wrong w.r.t to rules set on the
PF. Since it's possible to set NIC rules on the PF, switch to SRIOV
offloads mode and then attempt to delete a NIC rule.

To solve that, we add a flags field to offloaded rules, set it on
creation time and use that over the code where currently needed.

Fixes: 8b32580df1cb ('net/mlx5e: Add TC vlan action for SRIOV offloads')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 33 ++++++++++++++-----------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 44406a5ec15d..79481f4cf264 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -48,9 +48,14 @@
 #include "eswitch.h"
 #include "vxlan.h"
 
+enum {
+	MLX5E_TC_FLOW_ESWITCH	= BIT(0),
+};
+
 struct mlx5e_tc_flow {
 	struct rhash_head	node;
 	u64			cookie;
+	u8			flags;
 	struct mlx5_flow_handle *rule;
 	struct list_head	encap; /* flows sharing the same encap */
 	struct mlx5_esw_flow_attr *attr;
@@ -177,7 +182,7 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
 		mlx5_fc_destroy(priv->mdev, counter);
 	}
 
-	if (esw && esw->mode == SRIOV_OFFLOADS) {
+	if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
 		mlx5_eswitch_del_vlan_action(esw, flow->attr);
 		if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
 			mlx5e_detach_encap(priv, flow);
@@ -598,6 +603,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 }
 
 static int parse_cls_flower(struct mlx5e_priv *priv,
+			    struct mlx5e_tc_flow *flow,
 			    struct mlx5_flow_spec *spec,
 			    struct tc_cls_flower_offload *f)
 {
@@ -609,7 +615,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv,
 
 	err = __parse_cls_flower(priv, spec, f, &min_inline);
 
-	if (!err && esw->mode == SRIOV_OFFLOADS &&
+	if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH) &&
 	    rep->vport != FDB_UPLINK_VPORT) {
 		if (min_inline > esw->offloads.inline_mode) {
 			netdev_warn(priv->netdev,
@@ -1132,23 +1138,19 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
 			   struct tc_cls_flower_offload *f)
 {
 	struct mlx5e_tc_table *tc = &priv->fs.tc;
-	int err = 0;
-	bool fdb_flow = false;
+	int err, attr_size = 0;
 	u32 flow_tag, action;
 	struct mlx5e_tc_flow *flow;
 	struct mlx5_flow_spec *spec;
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	u8 flow_flags = 0;
 
-	if (esw && esw->mode == SRIOV_OFFLOADS)
-		fdb_flow = true;
-
-	if (fdb_flow)
-		flow = kzalloc(sizeof(*flow) +
-			       sizeof(struct mlx5_esw_flow_attr),
-			       GFP_KERNEL);
-	else
-		flow = kzalloc(sizeof(*flow), GFP_KERNEL);
+	if (esw && esw->mode == SRIOV_OFFLOADS) {
+		flow_flags = MLX5E_TC_FLOW_ESWITCH;
+		attr_size  = sizeof(struct mlx5_esw_flow_attr);
+	}
 
+	flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
 	spec = mlx5_vzalloc(sizeof(*spec));
 	if (!spec || !flow) {
 		err = -ENOMEM;
@@ -1156,12 +1158,13 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
 	}
 
 	flow->cookie = f->cookie;
+	flow->flags = flow_flags;
 
-	err = parse_cls_flower(priv, spec, f);
+	err = parse_cls_flower(priv, flow, spec, f);
 	if (err < 0)
 		goto err_free;
 
-	if (fdb_flow) {
+	if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
 		flow->attr  = (struct mlx5_esw_flow_attr *)(flow + 1);
 		err = parse_tc_fdb_actions(priv, f->exts, flow);
 		if (err < 0)
-- 
cgit v1.2.3


From ea29bd304d7b522f6162a36f394e690c579b5a63 Mon Sep 17 00:00:00 2001
From: Eugenia Emantayev <eugenia@mellanox.com>
Date: Fri, 10 Mar 2017 14:33:05 +0200
Subject: net/mlx5e: Fix loopback selftest

Change packet type handler to ETH_P_IP instead of ETH_P_ALL
since we are already expecting an IP packet.

Also, using ETH_P_ALL will cause the loopback test packet type handler
to be called on all outgoing packets, especially our own self loopback
test SKB, which will be validated on xmit as well, and we don't want that.

Tested with:
ethtool -t ethX
validated that the loopback test passes.

Fixes: 0952da791c97 ('net/mlx5e: Add support for loopback selftest')
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
index 31e3cb7ee5fe..5621dcfda4f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -204,9 +204,6 @@ mlx5e_test_loopback_validate(struct sk_buff *skb,
 	struct iphdr *iph;
 
 	/* We are only going to peek, no need to clone the SKB */
-	if (skb->protocol != htons(ETH_P_IP))
-		goto out;
-
 	if (MLX5E_TEST_PKT_SIZE - ETH_HLEN > skb_headlen(skb))
 		goto out;
 
@@ -249,7 +246,7 @@ static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv,
 	lbtp->loopback_ok = false;
 	init_completion(&lbtp->comp);
 
-	lbtp->pt.type = htons(ETH_P_ALL);
+	lbtp->pt.type = htons(ETH_P_IP);
 	lbtp->pt.func = mlx5e_test_loopback_validate;
 	lbtp->pt.dev = priv->netdev;
 	lbtp->pt.af_packet_priv = lbtp;
-- 
cgit v1.2.3


From 0e4c0e6ea7d4a988a5ae2791c7cb5769b5256dad Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 17 Feb 2017 15:25:08 +0100
Subject: arm64: kernel: Update kerneldoc for cpu_suspend() rename

Commit af391b15f7b56ce1 ("arm64: kernel: rename __cpu_suspend to keep it
aligned with arm") renamed cpu_suspend() to arm_cpuidle_suspend(), but
forgot to update the kerneldoc header.

Fixes: af391b15f7b56ce1 ("arm64: kernel: rename __cpu_suspend to keep it aligned with arm")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/kernel/cpuidle.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
index 75a0f8acef66..fd691087dc9a 100644
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -30,7 +30,7 @@ int arm_cpuidle_init(unsigned int cpu)
 }
 
 /**
- * cpu_suspend() - function to enter a low-power idle state
+ * arm_cpuidle_suspend() - function to enter a low-power idle state
  * @arg: argument to pass to CPU suspend operations
  *
  * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU
-- 
cgit v1.2.3


From b6573da139ecbee6f2c77392b51266d4521d50ac Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Fri, 10 Mar 2017 10:10:54 -0800
Subject: Input: synaptics-rmi4 - prevent null pointer dereference in f30

If the platform data has f30_data.disable set, f30 in rmi_f30_config()
might be null. Prevent a kernel oops by checking for non-null f30.

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/rmi4/rmi_f30.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/input/rmi4/rmi_f30.c b/drivers/input/rmi4/rmi_f30.c
index 3422464af229..b8572b342dcb 100644
--- a/drivers/input/rmi4/rmi_f30.c
+++ b/drivers/input/rmi4/rmi_f30.c
@@ -170,6 +170,10 @@ static int rmi_f30_config(struct rmi_function *fn)
 				rmi_get_platform_data(fn->rmi_dev);
 	int error;
 
+	/* can happen if f30_data.disable is set */
+	if (!f30)
+		return 0;
+
 	if (pdata->f30_data.trackstick_buttons) {
 		/* Try [re-]establish link to F03. */
 		f30->f03 = rmi_find_function(fn->rmi_dev, 0x03);
-- 
cgit v1.2.3


From ed46e66cc1b3d684042f92dfa2ab15ee917b4cac Mon Sep 17 00:00:00 2001
From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Date: Mon, 27 Feb 2017 14:30:25 +0200
Subject: iommu/io-pgtable-arm: Check for leaf entry before dereferencing it

Do a check for already installed leaf entry at the current level before
dereferencing it in order to avoid walking the page table down with
wrong pointer to the next level.

Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
CC: Will Deacon <will.deacon@arm.com>
CC: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/io-pgtable-arm.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index feacc54bec68..f9bc6ebb8140 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -335,8 +335,12 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
 		if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
 			pte |= ARM_LPAE_PTE_NSTABLE;
 		__arm_lpae_set_pte(ptep, pte, cfg);
-	} else {
+	} else if (!iopte_leaf(pte, lvl)) {
 		cptep = iopte_deref(pte, data);
+	} else {
+		/* We require an unmap first */
+		WARN_ON(!selftest_running);
+		return -EEXIST;
 	}
 
 	/* Rinse, repeat */
-- 
cgit v1.2.3


From a03849e7210277fa212779b7cd9c30e1ab6194b2 Mon Sep 17 00:00:00 2001
From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Date: Mon, 27 Feb 2017 14:30:26 +0200
Subject: iommu/io-pgtable-arm-v7s: Check for leaf entry before dereferencing
 it

Do a check for already installed leaf entry at the current level before
dereferencing it in order to avoid walking the page table down with
wrong pointer to the next level.

Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
CC: Will Deacon <will.deacon@arm.com>
CC: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/io-pgtable-arm-v7s.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 1c049e2e12bf..8d6ca28c3e1f 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -422,8 +422,12 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova,
 			pte |= ARM_V7S_ATTR_NS_TABLE;
 
 		__arm_v7s_set_pte(ptep, pte, 1, cfg);
-	} else {
+	} else if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) {
 		cptep = iopte_deref(pte, lvl);
+	} else {
+		/* We require an unmap first */
+		WARN_ON(!selftest_running);
+		return -EEXIST;
 	}
 
 	/* Rinse, repeat */
-- 
cgit v1.2.3


From d8a8ed9758241e138933c67e40db2db2790eca19 Mon Sep 17 00:00:00 2001
From: Tom St Denis <tom.stdenis@amd.com>
Date: Thu, 9 Mar 2017 13:21:07 -0500
Subject: drm/amd/amdgpu: Disable GFX_PG on Carrizo until compute issues solved

Currently compute jobs will stall if GFX_PG is enabled.  Until this
is resolved we'll disable GFX_PG.

Signed-off-by: Tom St Denis <tom.stdenis@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/vi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 50bdb24ef8d6..4a785d6acfb9 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -1051,7 +1051,7 @@ static int vi_common_early_init(void *handle)
 		/* rev0 hardware requires workarounds to support PG */
 		adev->pg_flags = 0;
 		if (adev->rev_id != 0x00) {
-			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
+			adev->pg_flags |=
 				AMD_PG_SUPPORT_GFX_SMG |
 				AMD_PG_SUPPORT_GFX_PIPELINE |
 				AMD_PG_SUPPORT_CP |
-- 
cgit v1.2.3


From 607523d19c9d67ba4cf7bdaced644f11ed04992c Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 10 Mar 2017 12:13:04 +1000
Subject: drm/amdgpu: fix parser init error path to avoid crash in parser fini
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If we don't reset the chunk info in the error path, the subsequent
fini path will double free.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index d2d0f60ff36d..99424cb8020b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -240,6 +240,8 @@ free_partial_kdata:
 	for (; i >= 0; i--)
 		drm_free_large(p->chunks[i].kdata);
 	kfree(p->chunks);
+	p->chunks = NULL;
+	p->nchunks = 0;
 put_ctx:
 	amdgpu_ctx_put(p->ctx);
 free_chunk:
-- 
cgit v1.2.3


From 40c50c1fecdf012a3bf055ec813f0ef2eda2749c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 10 Mar 2017 13:17:18 +0100
Subject: kexec, x86/purgatory: Unbreak it and clean it up

The purgatory code defines global variables which are referenced via a
symbol lookup in the kexec code (core and arch).

A recent commit addressing sparse warnings made these static and thereby
broke kexec_file.

Why did this happen? Simply because the whole machinery is undocumented and
lacks any form of forward declarations. The variable names are unspecific
and lack a prefix, so adding forward declarations creates shadow variables
in the core code. Aside of that the code relies on magic constants and
duplicate struct definitions with no way to ensure that these things stay
in sync. The section placement of the purgatory variables happened by
chance and not by design.

Unbreak kexec and cleanup the mess:

 - Add proper forward declarations and document the usage
 - Use common struct definition
 - Use the proper common defines instead of magic constants
 - Add a purgatory_ prefix to have a proper name space
 - Use ARRAY_SIZE() instead of a homebrewn reimplementation
 - Add proper sections to the purgatory variables [ From Mike ]

Fixes: 72042a8c7b01 ("x86/purgatory: Make functions and variables static")
Reported-by: Mike Galbraith <<efault@gmx.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Nicholas Mc Guire <der.herr@hofr.at>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: "Tobin C. Harding" <me@tobin.cc>
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1703101315140.3681@nanos
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/powerpc/purgatory/trampoline.S | 12 ++++++------
 arch/x86/include/asm/purgatory.h    | 20 ++++++++++++++++++++
 arch/x86/kernel/machine_kexec_64.c  |  9 ++++++---
 arch/x86/purgatory/purgatory.c      | 35 +++++++++++++++++------------------
 arch/x86/purgatory/purgatory.h      |  8 --------
 arch/x86/purgatory/setup-x86_64.S   |  2 +-
 arch/x86/purgatory/sha256.h         |  1 -
 include/linux/purgatory.h           | 23 +++++++++++++++++++++++
 kernel/kexec_file.c                 |  8 ++++----
 kernel/kexec_internal.h             |  6 +-----
 10 files changed, 78 insertions(+), 46 deletions(-)
 create mode 100644 arch/x86/include/asm/purgatory.h
 delete mode 100644 arch/x86/purgatory/purgatory.h
 create mode 100644 include/linux/purgatory.h

diff --git a/arch/powerpc/purgatory/trampoline.S b/arch/powerpc/purgatory/trampoline.S
index f9760ccf4032..3696ea6c4826 100644
--- a/arch/powerpc/purgatory/trampoline.S
+++ b/arch/powerpc/purgatory/trampoline.S
@@ -116,13 +116,13 @@ dt_offset:
 
 	.data
 	.balign 8
-.globl sha256_digest
-sha256_digest:
+.globl purgatory_sha256_digest
+purgatory_sha256_digest:
 	.skip	32
-	.size sha256_digest, . - sha256_digest
+	.size purgatory_sha256_digest, . - purgatory_sha256_digest
 
 	.balign 8
-.globl sha_regions
-sha_regions:
+.globl purgatory_sha_regions
+purgatory_sha_regions:
 	.skip	8 * 2 * 16
-	.size sha_regions, . - sha_regions
+	.size purgatory_sha_regions, . - purgatory_sha_regions
diff --git a/arch/x86/include/asm/purgatory.h b/arch/x86/include/asm/purgatory.h
new file mode 100644
index 000000000000..d7da2729903d
--- /dev/null
+++ b/arch/x86/include/asm/purgatory.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_X86_PURGATORY_H
+#define _ASM_X86_PURGATORY_H
+
+#ifndef __ASSEMBLY__
+#include <linux/purgatory.h>
+
+extern void purgatory(void);
+/*
+ * These forward declarations serve two purposes:
+ *
+ * 1) Make sparse happy when checking arch/purgatory
+ * 2) Document that these are required to be global so the symbol
+ *    lookup in kexec works
+ */
+extern unsigned long purgatory_backup_dest;
+extern unsigned long purgatory_backup_src;
+extern unsigned long purgatory_backup_sz;
+#endif	/* __ASSEMBLY__ */
+
+#endif /* _ASM_PURGATORY_H */
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 307b1f4543de..857cdbd02867 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -194,19 +194,22 @@ static int arch_update_purgatory(struct kimage *image)
 
 	/* Setup copying of backup region */
 	if (image->type == KEXEC_TYPE_CRASH) {
-		ret = kexec_purgatory_get_set_symbol(image, "backup_dest",
+		ret = kexec_purgatory_get_set_symbol(image,
+				"purgatory_backup_dest",
 				&image->arch.backup_load_addr,
 				sizeof(image->arch.backup_load_addr), 0);
 		if (ret)
 			return ret;
 
-		ret = kexec_purgatory_get_set_symbol(image, "backup_src",
+		ret = kexec_purgatory_get_set_symbol(image,
+				"purgatory_backup_src",
 				&image->arch.backup_src_start,
 				sizeof(image->arch.backup_src_start), 0);
 		if (ret)
 			return ret;
 
-		ret = kexec_purgatory_get_set_symbol(image, "backup_sz",
+		ret = kexec_purgatory_get_set_symbol(image,
+				"purgatory_backup_sz",
 				&image->arch.backup_src_sz,
 				sizeof(image->arch.backup_src_sz), 0);
 		if (ret)
diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
index b6d5c8946e66..470edad96bb9 100644
--- a/arch/x86/purgatory/purgatory.c
+++ b/arch/x86/purgatory/purgatory.c
@@ -10,22 +10,19 @@
  * Version 2.  See the file COPYING for more details.
  */
 
+#include <linux/bug.h>
+#include <asm/purgatory.h>
+
 #include "sha256.h"
-#include "purgatory.h"
 #include "../boot/string.h"
 
-struct sha_region {
-	unsigned long start;
-	unsigned long len;
-};
-
-static unsigned long backup_dest;
-static unsigned long backup_src;
-static unsigned long backup_sz;
+unsigned long purgatory_backup_dest __section(.kexec-purgatory);
+unsigned long purgatory_backup_src __section(.kexec-purgatory);
+unsigned long purgatory_backup_sz __section(.kexec-purgatory);
 
-static u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 };
+u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE] __section(.kexec-purgatory);
 
-struct sha_region sha_regions[16] = {};
+struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX] __section(.kexec-purgatory);
 
 /*
  * On x86, second kernel requries first 640K of memory to boot. Copy
@@ -34,26 +31,28 @@ struct sha_region sha_regions[16] = {};
  */
 static int copy_backup_region(void)
 {
-	if (backup_dest)
-		memcpy((void *)backup_dest, (void *)backup_src, backup_sz);
-
+	if (purgatory_backup_dest) {
+		memcpy((void *)purgatory_backup_dest,
+		       (void *)purgatory_backup_src, purgatory_backup_sz);
+	}
 	return 0;
 }
 
 static int verify_sha256_digest(void)
 {
-	struct sha_region *ptr, *end;
+	struct kexec_sha_region *ptr, *end;
 	u8 digest[SHA256_DIGEST_SIZE];
 	struct sha256_state sctx;
 
 	sha256_init(&sctx);
-	end = &sha_regions[sizeof(sha_regions)/sizeof(sha_regions[0])];
-	for (ptr = sha_regions; ptr < end; ptr++)
+	end = purgatory_sha_regions + ARRAY_SIZE(purgatory_sha_regions);
+
+	for (ptr = purgatory_sha_regions; ptr < end; ptr++)
 		sha256_update(&sctx, (uint8_t *)(ptr->start), ptr->len);
 
 	sha256_final(&sctx, digest);
 
-	if (memcmp(digest, sha256_digest, sizeof(digest)))
+	if (memcmp(digest, purgatory_sha256_digest, sizeof(digest)))
 		return 1;
 
 	return 0;
diff --git a/arch/x86/purgatory/purgatory.h b/arch/x86/purgatory/purgatory.h
deleted file mode 100644
index e2e365a6c192..000000000000
--- a/arch/x86/purgatory/purgatory.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef PURGATORY_H
-#define PURGATORY_H
-
-#ifndef __ASSEMBLY__
-extern void purgatory(void);
-#endif	/* __ASSEMBLY__ */
-
-#endif /* PURGATORY_H */
diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S
index f90e9dfa90bb..dfae9b9e60b5 100644
--- a/arch/x86/purgatory/setup-x86_64.S
+++ b/arch/x86/purgatory/setup-x86_64.S
@@ -9,7 +9,7 @@
  * This source code is licensed under the GNU General Public License,
  * Version 2.  See the file COPYING for more details.
  */
-#include "purgatory.h"
+#include <asm/purgatory.h>
 
 	.text
 	.globl purgatory_start
diff --git a/arch/x86/purgatory/sha256.h b/arch/x86/purgatory/sha256.h
index bd15a4127735..2867d9825a57 100644
--- a/arch/x86/purgatory/sha256.h
+++ b/arch/x86/purgatory/sha256.h
@@ -10,7 +10,6 @@
 #ifndef SHA256_H
 #define SHA256_H
 
-
 #include <linux/types.h>
 #include <crypto/sha.h>
 
diff --git a/include/linux/purgatory.h b/include/linux/purgatory.h
new file mode 100644
index 000000000000..d60d4e278609
--- /dev/null
+++ b/include/linux/purgatory.h
@@ -0,0 +1,23 @@
+#ifndef _LINUX_PURGATORY_H
+#define _LINUX_PURGATORY_H
+
+#include <linux/types.h>
+#include <crypto/sha.h>
+#include <uapi/linux/kexec.h>
+
+struct kexec_sha_region {
+	unsigned long start;
+	unsigned long len;
+};
+
+/*
+ * These forward declarations serve two purposes:
+ *
+ * 1) Make sparse happy when checking arch/purgatory
+ * 2) Document that these are required to be global so the symbol
+ *    lookup in kexec works
+ */
+extern struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX];
+extern u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE];
+
+#endif
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index b56a558e406d..b118735fea9d 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -614,13 +614,13 @@ static int kexec_calculate_store_digests(struct kimage *image)
 		ret = crypto_shash_final(desc, digest);
 		if (ret)
 			goto out_free_digest;
-		ret = kexec_purgatory_get_set_symbol(image, "sha_regions",
-						sha_regions, sha_region_sz, 0);
+		ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha_regions",
+						     sha_regions, sha_region_sz, 0);
 		if (ret)
 			goto out_free_digest;
 
-		ret = kexec_purgatory_get_set_symbol(image, "sha256_digest",
-						digest, SHA256_DIGEST_SIZE, 0);
+		ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha256_digest",
+						     digest, SHA256_DIGEST_SIZE, 0);
 		if (ret)
 			goto out_free_digest;
 	}
diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
index 4cef7e4706b0..799a8a452187 100644
--- a/kernel/kexec_internal.h
+++ b/kernel/kexec_internal.h
@@ -15,11 +15,7 @@ int kimage_is_destination_range(struct kimage *image,
 extern struct mutex kexec_mutex;
 
 #ifdef CONFIG_KEXEC_FILE
-struct kexec_sha_region {
-	unsigned long start;
-	unsigned long len;
-};
-
+#include <linux/purgatory.h>
 void kimage_file_post_load_cleanup(struct kimage *image);
 #else /* CONFIG_KEXEC_FILE */
 static inline void kimage_file_post_load_cleanup(struct kimage *image) { }
-- 
cgit v1.2.3


From 3fb632e40d7667d8bedfabc28850ac06d5493f54 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Fri, 10 Mar 2017 11:27:23 +0800
Subject: md: fix super_offset endianness in super_1_rdev_size_change

The sb->super_offset should be big-endian, but the rdev->sb_start is in
host byte order, so fix this by adding cpu_to_le64.

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/md.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index cd89ad3c3a0d..6e76d97a8fc3 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1879,7 +1879,7 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
 	}
 	sb = page_address(rdev->sb_page);
 	sb->data_size = cpu_to_le64(num_sectors);
-	sb->super_offset = rdev->sb_start;
+	sb->super_offset = cpu_to_le64(rdev->sb_start);
 	sb->sb_csum = calc_sb_1_csum(sb);
 	do {
 		md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
-- 
cgit v1.2.3


From 1345921393ba23b60d3fcf15933e699232ad25ae Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Fri, 10 Mar 2017 11:49:12 +0800
Subject: md: fix incorrect use of lexx_to_cpu in does_sb_need_changing

The sb->layout is of type __le32, so we shoud use le32_to_cpu.

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/md.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 6e76d97a8fc3..f6ae1d67bcd0 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2287,7 +2287,7 @@ static bool does_sb_need_changing(struct mddev *mddev)
 	/* Check if any mddev parameters have changed */
 	if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
 	    (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
-	    (mddev->layout != le64_to_cpu(sb->layout)) ||
+	    (mddev->layout != le32_to_cpu(sb->layout)) ||
 	    (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
 	    (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
 		return true;
-- 
cgit v1.2.3


From a4c2a13129f7c5bcf81704c06851601593303fd5 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 28 Feb 2017 17:14:41 -0800
Subject: Input: i8042 - add TUXEDO BU1406 (N24_25BU) to the nomux list

TUXEDO BU1406 does not implement active multiplexing mode properly,
and takes around 550 ms in i8042_set_mux_mode(). Given that the
device does not have external AUX port, there is no downside in
disabling the MUX mode.

Reported-by: Paul Menzel <pmenzel@molgen.mpg.de>
Suggested-by: Vojtech Pavlik <vojtech@suse.cz>
Reviewed-by: Marcos Paulo de Souza <marcos.souza.org@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/serio/i8042-x86ia64io.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index e856b073d533..312bd6ca9198 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -520,6 +520,13 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "IC4I"),
 		},
 	},
+	{
+		/* TUXEDO BU1406 */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "N24_25BU"),
+		},
+	},
 	{ }
 };
 
-- 
cgit v1.2.3


From 92ef6f97a66e580189a41a132d0f8a9f78d6ddce Mon Sep 17 00:00:00 2001
From: Matjaz Hegedic <matjaz.hegedic@gmail.com>
Date: Fri, 10 Mar 2017 14:33:09 -0800
Subject: Input: elan_i2c - add ASUS EeeBook X205TA special touchpad fw

EeeBook X205TA is yet another ASUS device with a special touchpad
firmware that needs to be accounted for during initialization, or
else the touchpad will go into an invalid state upon suspend/resume.
Adding the appropriate ic_type and product_id check fixes the problem.

Signed-off-by: Matjaz Hegedic <matjaz.hegedic@gmail.com>
Acked-by: KT Liao <kt.liao@emc.com.tw>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/mouse/elan_i2c_core.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
index 352050e9031d..d5ab9ddef3e3 100644
--- a/drivers/input/mouse/elan_i2c_core.c
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -218,17 +218,19 @@ static int elan_query_product(struct elan_tp_data *data)
 
 static int elan_check_ASUS_special_fw(struct elan_tp_data *data)
 {
-	if (data->ic_type != 0x0E)
-		return false;
-
-	switch (data->product_id) {
-	case 0x05 ... 0x07:
-	case 0x09:
-	case 0x13:
+	if (data->ic_type == 0x0E) {
+		switch (data->product_id) {
+		case 0x05 ... 0x07:
+		case 0x09:
+		case 0x13:
+			return true;
+		}
+	} else if (data->ic_type == 0x08 && data->product_id == 0x26) {
+		/* ASUS EeeBook X205TA */
 		return true;
-	default:
-		return false;
 	}
+
+	return false;
 }
 
 static int __elan_initialize(struct elan_tp_data *data)
-- 
cgit v1.2.3


From 0134ed4fb9e78672ee9f7b18007114404c81e63f Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Fri, 10 Mar 2017 13:24:22 -0700
Subject: device-dax: fix pmd/pte fault fallback handling

Jeff Moyer reports:

    With a device dax alignment of 4KB or 2MB, I get sigbus when running
    the attached fio job file for the current kernel (4.11.0-rc1+).  If
    I specify an alignment of 1GB, it works.

    I turned on debug output, and saw that it was failing in the huge
    fault code.

     dax dax1.0: dax_open
     dax dax1.0: dax_mmap
     dax dax1.0: dax_dev_huge_fault: fio: write (0x7f08f0a00000 -
     dax dax1.0: __dax_dev_pud_fault: phys_to_pgoff(0xffffffffcf60
     dax dax1.0: dax_release

    fio config for reproduce:
    [global]
    ioengine=dev-dax
    direct=0
    filename=/dev/dax0.0
    bs=2m

    [write]
    rw=write

    [read]
    stonewall
    rw=read

The driver fails to fallback when taking a fault that is larger than
the device alignment, or handling a larger fault when a smaller
mapping is already established. While we could support larger
mappings for a device with a smaller alignment, that change is
too large for the immediate fix. The simplest change is to force
fallback until the fault size matches the alignment.

Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap")
Cc: <stable@vger.kernel.org>
Reported-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/dax.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
index 8d9829ff2a78..a284dc532e46 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -427,6 +427,7 @@ static int __dax_dev_pte_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
 	int rc = VM_FAULT_SIGBUS;
 	phys_addr_t phys;
 	pfn_t pfn;
+	unsigned int fault_size = PAGE_SIZE;
 
 	if (check_vma(dax_dev, vmf->vma, __func__))
 		return VM_FAULT_SIGBUS;
@@ -437,6 +438,9 @@ static int __dax_dev_pte_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
 		return VM_FAULT_SIGBUS;
 	}
 
+	if (fault_size != dax_region->align)
+		return VM_FAULT_SIGBUS;
+
 	phys = pgoff_to_phys(dax_dev, vmf->pgoff, PAGE_SIZE);
 	if (phys == -1) {
 		dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
@@ -464,6 +468,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
 	phys_addr_t phys;
 	pgoff_t pgoff;
 	pfn_t pfn;
+	unsigned int fault_size = PMD_SIZE;
 
 	if (check_vma(dax_dev, vmf->vma, __func__))
 		return VM_FAULT_SIGBUS;
@@ -480,6 +485,16 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
 		return VM_FAULT_SIGBUS;
 	}
 
+	if (fault_size < dax_region->align)
+		return VM_FAULT_SIGBUS;
+	else if (fault_size > dax_region->align)
+		return VM_FAULT_FALLBACK;
+
+	/* if we are outside of the VMA */
+	if (pmd_addr < vmf->vma->vm_start ||
+			(pmd_addr + PMD_SIZE) > vmf->vma->vm_end)
+		return VM_FAULT_SIGBUS;
+
 	pgoff = linear_page_index(vmf->vma, pmd_addr);
 	phys = pgoff_to_phys(dax_dev, pgoff, PMD_SIZE);
 	if (phys == -1) {
-- 
cgit v1.2.3


From 70b085b06c4560a69e95607f77bb4c2b2e41943c Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Fri, 10 Mar 2017 13:24:27 -0700
Subject: device-dax: fix pud fault fallback handling

Jeff Moyer reports:

    With a device dax alignment of 4KB or 2MB, I get sigbus when running
    the attached fio job file for the current kernel (4.11.0-rc1+).  If
    I specify an alignment of 1GB, it works.

    I turned on debug output, and saw that it was failing in the huge
    fault code.

     dax dax1.0: dax_open
     dax dax1.0: dax_mmap
     dax dax1.0: dax_dev_huge_fault: fio: write (0x7f08f0a00000 -
     dax dax1.0: __dax_dev_pud_fault: phys_to_pgoff(0xffffffffcf60)
     dax dax1.0: dax_release

    fio config for reproduce:
    [global]
    ioengine=dev-dax
    direct=0
    filename=/dev/dax0.0
    bs=2m

    [write]
    rw=write

    [read]
    stonewall
    rw=read

The driver fails to fallback when taking a fault that is larger than
the device alignment, or handling a larger fault when a smaller
mapping is already established. While we could support larger
mappings for a device with a smaller alignment, that change is
too large for the immediate fix. The simplest change is to force
fallback until the fault size matches the alignment.

Reported-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/dax.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
index a284dc532e46..523fecec7bda 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -518,6 +518,8 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
 	phys_addr_t phys;
 	pgoff_t pgoff;
 	pfn_t pfn;
+	unsigned int fault_size = PUD_SIZE;
+
 
 	if (check_vma(dax_dev, vmf->vma, __func__))
 		return VM_FAULT_SIGBUS;
@@ -534,6 +536,16 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
 		return VM_FAULT_SIGBUS;
 	}
 
+	if (fault_size < dax_region->align)
+		return VM_FAULT_SIGBUS;
+	else if (fault_size > dax_region->align)
+		return VM_FAULT_FALLBACK;
+
+	/* if we are outside of the VMA */
+	if (pud_addr < vmf->vma->vm_start ||
+			(pud_addr + PUD_SIZE) > vmf->vma->vm_end)
+		return VM_FAULT_SIGBUS;
+
 	pgoff = linear_page_index(vmf->vma, pud_addr);
 	phys = pgoff_to_phys(dax_dev, pgoff, PUD_SIZE);
 	if (phys == -1) {
-- 
cgit v1.2.3


From 52084f89b38cdd896b59627c629915ef1a7bf615 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Thu, 9 Mar 2017 16:56:01 -0700
Subject: device-dax: fix debug output typo

The debug output for return the return data of pgoff_to_phys() in the
fault handlers has 'phys' and 'pgoff' incorrectly swapped.

Reported-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/dax.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
index 523fecec7bda..80c6db279ae1 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -443,7 +443,7 @@ static int __dax_dev_pte_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
 
 	phys = pgoff_to_phys(dax_dev, vmf->pgoff, PAGE_SIZE);
 	if (phys == -1) {
-		dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
+		dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
 				vmf->pgoff);
 		return VM_FAULT_SIGBUS;
 	}
@@ -498,7 +498,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
 	pgoff = linear_page_index(vmf->vma, pmd_addr);
 	phys = pgoff_to_phys(dax_dev, pgoff, PMD_SIZE);
 	if (phys == -1) {
-		dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
+		dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
 				pgoff);
 		return VM_FAULT_SIGBUS;
 	}
@@ -549,7 +549,7 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
 	pgoff = linear_page_index(vmf->vma, pud_addr);
 	phys = pgoff_to_phys(dax_dev, pgoff, PUD_SIZE);
 	if (phys == -1) {
-		dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
+		dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
 				pgoff);
 		return VM_FAULT_SIGBUS;
 	}
-- 
cgit v1.2.3


From c962cff17dfa11f4a8227ac16de2b28aea3312e4 Mon Sep 17 00:00:00 2001
From: Dou Liyang <douly.fnst@cn.fujitsu.com>
Date: Fri, 3 Mar 2017 16:02:23 +0800
Subject: Revert "x86/acpi: Set persistent cpuid <-> nodeid mapping when
 booting"

Revert: dc6db24d2476 ("x86/acpi: Set persistent cpuid <-> nodeid mapping when booting")

The mapping of "cpuid <-> nodeid" is established at boot time via ACPI
tables to keep associations of workqueues and other node related items
consistent across cpu hotplug.

But, ACPI tables are unreliable and failures with that boot time mapping
have been reported on machines where the ACPI table and the physical
information which is retrieved at actual hotplug is inconsistent.

Revert the mapping implementation so it can be replaced with a less error
prone approach.

Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com>
Tested-by: Xiaolong Ye <xiaolong.ye@intel.com>
Cc: rjw@rjwysocki.net
Cc: linux-acpi@vger.kernel.org
Cc: guzheng1@huawei.com
Cc: izumi.taku@jp.fujitsu.com
Cc: lenb@kernel.org
Link: http://lkml.kernel.org/r/1488528147-2279-2-git-send-email-douly.fnst@cn.fujitsu.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/acpi/boot.c   |  2 +-
 drivers/acpi/acpi_processor.c |  5 ---
 drivers/acpi/bus.c            |  1 -
 drivers/acpi/processor_core.c | 73 -------------------------------------------
 include/linux/acpi.h          |  3 --
 5 files changed, 1 insertion(+), 83 deletions(-)

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index ae32838cac5f..f6b0e87d2388 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -710,7 +710,7 @@ static void __init acpi_set_irq_model_ioapic(void)
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
 #include <acpi/processor.h>
 
-int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
+static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
 {
 #ifdef CONFIG_ACPI_NUMA
 	int nid;
diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index 4467a8089ab8..5d208a99d0c9 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -182,11 +182,6 @@ int __weak arch_register_cpu(int cpu)
 
 void __weak arch_unregister_cpu(int cpu) {}
 
-int __weak acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
-{
-	return -ENODEV;
-}
-
 static int acpi_processor_hotadd_init(struct acpi_processor *pr)
 {
 	unsigned long long sta;
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 80cb5eb75b63..34fbe027e73a 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1249,7 +1249,6 @@ static int __init acpi_init(void)
 	acpi_wakeup_device_init();
 	acpi_debugger_init();
 	acpi_setup_sb_notify_handler();
-	acpi_set_processor_mapping();
 	return 0;
 }
 
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index 611a5585a902..a84386204659 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -278,79 +278,6 @@ int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id)
 }
 EXPORT_SYMBOL_GPL(acpi_get_cpuid);
 
-#ifdef CONFIG_ACPI_HOTPLUG_CPU
-static bool __init
-map_processor(acpi_handle handle, phys_cpuid_t *phys_id, int *cpuid)
-{
-	int type, id;
-	u32 acpi_id;
-	acpi_status status;
-	acpi_object_type acpi_type;
-	unsigned long long tmp;
-	union acpi_object object = { 0 };
-	struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
-
-	status = acpi_get_type(handle, &acpi_type);
-	if (ACPI_FAILURE(status))
-		return false;
-
-	switch (acpi_type) {
-	case ACPI_TYPE_PROCESSOR:
-		status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
-		if (ACPI_FAILURE(status))
-			return false;
-		acpi_id = object.processor.proc_id;
-
-		/* validate the acpi_id */
-		if(acpi_processor_validate_proc_id(acpi_id))
-			return false;
-		break;
-	case ACPI_TYPE_DEVICE:
-		status = acpi_evaluate_integer(handle, "_UID", NULL, &tmp);
-		if (ACPI_FAILURE(status))
-			return false;
-		acpi_id = tmp;
-		break;
-	default:
-		return false;
-	}
-
-	type = (acpi_type == ACPI_TYPE_DEVICE) ? 1 : 0;
-
-	*phys_id = __acpi_get_phys_id(handle, type, acpi_id, false);
-	id = acpi_map_cpuid(*phys_id, acpi_id);
-
-	if (id < 0)
-		return false;
-	*cpuid = id;
-	return true;
-}
-
-static acpi_status __init
-set_processor_node_mapping(acpi_handle handle, u32 lvl, void *context,
-			   void **rv)
-{
-	phys_cpuid_t phys_id;
-	int cpu_id;
-
-	if (!map_processor(handle, &phys_id, &cpu_id))
-		return AE_ERROR;
-
-	acpi_map_cpu2node(handle, cpu_id, phys_id);
-	return AE_OK;
-}
-
-void __init acpi_set_processor_mapping(void)
-{
-	/* Set persistent cpu <-> node mapping for all processors. */
-	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
-			    ACPI_UINT32_MAX, set_processor_node_mapping,
-			    NULL, NULL, NULL);
-}
-#else
-void __init acpi_set_processor_mapping(void) {}
-#endif /* CONFIG_ACPI_HOTPLUG_CPU */
-
 #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
 static int get_ioapic_id(struct acpi_subtable_header *entry, u32 gsi_base,
 			 u64 *phys_addr, int *ioapic_id)
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 673acda012af..63a7519b00cc 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -294,11 +294,8 @@ bool acpi_processor_validate_proc_id(int proc_id);
 int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id,
 		 int *pcpu);
 int acpi_unmap_cpu(int cpu);
-int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid);
 #endif /* CONFIG_ACPI_HOTPLUG_CPU */
 
-void acpi_set_processor_mapping(void);
-
 #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
 int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr);
 #endif
-- 
cgit v1.2.3


From 09c3f2bd5c7e5f18687663acb6adc6b167484ca5 Mon Sep 17 00:00:00 2001
From: Dou Liyang <douly.fnst@cn.fujitsu.com>
Date: Fri, 3 Mar 2017 16:02:24 +0800
Subject: Revert"x86/acpi: Enable MADT APIs to return disabled apicids"

Revert: 8ad893faf2ea ("x86/acpi: Enable MADT APIs to return disabled apicids")

Remove the leftovers of the boot time 'cpuid <-> nodeid' mapping approach.

Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com>
Tested-by: Xiaolong Ye <xiaolong.ye@intel.com>
Cc: rjw@rjwysocki.net
Cc: linux-acpi@vger.kernel.org
Cc: guzheng1@huawei.com
Cc: izumi.taku@jp.fujitsu.com
Cc: lenb@kernel.org
Link: http://lkml.kernel.org/r/1488528147-2279-3-git-send-email-douly.fnst@cn.fujitsu.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/acpi/processor_core.c | 60 ++++++++++++++++---------------------------
 1 file changed, 22 insertions(+), 38 deletions(-)

diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index a84386204659..b933061b6b60 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -32,12 +32,12 @@ static struct acpi_table_madt *get_madt_table(void)
 }
 
 static int map_lapic_id(struct acpi_subtable_header *entry,
-		 u32 acpi_id, phys_cpuid_t *apic_id, bool ignore_disabled)
+		 u32 acpi_id, phys_cpuid_t *apic_id)
 {
 	struct acpi_madt_local_apic *lapic =
 		container_of(entry, struct acpi_madt_local_apic, header);
 
-	if (ignore_disabled && !(lapic->lapic_flags & ACPI_MADT_ENABLED))
+	if (!(lapic->lapic_flags & ACPI_MADT_ENABLED))
 		return -ENODEV;
 
 	if (lapic->processor_id != acpi_id)
@@ -48,13 +48,12 @@ static int map_lapic_id(struct acpi_subtable_header *entry,
 }
 
 static int map_x2apic_id(struct acpi_subtable_header *entry,
-		int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id,
-		bool ignore_disabled)
+		int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id)
 {
 	struct acpi_madt_local_x2apic *apic =
 		container_of(entry, struct acpi_madt_local_x2apic, header);
 
-	if (ignore_disabled && !(apic->lapic_flags & ACPI_MADT_ENABLED))
+	if (!(apic->lapic_flags & ACPI_MADT_ENABLED))
 		return -ENODEV;
 
 	if (device_declaration && (apic->uid == acpi_id)) {
@@ -66,13 +65,12 @@ static int map_x2apic_id(struct acpi_subtable_header *entry,
 }
 
 static int map_lsapic_id(struct acpi_subtable_header *entry,
-		int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id,
-		bool ignore_disabled)
+		int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id)
 {
 	struct acpi_madt_local_sapic *lsapic =
 		container_of(entry, struct acpi_madt_local_sapic, header);
 
-	if (ignore_disabled && !(lsapic->lapic_flags & ACPI_MADT_ENABLED))
+	if (!(lsapic->lapic_flags & ACPI_MADT_ENABLED))
 		return -ENODEV;
 
 	if (device_declaration) {
@@ -89,13 +87,12 @@ static int map_lsapic_id(struct acpi_subtable_header *entry,
  * Retrieve the ARM CPU physical identifier (MPIDR)
  */
 static int map_gicc_mpidr(struct acpi_subtable_header *entry,
-		int device_declaration, u32 acpi_id, phys_cpuid_t *mpidr,
-		bool ignore_disabled)
+		int device_declaration, u32 acpi_id, phys_cpuid_t *mpidr)
 {
 	struct acpi_madt_generic_interrupt *gicc =
 	    container_of(entry, struct acpi_madt_generic_interrupt, header);
 
-	if (ignore_disabled && !(gicc->flags & ACPI_MADT_ENABLED))
+	if (!(gicc->flags & ACPI_MADT_ENABLED))
 		return -ENODEV;
 
 	/* device_declaration means Device object in DSDT, in the
@@ -112,7 +109,7 @@ static int map_gicc_mpidr(struct acpi_subtable_header *entry,
 }
 
 static phys_cpuid_t map_madt_entry(struct acpi_table_madt *madt,
-				   int type, u32 acpi_id, bool ignore_disabled)
+				   int type, u32 acpi_id)
 {
 	unsigned long madt_end, entry;
 	phys_cpuid_t phys_id = PHYS_CPUID_INVALID;	/* CPU hardware ID */
@@ -130,20 +127,16 @@ static phys_cpuid_t map_madt_entry(struct acpi_table_madt *madt,
 		struct acpi_subtable_header *header =
 			(struct acpi_subtable_header *)entry;
 		if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) {
-			if (!map_lapic_id(header, acpi_id, &phys_id,
-					  ignore_disabled))
+			if (!map_lapic_id(header, acpi_id, &phys_id))
 				break;
 		} else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC) {
-			if (!map_x2apic_id(header, type, acpi_id, &phys_id,
-					   ignore_disabled))
+			if (!map_x2apic_id(header, type, acpi_id, &phys_id))
 				break;
 		} else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) {
-			if (!map_lsapic_id(header, type, acpi_id, &phys_id,
-					   ignore_disabled))
+			if (!map_lsapic_id(header, type, acpi_id, &phys_id))
 				break;
 		} else if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT) {
-			if (!map_gicc_mpidr(header, type, acpi_id, &phys_id,
-					    ignore_disabled))
+			if (!map_gicc_mpidr(header, type, acpi_id, &phys_id))
 				break;
 		}
 		entry += header->length;
@@ -161,15 +154,14 @@ phys_cpuid_t __init acpi_map_madt_entry(u32 acpi_id)
 	if (!madt)
 		return PHYS_CPUID_INVALID;
 
-	rv = map_madt_entry(madt, 1, acpi_id, true);
+	rv = map_madt_entry(madt, 1, acpi_id);
 
 	acpi_put_table((struct acpi_table_header *)madt);
 
 	return rv;
 }
 
-static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id,
-				  bool ignore_disabled)
+static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id)
 {
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	union acpi_object *obj;
@@ -190,38 +182,30 @@ static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id,
 
 	header = (struct acpi_subtable_header *)obj->buffer.pointer;
 	if (header->type == ACPI_MADT_TYPE_LOCAL_APIC)
-		map_lapic_id(header, acpi_id, &phys_id, ignore_disabled);
+		map_lapic_id(header, acpi_id, &phys_id);
 	else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC)
-		map_lsapic_id(header, type, acpi_id, &phys_id, ignore_disabled);
+		map_lsapic_id(header, type, acpi_id, &phys_id);
 	else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC)
-		map_x2apic_id(header, type, acpi_id, &phys_id, ignore_disabled);
+		map_x2apic_id(header, type, acpi_id, &phys_id);
 	else if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT)
-		map_gicc_mpidr(header, type, acpi_id, &phys_id,
-			       ignore_disabled);
+		map_gicc_mpidr(header, type, acpi_id, &phys_id);
 
 exit:
 	kfree(buffer.pointer);
 	return phys_id;
 }
 
-static phys_cpuid_t __acpi_get_phys_id(acpi_handle handle, int type,
-				       u32 acpi_id, bool ignore_disabled)
+phys_cpuid_t acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id)
 {
 	phys_cpuid_t phys_id;
 
-	phys_id = map_mat_entry(handle, type, acpi_id, ignore_disabled);
+	phys_id = map_mat_entry(handle, type, acpi_id);
 	if (invalid_phys_cpuid(phys_id))
-		phys_id = map_madt_entry(get_madt_table(), type, acpi_id,
-					   ignore_disabled);
+		phys_id = map_madt_entry(get_madt_table(), type, acpi_id);
 
 	return phys_id;
 }
 
-phys_cpuid_t acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id)
-{
-	return __acpi_get_phys_id(handle, type, acpi_id, true);
-}
-
 int acpi_map_cpuid(phys_cpuid_t phys_id, u32 acpi_id)
 {
 #ifdef CONFIG_SMP
-- 
cgit v1.2.3


From 2b85b3d22920db7473e5fed5719e7955c0ec323e Mon Sep 17 00:00:00 2001
From: Dou Liyang <douly.fnst@cn.fujitsu.com>
Date: Fri, 3 Mar 2017 16:02:25 +0800
Subject: x86/acpi: Restore the order of CPU IDs

The following commits:

  f7c28833c2 ("x86/acpi: Enable acpi to register all possible cpus at
boot time") and 8f54969dc8 ("x86/acpi: Introduce persistent storage
for cpuid <-> apicid mapping")

... registered all the possible CPUs at boot time via ACPI tables to
make the mapping of cpuid <-> apicid fixed. Both enabled and disabled
CPUs could have a logical CPU ID after boot time.

But, ACPI tables are unreliable. the number amd order of Local APIC
entries which depends on the firmware is often inconsistent with the
physical devices. Even if they are consistent, The disabled CPUs which
take up some logical CPU IDs will also make the order discontinuous.

Revert the part of disabled CPUs registration, keep the allocation
logic of logical CPU IDs and also keep some code location changes.

Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com>
Tested-by: Xiaolong Ye <xiaolong.ye@intel.com>
Cc: rjw@rjwysocki.net
Cc: linux-acpi@vger.kernel.org
Cc: guzheng1@huawei.com
Cc: izumi.taku@jp.fujitsu.com
Cc: lenb@kernel.org
Link: http://lkml.kernel.org/r/1488528147-2279-4-git-send-email-douly.fnst@cn.fujitsu.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/acpi/boot.c |  7 ++++++-
 arch/x86/kernel/apic/apic.c | 26 +++++++-------------------
 2 files changed, 13 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index f6b0e87d2388..b2879cc23db4 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -179,10 +179,15 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled)
 		return -EINVAL;
 	}
 
+	if (!enabled) {
+		++disabled_cpus;
+		return -EINVAL;
+	}
+
 	if (boot_cpu_physical_apicid != -1U)
 		ver = boot_cpu_apic_version;
 
-	cpu = __generic_processor_info(id, ver, enabled);
+	cpu = generic_processor_info(id, ver);
 	if (cpu >= 0)
 		early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid;
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index aee7deddabd0..8ccb7ef512e0 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2063,7 +2063,7 @@ static int allocate_logical_cpuid(int apicid)
 	return nr_logical_cpuids++;
 }
 
-int __generic_processor_info(int apicid, int version, bool enabled)
+int generic_processor_info(int apicid, int version)
 {
 	int cpu, max = nr_cpu_ids;
 	bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid,
@@ -2121,11 +2121,9 @@ int __generic_processor_info(int apicid, int version, bool enabled)
 	if (num_processors >= nr_cpu_ids) {
 		int thiscpu = max + disabled_cpus;
 
-		if (enabled) {
-			pr_warning("APIC: NR_CPUS/possible_cpus limit of %i "
-				   "reached. Processor %d/0x%x ignored.\n",
-				   max, thiscpu, apicid);
-		}
+		pr_warning("APIC: NR_CPUS/possible_cpus limit of %i "
+			   "reached. Processor %d/0x%x ignored.\n",
+			   max, thiscpu, apicid);
 
 		disabled_cpus++;
 		return -EINVAL;
@@ -2177,23 +2175,13 @@ int __generic_processor_info(int apicid, int version, bool enabled)
 		apic->x86_32_early_logical_apicid(cpu);
 #endif
 	set_cpu_possible(cpu, true);
-
-	if (enabled) {
-		num_processors++;
-		physid_set(apicid, phys_cpu_present_map);
-		set_cpu_present(cpu, true);
-	} else {
-		disabled_cpus++;
-	}
+	physid_set(apicid, phys_cpu_present_map);
+	set_cpu_present(cpu, true);
+	num_processors++;
 
 	return cpu;
 }
 
-int generic_processor_info(int apicid, int version)
-{
-	return __generic_processor_info(apicid, version, true);
-}
-
 int hard_smp_processor_id(void)
 {
 	return read_apic_id();
-- 
cgit v1.2.3


From 8c8cb30f49b86333d8e036e1945cf1a78c03577e Mon Sep 17 00:00:00 2001
From: Dou Liyang <douly.fnst@cn.fujitsu.com>
Date: Fri, 3 Mar 2017 16:02:26 +0800
Subject: acpi/processor: Implement DEVICE operator for processor enumeration

ACPI allows to declare processors either with the PROCESSOR or with the
DEVICE operator. The current implementation handles only the PROCESSOR
operator.

On a system which uses the DEVICE operator for processor enumeration the
evaluation fails.

Check for the ACPI type of the ACPI handle and evaluate PROCESSOR and
DEVICE types separately.

Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com>
Tested-by: Xiaolong Ye <xiaolong.ye@intel.com>
Cc: rjw@rjwysocki.net
Cc: linux-acpi@vger.kernel.org
Cc: guzheng1@huawei.com
Cc: izumi.taku@jp.fujitsu.com
Cc: lenb@kernel.org
Link: http://lkml.kernel.org/r/1488528147-2279-5-git-send-email-douly.fnst@cn.fujitsu.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/acpi/acpi_processor.c | 39 ++++++++++++++++++++++++++++++++-------
 1 file changed, 32 insertions(+), 7 deletions(-)

diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index 5d208a99d0c9..9a98d7e00200 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -633,25 +633,50 @@ static acpi_status __init acpi_processor_ids_walk(acpi_handle handle,
 						  void **rv)
 {
 	acpi_status status;
+	acpi_object_type acpi_type;
+	unsigned long long uid;
 	union acpi_object object = { 0 };
 	struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
 
-	status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
+	status = acpi_get_type(handle, &acpi_type);
 	if (ACPI_FAILURE(status))
-		acpi_handle_info(handle, "Not get the processor object\n");
-	else
-		processor_validated_ids_update(object.processor.proc_id);
+		return false;
+
+	switch (acpi_type) {
+	case ACPI_TYPE_PROCESSOR:
+		status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
+		if (ACPI_FAILURE(status))
+			goto err;
+		uid = object.processor.proc_id;
+		break;
+
+	case ACPI_TYPE_DEVICE:
+		status = acpi_evaluate_integer(handle, "_UID", NULL, &uid);
+		if (ACPI_FAILURE(status))
+			goto err;
+		break;
+	default:
+		goto err;
+	}
+
+	processor_validated_ids_update(uid);
+	return true;
+
+err:
+	acpi_handle_info(handle, "Invalid processor object\n");
+	return false;
 
-	return AE_OK;
 }
 
-static void __init acpi_processor_check_duplicates(void)
+void __init acpi_processor_check_duplicates(void)
 {
-	/* Search all processor nodes in ACPI namespace */
+	/* check the correctness for all processors in ACPI namespace */
 	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
 						ACPI_UINT32_MAX,
 						acpi_processor_ids_walk,
 						NULL, NULL, NULL);
+	acpi_get_devices(ACPI_PROCESSOR_DEVICE_HID, acpi_processor_ids_walk,
+						NULL, NULL);
 }
 
 bool __init acpi_processor_validate_proc_id(int proc_id)
-- 
cgit v1.2.3


From a77d6cd968497792e072b74dff45b891ba778ddb Mon Sep 17 00:00:00 2001
From: Dou Liyang <douly.fnst@cn.fujitsu.com>
Date: Fri, 3 Mar 2017 16:02:27 +0800
Subject: acpi/processor: Check for duplicate processor ids at hotplug time

The check for duplicate processor ids happens at boot time based on the
ACPI table contents, but the final sanity checks for a processor happen
at hotplug time.

At hotplug time, where the physical information is available, which might
differ from the ACPI table information, a check for duplicate processor
ids is missing.

Add it to the hotplug checks and rename the function so it better
reflects its purpose.

Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com>
Tested-by: Xiaolong Ye <xiaolong.ye@intel.com>
Cc: rjw@rjwysocki.net
Cc: linux-acpi@vger.kernel.org
Cc: guzheng1@huawei.com
Cc: izumi.taku@jp.fujitsu.com
Cc: lenb@kernel.org
Link: http://lkml.kernel.org/r/1488528147-2279-6-git-send-email-douly.fnst@cn.fujitsu.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/acpi/acpi_processor.c | 13 ++++++++++---
 include/linux/acpi.h          |  2 +-
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index 9a98d7e00200..0143135b3abe 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -280,6 +280,13 @@ static int acpi_processor_get_info(struct acpi_device *device)
 		pr->acpi_id = value;
 	}
 
+	if (acpi_duplicate_processor_id(pr->acpi_id)) {
+		dev_err(&device->dev,
+			"Failed to get unique processor _UID (0x%x)\n",
+			pr->acpi_id);
+		return -ENODEV;
+	}
+
 	pr->phys_id = acpi_get_phys_id(pr->handle, device_declaration,
 					pr->acpi_id);
 	if (invalid_phys_cpuid(pr->phys_id))
@@ -580,7 +587,7 @@ static struct acpi_scan_handler processor_container_handler = {
 static int nr_unique_ids __initdata;
 
 /* The number of the duplicate processor IDs */
-static int nr_duplicate_ids __initdata;
+static int nr_duplicate_ids;
 
 /* Used to store the unique processor IDs */
 static int unique_processor_ids[] __initdata = {
@@ -588,7 +595,7 @@ static int unique_processor_ids[] __initdata = {
 };
 
 /* Used to store the duplicate processor IDs */
-static int duplicate_processor_ids[] __initdata = {
+static int duplicate_processor_ids[] = {
 	[0 ... NR_CPUS - 1] = -1,
 };
 
@@ -679,7 +686,7 @@ void __init acpi_processor_check_duplicates(void)
 						NULL, NULL);
 }
 
-bool __init acpi_processor_validate_proc_id(int proc_id)
+bool acpi_duplicate_processor_id(int proc_id)
 {
 	int i;
 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 63a7519b00cc..9b05886f9773 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -287,7 +287,7 @@ static inline bool invalid_phys_cpuid(phys_cpuid_t phys_id)
 }
 
 /* Validate the processor object's proc_id */
-bool acpi_processor_validate_proc_id(int proc_id);
+bool acpi_duplicate_processor_id(int proc_id);
 
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
 /* Arch dependent functions for cpu hotplug support */
-- 
cgit v1.2.3


From 0acf611997d9d05dbfb559c3c6e379c861eb5957 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 22 Feb 2017 11:07:57 -0800
Subject: score: Fix implicit includes now failing build after extable change

After changing from module.h to extable.h, score builds fail with:

  arch/score/kernel/traps.c: In function 'do_ri':
  arch/score/kernel/traps.c:248:4: error: implicit declaration of function 'user_disable_single_step'
  arch/score/mm/extable.c: In function 'fixup_exception':
  arch/score/mm/extable.c:32:38: error: dereferencing pointer to incomplete type
  arch/score/mm/extable.c:34:24: error: dereferencing pointer to incomplete type

because extable.h doesn't drag in the same amount of headers as the
module.h did.  Add in the headers which were implicitly expected.

Fixes: 90858794c960 ("module.h: remove extable.h include now users have migrated")
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
[PG: tweak commit log; refresh for sched header refactoring.]
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 arch/score/kernel/traps.c | 1 +
 arch/score/mm/extable.c   | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c
index e359ec675869..12daf45369b4 100644
--- a/arch/score/kernel/traps.c
+++ b/arch/score/kernel/traps.c
@@ -24,6 +24,7 @@
  */
 
 #include <linux/extable.h>
+#include <linux/ptrace.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/debug.h>
diff --git a/arch/score/mm/extable.c b/arch/score/mm/extable.c
index ec871355fc2d..6736a3ad6286 100644
--- a/arch/score/mm/extable.c
+++ b/arch/score/mm/extable.c
@@ -24,6 +24,8 @@
  */
 
 #include <linux/extable.h>
+#include <linux/ptrace.h>
+#include <asm/extable.h>
 
 int fixup_exception(struct pt_regs *regs)
 {
-- 
cgit v1.2.3


From 33d8c15559df4f0bce25d7e16ebb5879e249f2e7 Mon Sep 17 00:00:00 2001
From: Romain Izard <romain.izard.pro@gmail.com>
Date: Thu, 9 Mar 2017 17:58:38 +0100
Subject: Revert "clocksource/drivers/tcb_clksrc: Use 32 bit tcb as
 sched_clock"

This reverts commit 7b9f1d16e6d1 ("clocksource/drivers/tcb_clksrc: Use
32 bit tcb as sched_clock"). In the current state, the kernel warns
against a late registration of the new sched_clock, the printk clock
resets after only a few minutes, and it seems that scheduling can be
affected as well.

Signed-off-by: Romain Izard <romain.izard.pro@gmail.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/tcb_clksrc.c | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c
index 745844ee973e..d4ca9962a759 100644
--- a/drivers/clocksource/tcb_clksrc.c
+++ b/drivers/clocksource/tcb_clksrc.c
@@ -10,7 +10,6 @@
 #include <linux/io.h>
 #include <linux/platform_device.h>
 #include <linux/atmel_tc.h>
-#include <linux/sched_clock.h>
 
 
 /*
@@ -57,14 +56,9 @@ static u64 tc_get_cycles(struct clocksource *cs)
 	return (upper << 16) | lower;
 }
 
-static u32 tc_get_cv32(void)
-{
-	return __raw_readl(tcaddr + ATMEL_TC_REG(0, CV));
-}
-
 static u64 tc_get_cycles32(struct clocksource *cs)
 {
-	return tc_get_cv32();
+	return __raw_readl(tcaddr + ATMEL_TC_REG(0, CV));
 }
 
 static struct clocksource clksrc = {
@@ -75,11 +69,6 @@ static struct clocksource clksrc = {
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-static u64 notrace tc_read_sched_clock(void)
-{
-	return tc_get_cv32();
-}
-
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 
 struct tc_clkevt_device {
@@ -350,9 +339,6 @@ static int __init tcb_clksrc_init(void)
 		clksrc.read = tc_get_cycles32;
 		/* setup ony channel 0 */
 		tcb_setup_single_chan(tc, best_divisor_idx);
-
-		/* register sched_clock on chips with single 32 bit counter */
-		sched_clock_register(tc_read_sched_clock, 32, divided_rate);
 	} else {
 		/* tclib will give us three clocks no matter what the
 		 * underlying platform supports.
-- 
cgit v1.2.3


From f5fe1b51905df7cfe4fdfd85c5fb7bc5b71a094f Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Fri, 10 Mar 2017 17:00:47 +1100
Subject: blk: Ensure users for current->bio_list can see the full list.

Commit 79bd99596b73 ("blk: improve order of bio handling in generic_make_request()")
changed current->bio_list so that it did not contain *all* of the
queued bios, but only those submitted by the currently running
make_request_fn.

There are two places which walk the list and requeue selected bios,
and others that check if the list is empty.  These are no longer
correct.

So redefine current->bio_list to point to an array of two lists, which
contain all queued bios, and adjust various code to test or walk both
lists.

Signed-off-by: NeilBrown <neilb@suse.com>
Fixes: 79bd99596b73 ("blk: improve order of bio handling in generic_make_request()")
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bio.c         | 12 +++++++++---
 block/blk-core.c    | 30 ++++++++++++++++++------------
 drivers/md/dm.c     | 29 ++++++++++++++++-------------
 drivers/md/raid10.c |  3 ++-
 4 files changed, 45 insertions(+), 29 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 5eec5e08417f..e75878f8b14a 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -376,10 +376,14 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
 	bio_list_init(&punt);
 	bio_list_init(&nopunt);
 
-	while ((bio = bio_list_pop(current->bio_list)))
+	while ((bio = bio_list_pop(&current->bio_list[0])))
 		bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
+	current->bio_list[0] = nopunt;
 
-	*current->bio_list = nopunt;
+	bio_list_init(&nopunt);
+	while ((bio = bio_list_pop(&current->bio_list[1])))
+		bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
+	current->bio_list[1] = nopunt;
 
 	spin_lock(&bs->rescue_lock);
 	bio_list_merge(&bs->rescue_list, &punt);
@@ -466,7 +470,9 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 		 * we retry with the original gfp_flags.
 		 */
 
-		if (current->bio_list && !bio_list_empty(current->bio_list))
+		if (current->bio_list &&
+		    (!bio_list_empty(&current->bio_list[0]) ||
+		     !bio_list_empty(&current->bio_list[1])))
 			gfp_mask &= ~__GFP_DIRECT_RECLAIM;
 
 		p = mempool_alloc(bs->bio_pool, gfp_mask);
diff --git a/block/blk-core.c b/block/blk-core.c
index 0eeb99ef654f..d772c221cc17 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1973,7 +1973,14 @@ end_io:
  */
 blk_qc_t generic_make_request(struct bio *bio)
 {
-	struct bio_list bio_list_on_stack;
+	/*
+	 * bio_list_on_stack[0] contains bios submitted by the current
+	 * make_request_fn.
+	 * bio_list_on_stack[1] contains bios that were submitted before
+	 * the current make_request_fn, but that haven't been processed
+	 * yet.
+	 */
+	struct bio_list bio_list_on_stack[2];
 	blk_qc_t ret = BLK_QC_T_NONE;
 
 	if (!generic_make_request_checks(bio))
@@ -1990,7 +1997,7 @@ blk_qc_t generic_make_request(struct bio *bio)
 	 * should be added at the tail
 	 */
 	if (current->bio_list) {
-		bio_list_add(current->bio_list, bio);
+		bio_list_add(&current->bio_list[0], bio);
 		goto out;
 	}
 
@@ -2009,18 +2016,17 @@ blk_qc_t generic_make_request(struct bio *bio)
 	 * bio_list, and call into ->make_request() again.
 	 */
 	BUG_ON(bio->bi_next);
-	bio_list_init(&bio_list_on_stack);
-	current->bio_list = &bio_list_on_stack;
+	bio_list_init(&bio_list_on_stack[0]);
+	current->bio_list = bio_list_on_stack;
 	do {
 		struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
 		if (likely(blk_queue_enter(q, false) == 0)) {
-			struct bio_list hold;
 			struct bio_list lower, same;
 
 			/* Create a fresh bio_list for all subordinate requests */
-			hold = bio_list_on_stack;
-			bio_list_init(&bio_list_on_stack);
+			bio_list_on_stack[1] = bio_list_on_stack[0];
+			bio_list_init(&bio_list_on_stack[0]);
 			ret = q->make_request_fn(q, bio);
 
 			blk_queue_exit(q);
@@ -2030,19 +2036,19 @@ blk_qc_t generic_make_request(struct bio *bio)
 			 */
 			bio_list_init(&lower);
 			bio_list_init(&same);
-			while ((bio = bio_list_pop(&bio_list_on_stack)) != NULL)
+			while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
 				if (q == bdev_get_queue(bio->bi_bdev))
 					bio_list_add(&same, bio);
 				else
 					bio_list_add(&lower, bio);
 			/* now assemble so we handle the lowest level first */
-			bio_list_merge(&bio_list_on_stack, &lower);
-			bio_list_merge(&bio_list_on_stack, &same);
-			bio_list_merge(&bio_list_on_stack, &hold);
+			bio_list_merge(&bio_list_on_stack[0], &lower);
+			bio_list_merge(&bio_list_on_stack[0], &same);
+			bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
 		} else {
 			bio_io_error(bio);
 		}
-		bio = bio_list_pop(current->bio_list);
+		bio = bio_list_pop(&bio_list_on_stack[0]);
 	} while (bio);
 	current->bio_list = NULL; /* deactivate */
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index f4ffd1eb8f44..dfb75979e455 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -989,26 +989,29 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule)
 	struct dm_offload *o = container_of(cb, struct dm_offload, cb);
 	struct bio_list list;
 	struct bio *bio;
+	int i;
 
 	INIT_LIST_HEAD(&o->cb.list);
 
 	if (unlikely(!current->bio_list))
 		return;
 
-	list = *current->bio_list;
-	bio_list_init(current->bio_list);
-
-	while ((bio = bio_list_pop(&list))) {
-		struct bio_set *bs = bio->bi_pool;
-		if (unlikely(!bs) || bs == fs_bio_set) {
-			bio_list_add(current->bio_list, bio);
-			continue;
+	for (i = 0; i < 2; i++) {
+		list = current->bio_list[i];
+		bio_list_init(&current->bio_list[i]);
+
+		while ((bio = bio_list_pop(&list))) {
+			struct bio_set *bs = bio->bi_pool;
+			if (unlikely(!bs) || bs == fs_bio_set) {
+				bio_list_add(&current->bio_list[i], bio);
+				continue;
+			}
+
+			spin_lock(&bs->rescue_lock);
+			bio_list_add(&bs->rescue_list, bio);
+			queue_work(bs->rescue_workqueue, &bs->rescue_work);
+			spin_unlock(&bs->rescue_lock);
 		}
-
-		spin_lock(&bs->rescue_lock);
-		bio_list_add(&bs->rescue_list, bio);
-		queue_work(bs->rescue_workqueue, &bs->rescue_work);
-		spin_unlock(&bs->rescue_lock);
 	}
 }
 
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 063c43d83b72..0536658c9d40 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -974,7 +974,8 @@ static void wait_barrier(struct r10conf *conf)
 				    !conf->barrier ||
 				    (atomic_read(&conf->nr_pending) &&
 				     current->bio_list &&
-				     !bio_list_empty(current->bio_list)),
+				     (!bio_list_empty(&current->bio_list[0]) ||
+				      !bio_list_empty(&current->bio_list[1]))),
 				    conf->resync_lock);
 		conf->nr_waiting--;
 		if (!conf->nr_waiting)
-- 
cgit v1.2.3


From 2c4ea6e28dbf15ab93632c5c189f3948366b8885 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 11 Mar 2017 01:31:19 +0100
Subject: x86/tlb: Fix tlb flushing when lguest clears PGE

Fengguang reported random corruptions from various locations on x86-32
after commits d2852a224050 ("arch: add ARCH_HAS_SET_MEMORY config") and
9d876e79df6a ("bpf: fix unlocking of jited image when module ronx not set")
that uses the former. While x86-32 doesn't have a JIT like x86_64, the
bpf_prog_lock_ro() and bpf_prog_unlock_ro() got enabled due to
ARCH_HAS_SET_MEMORY, whereas Fengguang's test kernel doesn't have module
support built in and therefore never had the DEBUG_SET_MODULE_RONX setting
enabled.

After investigating the crashes further, it turned out that using
set_memory_ro() and set_memory_rw() didn't have the desired effect, for
example, setting the pages as read-only on x86-32 would still let
probe_kernel_write() succeed without error. This behavior would manifest
itself in situations where the vmalloc'ed buffer was accessed prior to
set_memory_*() such as in case of bpf_prog_alloc(). In cases where it
wasn't, the page attribute changes seemed to have taken effect, leading to
the conclusion that a TLB invalidate didn't happen. Moreover, it turned out
that this issue reproduced with qemu in "-cpu kvm64" mode, but not for
"-cpu host". When the issue occurs, change_page_attr_set_clr() did trigger
a TLB flush as expected via __flush_tlb_all() through cpa_flush_range(),
though.

There are 3 variants for issuing a TLB flush: invpcid_flush_all() (depends
on CPU feature bits X86_FEATURE_INVPCID, X86_FEATURE_PGE), cr4 based flush
(depends on X86_FEATURE_PGE), and cr3 based flush.  For "-cpu host" case in
my setup, the flush used invpcid_flush_all() variant, whereas for "-cpu
kvm64", the flush was cr4 based. Switching the kvm64 case to cr3 manually
worked fine, and further investigating the cr4 one turned out that
X86_CR4_PGE bit was not set in cr4 register, meaning the
__native_flush_tlb_global_irq_disabled() wrote cr4 twice with the same
value instead of clearing X86_CR4_PGE in the first write to trigger the
flush.

It turned out that X86_CR4_PGE was cleared from cr4 during init from
lguest_arch_host_init() via adjust_pge(). The X86_FEATURE_PGE bit is also
cleared from there due to concerns of using PGE in guest kernel that can
lead to hard to trace bugs (see bff672e630a0 ("lguest: documentation V:
Host") in init()). The CPU feature bits are cleared in dynamic
boot_cpu_data, but they never propagated to __flush_tlb_all() as it uses
static_cpu_has() instead of boot_cpu_has() for testing which variant of TLB
flushing to use, meaning they still used the old setting of the host
kernel.

Clearing via setup_clear_cpu_cap(X86_FEATURE_PGE) so this would propagate
to static_cpu_has() checks is too late at this point as sections have been
patched already, so for now, it seems reasonable to switch back to
boot_cpu_has(X86_FEATURE_PGE) as it was prior to commit c109bf95992b
("x86/cpufeature: Remove cpu_has_pge"). This lets the TLB flush trigger via
cr3 as originally intended, properly makes the new page attributes visible
and thus fixes the crashes seen by Fengguang.

Fixes: c109bf95992b ("x86/cpufeature: Remove cpu_has_pge")
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: bp@suse.de
Cc: Kees Cook <keescook@chromium.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: netdev@vger.kernel.org
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: lkp@01.org
Cc: Laura Abbott <labbott@redhat.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernrl.org/r/20170301125426.l4nf65rx4wahohyl@wfg-t540p.sh.intel.com
Link: http://lkml.kernel.org/r/25c41ad9eca164be4db9ad84f768965b7eb19d9e.1489191673.git.daniel@iogearbox.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/tlbflush.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 6fa85944af83..fc5abff9b7fd 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -188,7 +188,7 @@ static inline void __native_flush_tlb_single(unsigned long addr)
 
 static inline void __flush_tlb_all(void)
 {
-	if (static_cpu_has(X86_FEATURE_PGE))
+	if (boot_cpu_has(X86_FEATURE_PGE))
 		__flush_tlb_global();
 	else
 		__flush_tlb();
-- 
cgit v1.2.3


From 4495c08e84729385774601b5146d51d9e5849f81 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 12 Mar 2017 14:47:08 -0700
Subject: Linux 4.11-rc2

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 165cf9783a5d..b841fb36beb2 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 11
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
-- 
cgit v1.2.3


From d1c4e9bf73e739b937ddd9dc4cf0f6de2e6117da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Paulo=20Rechi=20Vita?= <jprvita@gmail.com>
Date: Mon, 20 Feb 2017 14:50:23 -0500
Subject: platform/x86: asus-wmi: Remove quirk_no_rfkill
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With the detection introduced in the previous patches, we don't need
these static DMI-based quirks anymore.

This reverts the following commits:
56a37a72002b "asus-wmi: Add quirk_no_rfkill_wapf4 for the Asus X456UA"
a961a285b479 "asus-wmi: Add quirk_no_rfkill_wapf4 for the Asus X456UF"
6b7ff2af5286 "asus-wmi: Add quirk_no_rfkill for the Asus Z550MA"
02db9ff7af18 "asus-wmi: Add quirk_no_rfkill for the Asus U303LB"
2d735244b798 "asus-wmi: Add quirk_no_rfkill for the Asus N552VW"
a977e59c0c67 "asus-wmi: Create quirk for airplane_mode LED"

Signed-off-by: João Paulo Rechi Vita <jprvita@endlessm.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
[dvhart: minor commit message corrections]
Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
---
 drivers/platform/x86/asus-nb-wmi.c | 49 ++------------------------------------
 drivers/platform/x86/asus-wmi.c    |  5 +---
 drivers/platform/x86/asus-wmi.h    |  1 -
 3 files changed, 3 insertions(+), 52 deletions(-)

diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index 5be4783e40d4..dea98ffb6f60 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -103,15 +103,6 @@ static struct quirk_entry quirk_asus_x200ca = {
 	.wapf = 2,
 };
 
-static struct quirk_entry quirk_no_rfkill = {
-	.no_rfkill = true,
-};
-
-static struct quirk_entry quirk_no_rfkill_wapf4 = {
-	.wapf = 4,
-	.no_rfkill = true,
-};
-
 static struct quirk_entry quirk_asus_ux303ub = {
 	.wmi_backlight_native = true,
 };
@@ -194,7 +185,7 @@ static const struct dmi_system_id asus_quirks[] = {
 			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
 			DMI_MATCH(DMI_PRODUCT_NAME, "X456UA"),
 		},
-		.driver_data = &quirk_no_rfkill_wapf4,
+		.driver_data = &quirk_asus_wapf4,
 	},
 	{
 		.callback = dmi_matched,
@@ -203,7 +194,7 @@ static const struct dmi_system_id asus_quirks[] = {
 			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
 			DMI_MATCH(DMI_PRODUCT_NAME, "X456UF"),
 		},
-		.driver_data = &quirk_no_rfkill_wapf4,
+		.driver_data = &quirk_asus_wapf4,
 	},
 	{
 		.callback = dmi_matched,
@@ -367,42 +358,6 @@ static const struct dmi_system_id asus_quirks[] = {
 		},
 		.driver_data = &quirk_asus_x200ca,
 	},
-	{
-		.callback = dmi_matched,
-		.ident = "ASUSTeK COMPUTER INC. X555UB",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "X555UB"),
-		},
-		.driver_data = &quirk_no_rfkill,
-	},
-	{
-		.callback = dmi_matched,
-		.ident = "ASUSTeK COMPUTER INC. N552VW",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "N552VW"),
-		},
-		.driver_data = &quirk_no_rfkill,
-	},
-	{
-		.callback = dmi_matched,
-		.ident = "ASUSTeK COMPUTER INC. U303LB",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "U303LB"),
-		},
-		.driver_data = &quirk_no_rfkill,
-	},
-	{
-		.callback = dmi_matched,
-		.ident = "ASUSTeK COMPUTER INC. Z550MA",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "Z550MA"),
-		},
-		.driver_data = &quirk_no_rfkill,
-	},
 	{
 		.callback = dmi_matched,
 		.ident = "ASUSTeK COMPUTER INC. UX303UB",
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 8499d3ae4257..8fe5890bf539 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -2111,10 +2111,7 @@ static int asus_wmi_add(struct platform_device *pdev)
 	if (result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT))
 		asus->driver->wlan_ctrl_by_user = 1;
 
-	if (asus->driver->wlan_ctrl_by_user && ashs_present())
-		asus->driver->quirks->no_rfkill = 1;
-
-	if (!asus->driver->quirks->no_rfkill) {
+	if (!(asus->driver->wlan_ctrl_by_user && ashs_present())) {
 		err = asus_wmi_rfkill_init(asus);
 		if (err)
 			goto fail_rfkill;
diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h
index fdff626c3b51..c9589d9342bb 100644
--- a/drivers/platform/x86/asus-wmi.h
+++ b/drivers/platform/x86/asus-wmi.h
@@ -39,7 +39,6 @@ struct key_entry;
 struct asus_wmi;
 
 struct quirk_entry {
-	bool no_rfkill;
 	bool hotplug_wireless;
 	bool scalar_panel_brightness;
 	bool store_backlight_power;
-- 
cgit v1.2.3


From bafa687dccbe16cbf9b1824409836d79d6dc6543 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 23 Feb 2017 12:31:54 +0200
Subject: extcon: int3496: Propagate error code of gpiod_to_irq()

gpiod_to_irq() doesn't return 0. Thus, we just adjust condition and
replace -EINVAL by actual error code it returns.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/extcon/extcon-intel-int3496.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/extcon/extcon-intel-int3496.c b/drivers/extcon/extcon-intel-int3496.c
index a3131b036de6..38eb6cab938f 100644
--- a/drivers/extcon/extcon-intel-int3496.c
+++ b/drivers/extcon/extcon-intel-int3496.c
@@ -100,9 +100,9 @@ static int int3496_probe(struct platform_device *pdev)
 	}
 
 	data->usb_id_irq = gpiod_to_irq(data->gpio_usb_id);
-	if (data->usb_id_irq <= 0) {
+	if (data->usb_id_irq < 0) {
 		dev_err(dev, "can't get USB ID IRQ: %d\n", data->usb_id_irq);
-		return -EINVAL;
+		return data->usb_id_irq;
 	}
 
 	data->gpio_vbus_en = devm_gpiod_get_index(dev, "vbus en",
-- 
cgit v1.2.3


From ecd052250b51c8cee4d9720adea05690dfc77c64 Mon Sep 17 00:00:00 2001
From: David Arcari <darcari@redhat.com>
Date: Thu, 9 Mar 2017 13:28:33 -0500
Subject: net: ethernet: aquantia: call set_irq_affinity_hint before free_irq

When a network interface controlled by the aquantia ethernet driver is brought
down a warning is output in dmesg (see below).

The problem is that aq_pci_func_free_irqs() is calling free_irq() before it is
calling irq_set_affinity_hint().

WARNING: CPU: 4 PID: 10068 at kernel/irq/manage.c:1503 __free_irq+0x24d/0x2b0
<snip>
Call Trace:
 dump_stack+0x63/0x87
 __warn+0xd1/0xf0
 warn_slowpath_null+0x1d/0x20
 __free_irq+0x24d/0x2b0
 free_irq+0x39/0x90
 aq_pci_func_free_irqs+0x52/0xa0 [atlantic]
 aq_nic_stop+0xca/0xd0 [atlantic]
 aq_ndev_close+0x1d/0x40 [atlantic]
 __dev_close_many+0x99/0x100
 __dev_close+0x67/0xb0
<snip>

Fixes: 36a4a50f4048 ("net: ethernet: aquantia: switch to pci_alloc_irq_vectors")

Cc: Christoph Hellwig <hch@lst.de>
Cc: Pavel Belous <pavel.belous@aquantia.com>
Signed-off-by: David Arcari <darcari@redhat.com>
Tested-by: Pavel Belous <pavel.belous@aquantia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
index 581de71a958a..4c6c882c6a1c 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
@@ -213,9 +213,9 @@ void aq_pci_func_free_irqs(struct aq_pci_func_s *self)
 		if (!((1U << i) & self->msix_entry_mask))
 			continue;
 
-		free_irq(pci_irq_vector(pdev, i), self->aq_vec[i]);
 		if (pdev->msix_enabled)
 			irq_set_affinity_hint(pci_irq_vector(pdev, i), NULL);
+		free_irq(pci_irq_vector(pdev, i), self->aq_vec[i]);
 		self->msix_entry_mask &= ~(1U << i);
 	}
 }
-- 
cgit v1.2.3


From 7ce101246655935b014b11d81f815342921f5654 Mon Sep 17 00:00:00 2001
From: stephen hemminger <stephen@networkplumber.org>
Date: Thu, 9 Mar 2017 14:58:29 -0800
Subject: netvsc: handle select_queue when device is being removed

Move the send indirection table from the inner device (netvsc)
to the network device context.

It is possible that netvsc_device is not present (remove in progress).
This solves potential use after free issues when packet is being
created during MTU change, shutdown, or queue count changes.

Fixes: d8e18ee0fa96 ("netvsc: enhance transmit select_queue")
Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/hyperv_net.h |  3 ++-
 drivers/net/hyperv/netvsc.c     |  8 ++------
 drivers/net/hyperv/netvsc_drv.c | 11 +++--------
 3 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index d3e73ac158ae..f9f3dba7a588 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -700,6 +700,8 @@ struct net_device_context {
 
 	u32 tx_checksum_mask;
 
+	u32 tx_send_table[VRSS_SEND_TAB_SIZE];
+
 	/* Ethtool settings */
 	u8 duplex;
 	u32 speed;
@@ -757,7 +759,6 @@ struct netvsc_device {
 
 	struct nvsp_message revoke_packet;
 
-	u32 send_table[VRSS_SEND_TAB_SIZE];
 	u32 max_chn;
 	u32 num_chn;
 	spinlock_t sc_lock; /* Protects num_sc_offered variable */
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index d35ebd993b38..4c1d8cca247b 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -1136,15 +1136,11 @@ static void netvsc_receive(struct net_device *ndev,
 static void netvsc_send_table(struct hv_device *hdev,
 			      struct nvsp_message *nvmsg)
 {
-	struct netvsc_device *nvscdev;
 	struct net_device *ndev = hv_get_drvdata(hdev);
+	struct net_device_context *net_device_ctx = netdev_priv(ndev);
 	int i;
 	u32 count, *tab;
 
-	nvscdev = get_outbound_net_device(hdev);
-	if (!nvscdev)
-		return;
-
 	count = nvmsg->msg.v5_msg.send_table.count;
 	if (count != VRSS_SEND_TAB_SIZE) {
 		netdev_err(ndev, "Received wrong send-table size:%u\n", count);
@@ -1155,7 +1151,7 @@ static void netvsc_send_table(struct hv_device *hdev,
 		      nvmsg->msg.v5_msg.send_table.offset);
 
 	for (i = 0; i < count; i++)
-		nvscdev->send_table[i] = tab[i];
+		net_device_ctx->tx_send_table[i] = tab[i];
 }
 
 static void netvsc_send_vf(struct net_device_context *net_device_ctx,
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index bc05c895d958..5ede87f30463 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -206,17 +206,15 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
 			void *accel_priv, select_queue_fallback_t fallback)
 {
 	struct net_device_context *net_device_ctx = netdev_priv(ndev);
-	struct netvsc_device *nvsc_dev = net_device_ctx->nvdev;
+	unsigned int num_tx_queues = ndev->real_num_tx_queues;
 	struct sock *sk = skb->sk;
 	int q_idx = sk_tx_queue_get(sk);
 
-	if (q_idx < 0 || skb->ooo_okay ||
-	    q_idx >= ndev->real_num_tx_queues) {
+	if (q_idx < 0 || skb->ooo_okay || q_idx >= num_tx_queues) {
 		u16 hash = __skb_tx_hash(ndev, skb, VRSS_SEND_TAB_SIZE);
 		int new_idx;
 
-		new_idx = nvsc_dev->send_table[hash]
-			% nvsc_dev->num_chn;
+		new_idx = net_device_ctx->tx_send_table[hash] % num_tx_queues;
 
 		if (q_idx != new_idx && sk &&
 		    sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache))
@@ -225,9 +223,6 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
 		q_idx = new_idx;
 	}
 
-	if (unlikely(!nvsc_dev->chan_table[q_idx].channel))
-		q_idx = 0;
-
 	return q_idx;
 }
 
-- 
cgit v1.2.3


From 88a7cddce2506b0b6c06a9f6e51379d0d275353b Mon Sep 17 00:00:00 2001
From: Neil Jerram <neil@tigera.io>
Date: Fri, 10 Mar 2017 12:24:57 +0000
Subject: Make IP 'forwarding' doc more precise

It wasn't clear if the 'forwarding' setting needs to be enabled on the
interface that packets are received from, or on the interface that
packets are forwarded to, or both.

In fact (according to my code reading) the setting is relevant on the
interface that packets are received from, so this change updates the doc
to say that.

Signed-off-by: Neil Jerram <neil@tigera.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index fc73eeb7b3b8..ab0230461377 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1006,7 +1006,8 @@ accept_redirects - BOOLEAN
 		FALSE (router)
 
 forwarding - BOOLEAN
-	Enable IP forwarding on this interface.
+	Enable IP forwarding on this interface.  This controls whether packets
+	received _on_ this interface can be forwarded.
 
 mc_forwarding - BOOLEAN
 	Do multicast routing. The kernel needs to be compiled with CONFIG_MROUTE
-- 
cgit v1.2.3


From 52491c7607c5527138095edf44c53169dc1ddb82 Mon Sep 17 00:00:00 2001
From: Etienne Noss <etienne.noss@wifirst.fr>
Date: Fri, 10 Mar 2017 16:55:32 +0100
Subject: act_connmark: avoid crashing on malformed nlattrs with null parms
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

tcf_connmark_init does not check in its configuration if TCA_CONNMARK_PARMS
is set, resulting in a null pointer dereference when trying to access it.

[501099.043007] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004
[501099.043039] IP: [<ffffffffc10c60fb>] tcf_connmark_init+0x8b/0x180 [act_connmark]
...
[501099.044334] Call Trace:
[501099.044345]  [<ffffffffa47270e8>] ? tcf_action_init_1+0x198/0x1b0
[501099.044363]  [<ffffffffa47271b0>] ? tcf_action_init+0xb0/0x120
[501099.044380]  [<ffffffffa47250a4>] ? tcf_exts_validate+0xc4/0x110
[501099.044398]  [<ffffffffc0f5fa97>] ? u32_set_parms+0xa7/0x270 [cls_u32]
[501099.044417]  [<ffffffffc0f60bf0>] ? u32_change+0x680/0x87b [cls_u32]
[501099.044436]  [<ffffffffa4725d1d>] ? tc_ctl_tfilter+0x4dd/0x8a0
[501099.044454]  [<ffffffffa44a23a1>] ? security_capable+0x41/0x60
[501099.044471]  [<ffffffffa470ca01>] ? rtnetlink_rcv_msg+0xe1/0x220
[501099.044490]  [<ffffffffa470c920>] ? rtnl_newlink+0x870/0x870
[501099.044507]  [<ffffffffa472cc61>] ? netlink_rcv_skb+0xa1/0xc0
[501099.044524]  [<ffffffffa47073f4>] ? rtnetlink_rcv+0x24/0x30
[501099.044541]  [<ffffffffa472c634>] ? netlink_unicast+0x184/0x230
[501099.044558]  [<ffffffffa472c9d8>] ? netlink_sendmsg+0x2f8/0x3b0
[501099.044576]  [<ffffffffa46d8880>] ? sock_sendmsg+0x30/0x40
[501099.044592]  [<ffffffffa46d8e03>] ? SYSC_sendto+0xd3/0x150
[501099.044608]  [<ffffffffa425fda1>] ? __do_page_fault+0x2d1/0x510
[501099.044626]  [<ffffffffa47fbd7b>] ? system_call_fast_compare_end+0xc/0x9b

Fixes: 22a5dc0e5e3e ("net: sched: Introduce connmark action")
Signed-off-by: Étienne Noss <etienne.noss@wifirst.fr>
Signed-off-by: Victorien Molle <victorien.molle@wifirst.fr>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_connmark.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index ab8062909962..f9bb43c25697 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -113,6 +113,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 	if (ret < 0)
 		return ret;
 
+	if (!tb[TCA_CONNMARK_PARMS])
+		return -EINVAL;
+
 	parm = nla_data(tb[TCA_CONNMARK_PARMS]);
 
 	if (!tcf_hash_check(tn, parm->index, a, bind)) {
-- 
cgit v1.2.3


From e37791ec1ad785b59022ae211f63a16189bacebf Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Fri, 10 Mar 2017 09:46:15 -0800
Subject: mpls: Send route delete notifications when router module is unloaded

When the mpls_router module is unloaded, mpls routes are deleted but
notifications are not sent to userspace leaving userspace caches
out of sync. Add the call to mpls_notify_route in mpls_net_exit as
routes are freed.

Fixes: 0189197f44160 ("mpls: Basic routing support")
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mpls/af_mpls.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 3818686182b2..a1477989ed0b 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -2028,6 +2028,7 @@ static void mpls_net_exit(struct net *net)
 	for (index = 0; index < platform_labels; index++) {
 		struct mpls_route *rt = rtnl_dereference(platform_label[index]);
 		RCU_INIT_POINTER(platform_label[index], NULL);
+		mpls_notify_route(net, index, rt, NULL, NULL);
 		mpls_rt_free(rt);
 	}
 	rtnl_unlock();
-- 
cgit v1.2.3


From b17075d5c1988b83f840d272c795ac17d57ce804 Mon Sep 17 00:00:00 2001
From: Igor Druzhinin <igor.druzhinin@citrix.com>
Date: Fri, 10 Mar 2017 21:36:22 +0000
Subject: xen-netback: fix race condition on XenBus disconnect

In some cases during XenBus disconnect event handling and subsequent
queue resource release there may be some TX handlers active on
other processors. Use RCU in order to synchronize with them.

Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netback/interface.c | 26 +++++++++++++++++---------
 drivers/net/xen-netback/netback.c   |  2 +-
 drivers/net/xen-netback/xenbus.c    | 20 ++++++++++----------
 3 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 829b26cd4549..8397f6c92451 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -165,13 +165,17 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct xenvif *vif = netdev_priv(dev);
 	struct xenvif_queue *queue = NULL;
-	unsigned int num_queues = vif->num_queues;
+	unsigned int num_queues;
 	u16 index;
 	struct xenvif_rx_cb *cb;
 
 	BUG_ON(skb->dev != dev);
 
-	/* Drop the packet if queues are not set up */
+	/* Drop the packet if queues are not set up.
+	 * This handler should be called inside an RCU read section
+	 * so we don't need to enter it here explicitly.
+	 */
+	num_queues = READ_ONCE(vif->num_queues);
 	if (num_queues < 1)
 		goto drop;
 
@@ -222,18 +226,18 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
 {
 	struct xenvif *vif = netdev_priv(dev);
 	struct xenvif_queue *queue = NULL;
+	unsigned int num_queues;
 	u64 rx_bytes = 0;
 	u64 rx_packets = 0;
 	u64 tx_bytes = 0;
 	u64 tx_packets = 0;
 	unsigned int index;
 
-	spin_lock(&vif->lock);
-	if (vif->queues == NULL)
-		goto out;
+	rcu_read_lock();
+	num_queues = READ_ONCE(vif->num_queues);
 
 	/* Aggregate tx and rx stats from each queue */
-	for (index = 0; index < vif->num_queues; ++index) {
+	for (index = 0; index < num_queues; ++index) {
 		queue = &vif->queues[index];
 		rx_bytes += queue->stats.rx_bytes;
 		rx_packets += queue->stats.rx_packets;
@@ -241,8 +245,7 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
 		tx_packets += queue->stats.tx_packets;
 	}
 
-out:
-	spin_unlock(&vif->lock);
+	rcu_read_unlock();
 
 	vif->dev->stats.rx_bytes = rx_bytes;
 	vif->dev->stats.rx_packets = rx_packets;
@@ -378,10 +381,13 @@ static void xenvif_get_ethtool_stats(struct net_device *dev,
 				     struct ethtool_stats *stats, u64 * data)
 {
 	struct xenvif *vif = netdev_priv(dev);
-	unsigned int num_queues = vif->num_queues;
+	unsigned int num_queues;
 	int i;
 	unsigned int queue_index;
 
+	rcu_read_lock();
+	num_queues = READ_ONCE(vif->num_queues);
+
 	for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++) {
 		unsigned long accum = 0;
 		for (queue_index = 0; queue_index < num_queues; ++queue_index) {
@@ -390,6 +396,8 @@ static void xenvif_get_ethtool_stats(struct net_device *dev,
 		}
 		data[i] = accum;
 	}
+
+	rcu_read_unlock();
 }
 
 static void xenvif_get_strings(struct net_device *dev, u32 stringset, u8 * data)
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index f9bcf4a665bc..602d408fa25e 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -214,7 +214,7 @@ static void xenvif_fatal_tx_err(struct xenvif *vif)
 	netdev_err(vif->dev, "fatal error; disabling device\n");
 	vif->disabled = true;
 	/* Disable the vif from queue 0's kthread */
-	if (vif->queues)
+	if (vif->num_queues)
 		xenvif_kick_thread(&vif->queues[0]);
 }
 
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index d2d7cd9145b1..a56d3eab35dd 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -495,26 +495,26 @@ static void backend_disconnect(struct backend_info *be)
 	struct xenvif *vif = be->vif;
 
 	if (vif) {
+		unsigned int num_queues = vif->num_queues;
 		unsigned int queue_index;
-		struct xenvif_queue *queues;
 
 		xen_unregister_watchers(vif);
 #ifdef CONFIG_DEBUG_FS
 		xenvif_debugfs_delif(vif);
 #endif /* CONFIG_DEBUG_FS */
 		xenvif_disconnect_data(vif);
-		for (queue_index = 0;
-		     queue_index < vif->num_queues;
-		     ++queue_index)
-			xenvif_deinit_queue(&vif->queues[queue_index]);
 
-		spin_lock(&vif->lock);
-		queues = vif->queues;
+		/* At this point some of the handlers may still be active
+		 * so we need to have additional synchronization here.
+		 */
 		vif->num_queues = 0;
-		vif->queues = NULL;
-		spin_unlock(&vif->lock);
+		synchronize_net();
 
-		vfree(queues);
+		for (queue_index = 0; queue_index < num_queues; ++queue_index)
+			xenvif_deinit_queue(&vif->queues[queue_index]);
+
+		vfree(vif->queues);
+		vif->queues = NULL;
 
 		xenvif_disconnect_ctrl(vif);
 	}
-- 
cgit v1.2.3


From 79099aab38c8f5c746748b066ae74ba984fe2cc8 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Fri, 10 Mar 2017 14:11:39 -0800
Subject: mpls: Do not decrement alive counter for unregister events

Multipath routes can be rendered usesless when a device in one of the
paths is deleted. For example:

$ ip -f mpls ro ls
100
	nexthop as to 200 via inet 172.16.2.2  dev virt12
	nexthop as to 300 via inet 172.16.3.2  dev br0
101
	nexthop as to 201 via inet6 2000:2::2  dev virt12
	nexthop as to 301 via inet6 2000:3::2  dev br0

$ ip li del br0

When br0 is deleted the other hop is not considered in
mpls_select_multipath because of the alive check -- rt_nhn_alive
is 0.

rt_nhn_alive is decremented once in mpls_ifdown when the device is taken
down (NETDEV_DOWN) and again when it is deleted (NETDEV_UNREGISTER). For
a 2 hop route, deleting one device drops the alive count to 0. Since
devices are taken down before unregistering, the decrement on
NETDEV_UNREGISTER is redundant.

Fixes: c89359a42e2a4 ("mpls: support for dead routes")
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mpls/af_mpls.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index a1477989ed0b..33211f9a2656 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1288,7 +1288,8 @@ static void mpls_ifdown(struct net_device *dev, int event)
 				/* fall through */
 			case NETDEV_CHANGE:
 				nh->nh_flags |= RTNH_F_LINKDOWN;
-				ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1;
+				if (event != NETDEV_UNREGISTER)
+					ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1;
 				break;
 			}
 			if (event == NETDEV_UNREGISTER)
-- 
cgit v1.2.3


From 1da8ac7c49fb2879ba95006d8bd1095e6870ea1a Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Fri, 10 Mar 2017 22:05:55 -0800
Subject: selftests/bpf: fix broken build

Recent merge of 'linux-kselftest-4.11-rc1' tree broke bpf test build.
None of the tests were building and test_verifier.c had tons of compiler errors.
Fix it and add #ifdef CAP_IS_SUPPORTED to support old versions of libcap.
Tested on centos 6.8 and 7

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/include/uapi/linux/bpf_perf_event.h   | 18 ++++++++++++++++++
 tools/testing/selftests/bpf/Makefile        |  4 +++-
 tools/testing/selftests/bpf/test_verifier.c |  4 ++++
 3 files changed, 25 insertions(+), 1 deletion(-)
 create mode 100644 tools/include/uapi/linux/bpf_perf_event.h

diff --git a/tools/include/uapi/linux/bpf_perf_event.h b/tools/include/uapi/linux/bpf_perf_event.h
new file mode 100644
index 000000000000..067427259820
--- /dev/null
+++ b/tools/include/uapi/linux/bpf_perf_event.h
@@ -0,0 +1,18 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__
+#define _UAPI__LINUX_BPF_PERF_EVENT_H__
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+
+struct bpf_perf_event_data {
+	struct pt_regs regs;
+	__u64 sample_period;
+};
+
+#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 4b498265dae6..67531f47781b 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -1,12 +1,14 @@
 LIBDIR := ../../../lib
 BPFOBJ := $(LIBDIR)/bpf/bpf.o
 
-CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR)
+CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR) $(BPFOBJ)
 
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map
 
 TEST_PROGS := test_kmod.sh
 
+all: $(TEST_GEN_PROGS)
+
 .PHONY: all clean force
 
 # force a rebuild of BPFOBJ when its dependencies are updated
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index e1f5b9eea1e8..d1555e4240c0 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -8,6 +8,8 @@
  * License as published by the Free Software Foundation.
  */
 
+#include <asm/types.h>
+#include <linux/types.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -4583,10 +4585,12 @@ static bool is_admin(void)
 	cap_flag_value_t sysadmin = CAP_CLEAR;
 	const cap_value_t cap_val = CAP_SYS_ADMIN;
 
+#ifdef CAP_IS_SUPPORTED
 	if (!CAP_IS_SUPPORTED(CAP_SETFCAP)) {
 		perror("cap_get_flag");
 		return false;
 	}
+#endif
 	caps = cap_get_proc();
 	if (!caps) {
 		perror("cap_get_proc");
-- 
cgit v1.2.3


From 65869a47f3488253f5fd88cc4f14e0a4e2601a55 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 11 Mar 2017 16:55:49 +0100
Subject: bpf: improve read-only handling

Improve bpf_{prog,jit_binary}_{un,}lock_ro() by throwing a
one-time warning in case of an error when the image couldn't
be set read-only, and also mark struct bpf_prog as locked when
bpf_prog_lock_ro() was called.

Reason for the latter is that bpf_prog_unlock_ro() is called from
various places including error paths, and we shouldn't mess with
page attributes when really not needed.

For bpf_jit_binary_unlock_ro() this is not needed as jited flag
implicitly indicates this, thus for archs with ARCH_HAS_SET_MEMORY
we're guaranteed to have a previously locked image. Overall, this
should also help us to identify any further potential issues with
set_memory_*() helpers.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 0c167fdee5f7..fbf7b39e8103 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -409,6 +409,7 @@ struct bpf_prog {
 	u16			pages;		/* Number of allocated pages */
 	kmemcheck_bitfield_begin(meta);
 	u16			jited:1,	/* Is our filter JIT'ed? */
+				locked:1,	/* Program image locked? */
 				gpl_compatible:1, /* Is filter GPL compatible? */
 				cb_access:1,	/* Is control block accessed? */
 				dst_needed:1,	/* Do we need dst entry? */
@@ -554,22 +555,29 @@ static inline bool bpf_prog_was_classic(const struct bpf_prog *prog)
 #ifdef CONFIG_ARCH_HAS_SET_MEMORY
 static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
 {
-	set_memory_ro((unsigned long)fp, fp->pages);
+	fp->locked = 1;
+	WARN_ON_ONCE(set_memory_ro((unsigned long)fp, fp->pages));
 }
 
 static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 {
-	set_memory_rw((unsigned long)fp, fp->pages);
+	if (fp->locked) {
+		WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages));
+		/* In case set_memory_rw() fails, we want to be the first
+		 * to crash here instead of some random place later on.
+		 */
+		fp->locked = 0;
+	}
 }
 
 static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
 {
-	set_memory_ro((unsigned long)hdr, hdr->pages);
+	WARN_ON_ONCE(set_memory_ro((unsigned long)hdr, hdr->pages));
 }
 
 static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
 {
-	set_memory_rw((unsigned long)hdr, hdr->pages);
+	WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages));
 }
 #else
 static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
-- 
cgit v1.2.3


From 80354c29025833acd72ddac1ffa21c6cb50128cd Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Sun, 12 Mar 2017 17:07:44 +0200
Subject: x86/platform/intel-mid: Correct MSI IRQ line for watchdog device

The interrupt line used for the watchdog is 12, according to the official
Intel Edison BSP code.

And indeed after fixing it we start getting an interrupt and thus the
watchdog starts working again:

  [  191.699951] Kernel panic - not syncing: Kernel Watchdog

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: David Cohen <david.a.cohen@linux.intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 78a3bb9e408b ("x86: intel-mid: add watchdog platform code for Merrifield")
Link: http://lkml.kernel.org/r/20170312150744.45493-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
index 86edd1e941eb..9e304e2ea4f5 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
@@ -19,7 +19,7 @@
 #include <asm/intel_scu_ipc.h>
 #include <asm/io_apic.h>
 
-#define TANGIER_EXT_TIMER0_MSI 15
+#define TANGIER_EXT_TIMER0_MSI 12
 
 static struct platform_device wdt_dev = {
 	.name = "intel_mid_wdt",
-- 
cgit v1.2.3


From 9fa1d7537242bd580ffa99c4725a0407096aad26 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ti.com>
Date: Tue, 28 Feb 2017 10:11:45 +0200
Subject: drm/omap: fix dmabuf mmap for dma_alloc'ed buffers

omap_gem_dmabuf_mmap() returns an error (with a WARN) when called for a
buffer which is allocated with dma_alloc_*(). This prevents dmabuf mmap
from working on SoCs without DMM, e.g. AM4 and OMAP3.

I could not find any reason for omap_gem_dmabuf_mmap() rejecting such
buffers, and just removing the if() fixes the limitation.

Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
---
 drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
index af267c35d813..ee5883f59be5 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
@@ -147,9 +147,6 @@ static int omap_gem_dmabuf_mmap(struct dma_buf *buffer,
 	struct drm_gem_object *obj = buffer->priv;
 	int ret = 0;
 
-	if (WARN_ON(!obj->filp))
-		return -EINVAL;
-
 	ret = drm_gem_mmap_obj(obj, omap_gem_mmap_size(obj), vma);
 	if (ret < 0)
 		return ret;
-- 
cgit v1.2.3


From 337ba7fbf0fbc12242359c4af114878618b90951 Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@altlinux.org>
Date: Sat, 25 Feb 2017 16:29:50 +0300
Subject: uapi: fix drm/omap_drm.h userspace compilation errors

Consistently use types from linux/types.h like in other uapi drm/*_drm.h
header files to fix the following drm/omap_drm.h userspace compilation
errors:

/usr/include/drm/omap_drm.h:36:2: error: unknown type name 'uint64_t'
  uint64_t param;   /* in */
/usr/include/drm/omap_drm.h:37:2: error: unknown type name 'uint64_t'
  uint64_t value;   /* in (set_param), out (get_param) */
/usr/include/drm/omap_drm.h:56:2: error: unknown type name 'uint32_t'
  uint32_t bytes;  /* (for non-tiled formats) */
/usr/include/drm/omap_drm.h:58:3: error: unknown type name 'uint16_t'
   uint16_t width;
/usr/include/drm/omap_drm.h:59:3: error: unknown type name 'uint16_t'
   uint16_t height;
/usr/include/drm/omap_drm.h:65:2: error: unknown type name 'uint32_t'
  uint32_t flags;   /* in */
/usr/include/drm/omap_drm.h:66:2: error: unknown type name 'uint32_t'
  uint32_t handle;  /* out */
/usr/include/drm/omap_drm.h:67:2: error: unknown type name 'uint32_t'
  uint32_t __pad;
/usr/include/drm/omap_drm.h:77:2: error: unknown type name 'uint32_t'
  uint32_t handle;  /* buffer handle (in) */
/usr/include/drm/omap_drm.h:78:2: error: unknown type name 'uint32_t'
  uint32_t op;   /* mask of omap_gem_op (in) */
/usr/include/drm/omap_drm.h:82:2: error: unknown type name 'uint32_t'
  uint32_t handle;  /* buffer handle (in) */
/usr/include/drm/omap_drm.h:83:2: error: unknown type name 'uint32_t'
  uint32_t op;   /* mask of omap_gem_op (in) */
/usr/include/drm/omap_drm.h:88:2: error: unknown type name 'uint32_t'
  uint32_t nregions;
/usr/include/drm/omap_drm.h:89:2: error: unknown type name 'uint32_t'
  uint32_t __pad;
/usr/include/drm/omap_drm.h:93:2: error: unknown type name 'uint32_t'
  uint32_t handle;  /* buffer handle (in) */
/usr/include/drm/omap_drm.h:94:2: error: unknown type name 'uint32_t'
  uint32_t pad;
/usr/include/drm/omap_drm.h:95:2: error: unknown type name 'uint64_t'
  uint64_t offset;  /* mmap offset (out) */
/usr/include/drm/omap_drm.h:102:2: error: unknown type name 'uint32_t'
  uint32_t size;   /* virtual size for mmap'ing (out) */
/usr/include/drm/omap_drm.h:103:2: error: unknown type name 'uint32_t'
  uint32_t __pad;

Fixes: ef6503e89194 ("drm: Kbuild: add omap_drm.h to the installed headers")
Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
---
 include/uapi/drm/omap_drm.h | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/include/uapi/drm/omap_drm.h b/include/uapi/drm/omap_drm.h
index 407cb55df6ac..7fb97863c945 100644
--- a/include/uapi/drm/omap_drm.h
+++ b/include/uapi/drm/omap_drm.h
@@ -33,8 +33,8 @@ extern "C" {
 #define OMAP_PARAM_CHIPSET_ID	1	/* ie. 0x3430, 0x4430, etc */
 
 struct drm_omap_param {
-	uint64_t param;			/* in */
-	uint64_t value;			/* in (set_param), out (get_param) */
+	__u64 param;			/* in */
+	__u64 value;			/* in (set_param), out (get_param) */
 };
 
 #define OMAP_BO_SCANOUT		0x00000001	/* scanout capable (phys contiguous) */
@@ -53,18 +53,18 @@ struct drm_omap_param {
 #define OMAP_BO_TILED		(OMAP_BO_TILED_8 | OMAP_BO_TILED_16 | OMAP_BO_TILED_32)
 
 union omap_gem_size {
-	uint32_t bytes;		/* (for non-tiled formats) */
+	__u32 bytes;		/* (for non-tiled formats) */
 	struct {
-		uint16_t width;
-		uint16_t height;
+		__u16 width;
+		__u16 height;
 	} tiled;		/* (for tiled formats) */
 };
 
 struct drm_omap_gem_new {
 	union omap_gem_size size;	/* in */
-	uint32_t flags;			/* in */
-	uint32_t handle;		/* out */
-	uint32_t __pad;
+	__u32 flags;			/* in */
+	__u32 handle;			/* out */
+	__u32 __pad;
 };
 
 /* mask of operations: */
@@ -74,33 +74,33 @@ enum omap_gem_op {
 };
 
 struct drm_omap_gem_cpu_prep {
-	uint32_t handle;		/* buffer handle (in) */
-	uint32_t op;			/* mask of omap_gem_op (in) */
+	__u32 handle;			/* buffer handle (in) */
+	__u32 op;			/* mask of omap_gem_op (in) */
 };
 
 struct drm_omap_gem_cpu_fini {
-	uint32_t handle;		/* buffer handle (in) */
-	uint32_t op;			/* mask of omap_gem_op (in) */
+	__u32 handle;			/* buffer handle (in) */
+	__u32 op;			/* mask of omap_gem_op (in) */
 	/* TODO maybe here we pass down info about what regions are touched
 	 * by sw so we can be clever about cache ops?  For now a placeholder,
 	 * set to zero and we just do full buffer flush..
 	 */
-	uint32_t nregions;
-	uint32_t __pad;
+	__u32 nregions;
+	__u32 __pad;
 };
 
 struct drm_omap_gem_info {
-	uint32_t handle;		/* buffer handle (in) */
-	uint32_t pad;
-	uint64_t offset;		/* mmap offset (out) */
+	__u32 handle;			/* buffer handle (in) */
+	__u32 pad;
+	__u64 offset;			/* mmap offset (out) */
 	/* note: in case of tiled buffers, the user virtual size can be
 	 * different from the physical size (ie. how many pages are needed
 	 * to back the object) which is returned in DRM_IOCTL_GEM_OPEN..
 	 * This size here is the one that should be used if you want to
 	 * mmap() the buffer:
 	 */
-	uint32_t size;			/* virtual size for mmap'ing (out) */
-	uint32_t __pad;
+	__u32 size;			/* virtual size for mmap'ing (out) */
+	__u32 __pad;
 };
 
 #define DRM_OMAP_GET_PARAM		0x00
-- 
cgit v1.2.3


From fd89b23a4632d3cbdee398048497e026edadfb71 Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Mon, 6 Mar 2017 00:02:52 +0800
Subject: netfilter: nft_set_bitmap: fetch the element key based on the
 set->klen

Currently we just assume the element key as a u32 integer, regardless of
the set key length.

This is incorrect, for example, the tcp port number is only 16 bits.
So when we use the nft_payload expr to get the tcp dport and store
it to dreg, the dport will be stored at 0~15 bits, and 16~31 bits
will be padded with zero.

So the reg->data[dreg] will be looked like as below:
  0          15           31
  +-+-+-+-+-+-+-+-+-+-+-+-+
  | tcp dport |      0    |
  +-+-+-+-+-+-+-+-+-+-+-+-+
But for these big-endian systems, if we treate this register as a u32
integer, the element key will be larger than 65535, so the following
lookup in bitmap set will cause out of bound access.

Another issue is that if we add element with comments in bitmap
set(although the comments will be ignored eventually), the element will
vanish strangely. Because we treate the element key as a u32 integer, so
the comments will become the part of the element key, then the element
key will also be larger than 65535 and out of bound access will happen:
  # nft add element t s { 1 comment test }

Since set->klen is 1 or 2, it's fine to treate the element key as a u8 or
u16 integer.

Fixes: 665153ff5752 ("netfilter: nf_tables: add bitmap set type")
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_set_bitmap.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 152d226552c1..9b024e22717b 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -45,9 +45,17 @@ struct nft_bitmap {
 	u8	bitmap[];
 };
 
-static inline void nft_bitmap_location(u32 key, u32 *idx, u32 *off)
+static inline void nft_bitmap_location(const struct nft_set *set,
+				       const void *key,
+				       u32 *idx, u32 *off)
 {
-	u32 k = (key << 1);
+	u32 k;
+
+	if (set->klen == 2)
+		k = *(u16 *)key;
+	else
+		k = *(u8 *)key;
+	k <<= 1;
 
 	*idx = k / BITS_PER_BYTE;
 	*off = k % BITS_PER_BYTE;
@@ -69,7 +77,7 @@ static bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
 	u8 genmask = nft_genmask_cur(net);
 	u32 idx, off;
 
-	nft_bitmap_location(*key, &idx, &off);
+	nft_bitmap_location(set, key, &idx, &off);
 
 	return nft_bitmap_active(priv->bitmap, idx, off, genmask);
 }
@@ -83,7 +91,7 @@ static int nft_bitmap_insert(const struct net *net, const struct nft_set *set,
 	u8 genmask = nft_genmask_next(net);
 	u32 idx, off;
 
-	nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off);
+	nft_bitmap_location(set, nft_set_ext_key(ext), &idx, &off);
 	if (nft_bitmap_active(priv->bitmap, idx, off, genmask))
 		return -EEXIST;
 
@@ -102,7 +110,7 @@ static void nft_bitmap_remove(const struct net *net,
 	u8 genmask = nft_genmask_next(net);
 	u32 idx, off;
 
-	nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off);
+	nft_bitmap_location(set, nft_set_ext_key(ext), &idx, &off);
 	/* Enter 00 state. */
 	priv->bitmap[idx] &= ~(genmask << off);
 }
@@ -116,7 +124,7 @@ static void nft_bitmap_activate(const struct net *net,
 	u8 genmask = nft_genmask_next(net);
 	u32 idx, off;
 
-	nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off);
+	nft_bitmap_location(set, nft_set_ext_key(ext), &idx, &off);
 	/* Enter 11 state. */
 	priv->bitmap[idx] |= (genmask << off);
 }
@@ -128,7 +136,7 @@ static bool nft_bitmap_flush(const struct net *net,
 	u8 genmask = nft_genmask_next(net);
 	u32 idx, off;
 
-	nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off);
+	nft_bitmap_location(set, nft_set_ext_key(ext), &idx, &off);
 	/* Enter 10 state, similar to deactivation. */
 	priv->bitmap[idx] &= ~(genmask << off);
 
@@ -161,10 +169,9 @@ static void *nft_bitmap_deactivate(const struct net *net,
 	struct nft_bitmap *priv = nft_set_priv(set);
 	u8 genmask = nft_genmask_next(net);
 	struct nft_set_ext *ext;
-	u32 idx, off, key = 0;
+	u32 idx, off;
 
-	memcpy(&key, elem->key.val.data, set->klen);
-	nft_bitmap_location(key, &idx, &off);
+	nft_bitmap_location(set, elem->key.val.data, &idx, &off);
 
 	if (!nft_bitmap_active(priv->bitmap, idx, off, genmask))
 		return NULL;
-- 
cgit v1.2.3


From 10596608c4d62cb8c1c2b806debcbd32fe657e71 Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Wed, 8 Mar 2017 22:54:18 +0800
Subject: netfilter: nf_tables: fix mismatch in big-endian system

Currently, there are two different methods to store an u16 integer to
the u32 data register. For example:
  u32 *dest = &regs->data[priv->dreg];
  1. *dest = 0; *(u16 *) dest = val_u16;
  2. *dest = val_u16;

For method 1, the u16 value will be stored like this, either in
big-endian or little-endian system:
  0          15           31
  +-+-+-+-+-+-+-+-+-+-+-+-+
  |   Value   |     0     |
  +-+-+-+-+-+-+-+-+-+-+-+-+

For method 2, in little-endian system, the u16 value will be the same
as listed above. But in big-endian system, the u16 value will be stored
like this:
  0          15           31
  +-+-+-+-+-+-+-+-+-+-+-+-+
  |     0     |   Value   |
  +-+-+-+-+-+-+-+-+-+-+-+-+

So later we use "memcmp(&regs->data[priv->sreg], data, 2);" to do
compare in nft_cmp, nft_lookup expr ..., method 2 will get the wrong
result in big-endian system, as 0~15 bits will always be zero.

For the similar reason, when loading an u16 value from the u32 data
register, we should use "*(u16 *) sreg;" instead of "(u16)*sreg;",
the 2nd method will get the wrong value in the big-endian system.

So introduce some wrapper functions to store/load an u8 or u16
integer to/from the u32 data register, and use them in the right
place.

Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h   | 29 +++++++++++++++++++++++++++
 net/ipv4/netfilter/nft_masq_ipv4.c  |  8 ++++----
 net/ipv4/netfilter/nft_redir_ipv4.c |  8 ++++----
 net/ipv6/netfilter/nft_masq_ipv6.c  |  8 ++++----
 net/ipv6/netfilter/nft_redir_ipv6.c |  8 ++++----
 net/netfilter/nft_ct.c              | 18 +++++++++--------
 net/netfilter/nft_meta.c            | 40 +++++++++++++++++++------------------
 net/netfilter/nft_nat.c             |  8 ++++----
 8 files changed, 80 insertions(+), 47 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 2aa8a9d80fbe..70c5ca0c60b1 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -103,6 +103,35 @@ struct nft_regs {
 	};
 };
 
+/* Store/load an u16 or u8 integer to/from the u32 data register.
+ *
+ * Note, when using concatenations, register allocation happens at 32-bit
+ * level. So for store instruction, pad the rest part with zero to avoid
+ * garbage values.
+ */
+
+static inline void nft_reg_store16(u32 *dreg, u16 val)
+{
+	*dreg = 0;
+	*(u16 *)dreg = val;
+}
+
+static inline void nft_reg_store8(u32 *dreg, u8 val)
+{
+	*dreg = 0;
+	*(u8 *)dreg = val;
+}
+
+static inline u16 nft_reg_load16(u32 *sreg)
+{
+	return *(u16 *)sreg;
+}
+
+static inline u8 nft_reg_load8(u32 *sreg)
+{
+	return *(u8 *)sreg;
+}
+
 static inline void nft_data_copy(u32 *dst, const struct nft_data *src,
 				 unsigned int len)
 {
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index a0ea8aad1bf1..f18677277119 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -26,10 +26,10 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr,
 	memset(&range, 0, sizeof(range));
 	range.flags = priv->flags;
 	if (priv->sreg_proto_min) {
-		range.min_proto.all =
-			*(__be16 *)&regs->data[priv->sreg_proto_min];
-		range.max_proto.all =
-			*(__be16 *)&regs->data[priv->sreg_proto_max];
+		range.min_proto.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_min]);
+		range.max_proto.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_max]);
 	}
 	regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt),
 						    &range, nft_out(pkt));
diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c
index 1650ed23c15d..5120be1d3118 100644
--- a/net/ipv4/netfilter/nft_redir_ipv4.c
+++ b/net/ipv4/netfilter/nft_redir_ipv4.c
@@ -26,10 +26,10 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr,
 
 	memset(&mr, 0, sizeof(mr));
 	if (priv->sreg_proto_min) {
-		mr.range[0].min.all =
-			*(__be16 *)&regs->data[priv->sreg_proto_min];
-		mr.range[0].max.all =
-			*(__be16 *)&regs->data[priv->sreg_proto_max];
+		mr.range[0].min.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_min]);
+		mr.range[0].max.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_max]);
 		mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
 	}
 
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
index 6c5b5b1830a7..4146536e9c15 100644
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -27,10 +27,10 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr,
 	memset(&range, 0, sizeof(range));
 	range.flags = priv->flags;
 	if (priv->sreg_proto_min) {
-		range.min_proto.all =
-			*(__be16 *)&regs->data[priv->sreg_proto_min];
-		range.max_proto.all =
-			*(__be16 *)&regs->data[priv->sreg_proto_max];
+		range.min_proto.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_min]);
+		range.max_proto.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_max]);
 	}
 	regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range,
 						    nft_out(pkt));
diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c
index f5ac080fc084..a27e424f690d 100644
--- a/net/ipv6/netfilter/nft_redir_ipv6.c
+++ b/net/ipv6/netfilter/nft_redir_ipv6.c
@@ -26,10 +26,10 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr,
 
 	memset(&range, 0, sizeof(range));
 	if (priv->sreg_proto_min) {
-		range.min_proto.all =
-			*(__be16 *)&regs->data[priv->sreg_proto_min],
-		range.max_proto.all =
-			*(__be16 *)&regs->data[priv->sreg_proto_max],
+		range.min_proto.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_min]);
+		range.max_proto.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_max]);
 		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
 	}
 
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index bf548a7a71ec..91585b5e5307 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -83,7 +83,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
 
 	switch (priv->key) {
 	case NFT_CT_DIRECTION:
-		*dest = CTINFO2DIR(ctinfo);
+		nft_reg_store8(dest, CTINFO2DIR(ctinfo));
 		return;
 	case NFT_CT_STATUS:
 		*dest = ct->status;
@@ -151,20 +151,22 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
 		return;
 	}
 	case NFT_CT_L3PROTOCOL:
-		*dest = nf_ct_l3num(ct);
+		nft_reg_store8(dest, nf_ct_l3num(ct));
 		return;
 	case NFT_CT_PROTOCOL:
-		*dest = nf_ct_protonum(ct);
+		nft_reg_store8(dest, nf_ct_protonum(ct));
 		return;
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	case NFT_CT_ZONE: {
 		const struct nf_conntrack_zone *zone = nf_ct_zone(ct);
+		u16 zoneid;
 
 		if (priv->dir < IP_CT_DIR_MAX)
-			*dest = nf_ct_zone_id(zone, priv->dir);
+			zoneid = nf_ct_zone_id(zone, priv->dir);
 		else
-			*dest = zone->id;
+			zoneid = zone->id;
 
+		nft_reg_store16(dest, zoneid);
 		return;
 	}
 #endif
@@ -183,10 +185,10 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
 		       nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
 		return;
 	case NFT_CT_PROTO_SRC:
-		*dest = (__force __u16)tuple->src.u.all;
+		nft_reg_store16(dest, (__force u16)tuple->src.u.all);
 		return;
 	case NFT_CT_PROTO_DST:
-		*dest = (__force __u16)tuple->dst.u.all;
+		nft_reg_store16(dest, (__force u16)tuple->dst.u.all);
 		return;
 	default:
 		break;
@@ -205,7 +207,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
 	const struct nft_ct *priv = nft_expr_priv(expr);
 	struct sk_buff *skb = pkt->skb;
 	enum ip_conntrack_info ctinfo;
-	u16 value = regs->data[priv->sreg];
+	u16 value = nft_reg_load16(&regs->data[priv->sreg]);
 	struct nf_conn *ct;
 
 	ct = nf_ct_get(skb, &ctinfo);
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index e1f5ca9b423b..7b60e01f38ff 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -45,16 +45,15 @@ void nft_meta_get_eval(const struct nft_expr *expr,
 		*dest = skb->len;
 		break;
 	case NFT_META_PROTOCOL:
-		*dest = 0;
-		*(__be16 *)dest = skb->protocol;
+		nft_reg_store16(dest, (__force u16)skb->protocol);
 		break;
 	case NFT_META_NFPROTO:
-		*dest = nft_pf(pkt);
+		nft_reg_store8(dest, nft_pf(pkt));
 		break;
 	case NFT_META_L4PROTO:
 		if (!pkt->tprot_set)
 			goto err;
-		*dest = pkt->tprot;
+		nft_reg_store8(dest, pkt->tprot);
 		break;
 	case NFT_META_PRIORITY:
 		*dest = skb->priority;
@@ -85,14 +84,12 @@ void nft_meta_get_eval(const struct nft_expr *expr,
 	case NFT_META_IIFTYPE:
 		if (in == NULL)
 			goto err;
-		*dest = 0;
-		*(u16 *)dest = in->type;
+		nft_reg_store16(dest, in->type);
 		break;
 	case NFT_META_OIFTYPE:
 		if (out == NULL)
 			goto err;
-		*dest = 0;
-		*(u16 *)dest = out->type;
+		nft_reg_store16(dest, out->type);
 		break;
 	case NFT_META_SKUID:
 		sk = skb_to_full_sk(skb);
@@ -142,19 +139,19 @@ void nft_meta_get_eval(const struct nft_expr *expr,
 #endif
 	case NFT_META_PKTTYPE:
 		if (skb->pkt_type != PACKET_LOOPBACK) {
-			*dest = skb->pkt_type;
+			nft_reg_store8(dest, skb->pkt_type);
 			break;
 		}
 
 		switch (nft_pf(pkt)) {
 		case NFPROTO_IPV4:
 			if (ipv4_is_multicast(ip_hdr(skb)->daddr))
-				*dest = PACKET_MULTICAST;
+				nft_reg_store8(dest, PACKET_MULTICAST);
 			else
-				*dest = PACKET_BROADCAST;
+				nft_reg_store8(dest, PACKET_BROADCAST);
 			break;
 		case NFPROTO_IPV6:
-			*dest = PACKET_MULTICAST;
+			nft_reg_store8(dest, PACKET_MULTICAST);
 			break;
 		case NFPROTO_NETDEV:
 			switch (skb->protocol) {
@@ -168,14 +165,14 @@ void nft_meta_get_eval(const struct nft_expr *expr,
 					goto err;
 
 				if (ipv4_is_multicast(iph->daddr))
-					*dest = PACKET_MULTICAST;
+					nft_reg_store8(dest, PACKET_MULTICAST);
 				else
-					*dest = PACKET_BROADCAST;
+					nft_reg_store8(dest, PACKET_BROADCAST);
 
 				break;
 			}
 			case htons(ETH_P_IPV6):
-				*dest = PACKET_MULTICAST;
+				nft_reg_store8(dest, PACKET_MULTICAST);
 				break;
 			default:
 				WARN_ON_ONCE(1);
@@ -230,7 +227,9 @@ void nft_meta_set_eval(const struct nft_expr *expr,
 {
 	const struct nft_meta *meta = nft_expr_priv(expr);
 	struct sk_buff *skb = pkt->skb;
-	u32 value = regs->data[meta->sreg];
+	u32 *sreg = &regs->data[meta->sreg];
+	u32 value = *sreg;
+	u8 pkt_type;
 
 	switch (meta->key) {
 	case NFT_META_MARK:
@@ -240,9 +239,12 @@ void nft_meta_set_eval(const struct nft_expr *expr,
 		skb->priority = value;
 		break;
 	case NFT_META_PKTTYPE:
-		if (skb->pkt_type != value &&
-		    skb_pkt_type_ok(value) && skb_pkt_type_ok(skb->pkt_type))
-			skb->pkt_type = value;
+		pkt_type = nft_reg_load8(sreg);
+
+		if (skb->pkt_type != pkt_type &&
+		    skb_pkt_type_ok(pkt_type) &&
+		    skb_pkt_type_ok(skb->pkt_type))
+			skb->pkt_type = pkt_type;
 		break;
 	case NFT_META_NFTRACE:
 		skb->nf_trace = !!value;
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 19a7bf3236f9..439e0bd152a0 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -65,10 +65,10 @@ static void nft_nat_eval(const struct nft_expr *expr,
 	}
 
 	if (priv->sreg_proto_min) {
-		range.min_proto.all =
-			*(__be16 *)&regs->data[priv->sreg_proto_min];
-		range.max_proto.all =
-			*(__be16 *)&regs->data[priv->sreg_proto_max];
+		range.min_proto.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_min]);
+		range.max_proto.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_max]);
 		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
 	}
 
-- 
cgit v1.2.3


From 4ca60d08cbe65f501baad64af50fceba79c19fbb Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 9 Mar 2017 23:22:30 +0100
Subject: netfilter: bridge: honor frag_max_size when refragmenting

consider a bridge with mtu 9000, but end host sending smaller
packets to another host with mtu < 9000.

In this case, after reassembly, bridge+defrag would refragment,
and then attempt to send the reassembled packet as long as it
was below 9k.

Instead we have to cap by the largest fragment size seen.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/br_netfilter_hooks.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 95087e6e8258..3c5185021c1c 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -721,18 +721,20 @@ static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
 
 static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-	struct nf_bridge_info *nf_bridge;
-	unsigned int mtu_reserved;
+	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+	unsigned int mtu, mtu_reserved;
 
 	mtu_reserved = nf_bridge_mtu_reduction(skb);
+	mtu = skb->dev->mtu;
+
+	if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu)
+		mtu = nf_bridge->frag_max_size;
 
-	if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) {
+	if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) {
 		nf_bridge_info_free(skb);
 		return br_dev_queue_push_xmit(net, sk, skb);
 	}
 
-	nf_bridge = nf_bridge_info_get(skb);
-
 	/* This is wrong! We should preserve the original fragment
 	 * boundaries by preserving frag_list rather than refragmenting.
 	 */
-- 
cgit v1.2.3


From 170a1fb9c01bc40b7e8fd57a32ac9a0e131ec5b6 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Sat, 11 Mar 2017 00:25:26 -0500
Subject: netfilter: Force fake conntrack entry to be at least 8 bytes aligned

Since the nfct and nfctinfo have been combined, the nf_conn structure
must be at least 8 bytes aligned, as the 3 LSB bits are used for the
nfctinfo. But there's a fake nf_conn structure to denote untracked
connections, which is created by a PER_CPU construct. This does not
guarantee that it will be 8 bytes aligned and can break the logic in
determining the correct nfctinfo.

I triggered this on a 32bit machine with the following error:

BUG: unable to handle kernel NULL pointer dereference at 00000af4
IP: nf_ct_deliver_cached_events+0x1b/0xfb
*pdpt = 0000000031962001 *pde = 0000000000000000

Oops: 0000 [#1] SMP
[Modules linked in: ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6_tables ipv6 crc_ccitt ppdev r8169 parport_pc parport
  OK  ]
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.10.0-test+ #75
Hardware name: MSI MS-7823/CSM-H87M-G43 (MS-7823), BIOS V1.6 02/22/2014
task: c126ec00 task.stack: c1258000
EIP: nf_ct_deliver_cached_events+0x1b/0xfb
EFLAGS: 00010202 CPU: 0
EAX: 0021cd01 EBX: 00000000 ECX: 27b0c767 EDX: 32bcb17a
ESI: f34135c0 EDI: f34135c0 EBP: f2debd60 ESP: f2debd3c
 DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
CR0: 80050033 CR2: 00000af4 CR3: 309a0440 CR4: 001406f0
Call Trace:
 <SOFTIRQ>
 ? ipv6_skip_exthdr+0xac/0xcb
 ipv6_confirm+0x10c/0x119 [nf_conntrack_ipv6]
 nf_hook_slow+0x22/0xc7
 nf_hook+0x9a/0xad [ipv6]
 ? ip6t_do_table+0x356/0x379 [ip6_tables]
 ? ip6_fragment+0x9e9/0x9e9 [ipv6]
 ip6_output+0xee/0x107 [ipv6]
 ? ip6_fragment+0x9e9/0x9e9 [ipv6]
 dst_output+0x36/0x4d [ipv6]
 NF_HOOK.constprop.37+0xb2/0xba [ipv6]
 ? icmp6_dst_alloc+0x2c/0xfd [ipv6]
 ? local_bh_enable+0x14/0x14 [ipv6]
 mld_sendpack+0x1c5/0x281 [ipv6]
 ? mark_held_locks+0x40/0x5c
 mld_ifc_timer_expire+0x1f6/0x21e [ipv6]
 call_timer_fn+0x135/0x283
 ? detach_if_pending+0x55/0x55
 ? mld_dad_timer_expire+0x3e/0x3e [ipv6]
 __run_timers+0x111/0x14b
 ? mld_dad_timer_expire+0x3e/0x3e [ipv6]
 run_timer_softirq+0x1c/0x36
 __do_softirq+0x185/0x37c
 ? test_ti_thread_flag.constprop.19+0xd/0xd
 do_softirq_own_stack+0x22/0x28
 </SOFTIRQ>
 irq_exit+0x5a/0xa4
 smp_apic_timer_interrupt+0x2a/0x34
 apic_timer_interrupt+0x37/0x3c

By using DEFINE/DECLARE_PER_CPU_ALIGNED we can enforce at least 8 byte
alignment as all cache line sizes are at least 8 bytes or more.

Fixes: a9e419dc7be6 ("netfilter: merge ctinfo into nfct pointer storage area")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h | 2 +-
 net/netfilter/nf_conntrack_core.c    | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index f540f9ad2af4..19605878da47 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -244,7 +244,7 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct,
 			       u32 seq);
 
 /* Fake conntrack entry for untracked connections */
-DECLARE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
+DECLARE_PER_CPU_ALIGNED(struct nf_conn, nf_conntrack_untracked);
 static inline struct nf_conn *nf_ct_untracked_get(void)
 {
 	return raw_cpu_ptr(&nf_conntrack_untracked);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 071b97fcbefb..ffb78e5f7b70 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -181,7 +181,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
 unsigned int nf_conntrack_max __read_mostly;
 seqcount_t nf_conntrack_generation __read_mostly;
 
-DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
+/* nf_conn must be 8 bytes aligned, as the 3 LSB bits are used
+ * for the nfctinfo. We cheat by (ab)using the PER CPU cache line
+ * alignment to enforce this.
+ */
+DEFINE_PER_CPU_ALIGNED(struct nf_conn, nf_conntrack_untracked);
 EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
 
 static unsigned int nf_conntrack_hash_rnd __read_mostly;
-- 
cgit v1.2.3


From e920dde5160887d07b738f5a7f593b1fa9b1e32e Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 10 Mar 2017 18:32:31 +0100
Subject: netfilter: nft_set_bitmap: keep a list of dummy elements

Element comments may come without any prior set flag, so we have to keep
a list of dummy struct nft_set_ext to keep this information around. This
is only useful for set dumps to userspace. From the packet path, this
set type relies on the bitmap representation. This patch simplifies the
logic since we don't need to allocate the dummy nft_set_ext structure
anymore on the fly at the cost of increasing memory consumption because
of the list of dummy struct nft_set_ext.

Fixes: 665153ff5752 ("netfilter: nf_tables: add bitmap set type")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_set_bitmap.c | 146 +++++++++++++++++++----------------------
 1 file changed, 66 insertions(+), 80 deletions(-)

diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 9b024e22717b..8ebbc2940f4c 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -15,6 +15,11 @@
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
 
+struct nft_bitmap_elem {
+	struct list_head	head;
+	struct nft_set_ext	ext;
+};
+
 /* This bitmap uses two bits to represent one element. These two bits determine
  * the element state in the current and the future generation.
  *
@@ -41,8 +46,9 @@
  *      restore its previous state.
  */
 struct nft_bitmap {
-	u16	bitmap_size;
-	u8	bitmap[];
+	struct	list_head	list;
+	u16			bitmap_size;
+	u8			bitmap[];
 };
 
 static inline void nft_bitmap_location(const struct nft_set *set,
@@ -82,21 +88,43 @@ static bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
 	return nft_bitmap_active(priv->bitmap, idx, off, genmask);
 }
 
+static struct nft_bitmap_elem *
+nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this,
+		     u8 genmask)
+{
+	const struct nft_bitmap *priv = nft_set_priv(set);
+	struct nft_bitmap_elem *be;
+
+	list_for_each_entry_rcu(be, &priv->list, head) {
+		if (memcmp(nft_set_ext_key(&be->ext),
+			   nft_set_ext_key(&this->ext), set->klen) ||
+		    !nft_set_elem_active(&be->ext, genmask))
+			continue;
+
+		return be;
+	}
+	return NULL;
+}
+
 static int nft_bitmap_insert(const struct net *net, const struct nft_set *set,
 			     const struct nft_set_elem *elem,
-			     struct nft_set_ext **_ext)
+			     struct nft_set_ext **ext)
 {
 	struct nft_bitmap *priv = nft_set_priv(set);
-	struct nft_set_ext *ext = elem->priv;
+	struct nft_bitmap_elem *new = elem->priv, *be;
 	u8 genmask = nft_genmask_next(net);
 	u32 idx, off;
 
-	nft_bitmap_location(set, nft_set_ext_key(ext), &idx, &off);
-	if (nft_bitmap_active(priv->bitmap, idx, off, genmask))
+	be = nft_bitmap_elem_find(set, new, genmask);
+	if (be) {
+		*ext = &be->ext;
 		return -EEXIST;
+	}
 
+	nft_bitmap_location(set, nft_set_ext_key(&new->ext), &idx, &off);
 	/* Enter 01 state. */
 	priv->bitmap[idx] |= (genmask << off);
+	list_add_tail_rcu(&new->head, &priv->list);
 
 	return 0;
 }
@@ -106,13 +134,14 @@ static void nft_bitmap_remove(const struct net *net,
 			      const struct nft_set_elem *elem)
 {
 	struct nft_bitmap *priv = nft_set_priv(set);
-	struct nft_set_ext *ext = elem->priv;
+	struct nft_bitmap_elem *be = elem->priv;
 	u8 genmask = nft_genmask_next(net);
 	u32 idx, off;
 
-	nft_bitmap_location(set, nft_set_ext_key(ext), &idx, &off);
+	nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off);
 	/* Enter 00 state. */
 	priv->bitmap[idx] &= ~(genmask << off);
+	list_del_rcu(&be->head);
 }
 
 static void nft_bitmap_activate(const struct net *net,
@@ -120,73 +149,52 @@ static void nft_bitmap_activate(const struct net *net,
 				const struct nft_set_elem *elem)
 {
 	struct nft_bitmap *priv = nft_set_priv(set);
-	struct nft_set_ext *ext = elem->priv;
+	struct nft_bitmap_elem *be = elem->priv;
 	u8 genmask = nft_genmask_next(net);
 	u32 idx, off;
 
-	nft_bitmap_location(set, nft_set_ext_key(ext), &idx, &off);
+	nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off);
 	/* Enter 11 state. */
 	priv->bitmap[idx] |= (genmask << off);
+	nft_set_elem_change_active(net, set, &be->ext);
 }
 
 static bool nft_bitmap_flush(const struct net *net,
-			     const struct nft_set *set, void *ext)
+			     const struct nft_set *set, void *_be)
 {
 	struct nft_bitmap *priv = nft_set_priv(set);
 	u8 genmask = nft_genmask_next(net);
+	struct nft_bitmap_elem *be = _be;
 	u32 idx, off;
 
-	nft_bitmap_location(set, nft_set_ext_key(ext), &idx, &off);
+	nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off);
 	/* Enter 10 state, similar to deactivation. */
 	priv->bitmap[idx] &= ~(genmask << off);
+	nft_set_elem_change_active(net, set, &be->ext);
 
 	return true;
 }
 
-static struct nft_set_ext *nft_bitmap_ext_alloc(const struct nft_set *set,
-						const struct nft_set_elem *elem)
-{
-	struct nft_set_ext_tmpl tmpl;
-	struct nft_set_ext *ext;
-
-	nft_set_ext_prepare(&tmpl);
-	nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
-
-	ext = kzalloc(tmpl.len, GFP_KERNEL);
-	if (!ext)
-		return NULL;
-
-	nft_set_ext_init(ext, &tmpl);
-	memcpy(nft_set_ext_key(ext), elem->key.val.data, set->klen);
-
-	return ext;
-}
-
 static void *nft_bitmap_deactivate(const struct net *net,
 				   const struct nft_set *set,
 				   const struct nft_set_elem *elem)
 {
 	struct nft_bitmap *priv = nft_set_priv(set);
+	struct nft_bitmap_elem *this = elem->priv, *be;
 	u8 genmask = nft_genmask_next(net);
-	struct nft_set_ext *ext;
 	u32 idx, off;
 
 	nft_bitmap_location(set, elem->key.val.data, &idx, &off);
 
-	if (!nft_bitmap_active(priv->bitmap, idx, off, genmask))
-		return NULL;
-
-	/* We have no real set extension since this is a bitmap, allocate this
-	 * dummy object that is released from the commit/abort path.
-	 */
-	ext = nft_bitmap_ext_alloc(set, elem);
-	if (!ext)
+	be = nft_bitmap_elem_find(set, this, genmask);
+	if (!be)
 		return NULL;
 
 	/* Enter 10 state. */
 	priv->bitmap[idx] &= ~(genmask << off);
+	nft_set_elem_change_active(net, set, &be->ext);
 
-	return ext;
+	return be;
 }
 
 static void nft_bitmap_walk(const struct nft_ctx *ctx,
@@ -194,47 +202,23 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx,
 			    struct nft_set_iter *iter)
 {
 	const struct nft_bitmap *priv = nft_set_priv(set);
-	struct nft_set_ext_tmpl tmpl;
+	struct nft_bitmap_elem *be;
 	struct nft_set_elem elem;
-	struct nft_set_ext *ext;
-	int idx, off;
-	u16 key;
-
-	nft_set_ext_prepare(&tmpl);
-	nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
-
-	for (idx = 0; idx < priv->bitmap_size; idx++) {
-		for (off = 0; off < BITS_PER_BYTE; off += 2) {
-			if (iter->count < iter->skip)
-				goto cont;
-
-			if (!nft_bitmap_active(priv->bitmap, idx, off,
-					       iter->genmask))
-				goto cont;
-
-			ext = kzalloc(tmpl.len, GFP_KERNEL);
-			if (!ext) {
-				iter->err = -ENOMEM;
-				return;
-			}
-			nft_set_ext_init(ext, &tmpl);
-			key = ((idx * BITS_PER_BYTE) + off) >> 1;
-			memcpy(nft_set_ext_key(ext), &key, set->klen);
-
-			elem.priv = ext;
-			iter->err = iter->fn(ctx, set, iter, &elem);
-
-			/* On set flush, this dummy extension object is released
-			 * from the commit/abort path.
-			 */
-			if (!iter->flush)
-				kfree(ext);
-
-			if (iter->err < 0)
-				return;
+
+	list_for_each_entry_rcu(be, &priv->list, head) {
+		if (iter->count < iter->skip)
+			goto cont;
+		if (!nft_set_elem_active(&be->ext, iter->genmask))
+			goto cont;
+
+		elem.priv = be;
+
+		iter->err = iter->fn(ctx, set, iter, &elem);
+
+		if (iter->err < 0)
+			return;
 cont:
-			iter->count++;
-		}
+		iter->count++;
 	}
 }
 
@@ -265,6 +249,7 @@ static int nft_bitmap_init(const struct nft_set *set,
 {
 	struct nft_bitmap *priv = nft_set_priv(set);
 
+	INIT_LIST_HEAD(&priv->list);
 	priv->bitmap_size = nft_bitmap_size(set->klen);
 
 	return 0;
@@ -290,6 +275,7 @@ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
 
 static struct nft_set_ops nft_bitmap_ops __read_mostly = {
 	.privsize	= nft_bitmap_privsize,
+	.elemsize	= offsetof(struct nft_bitmap_elem, ext),
 	.estimate	= nft_bitmap_estimate,
 	.init		= nft_bitmap_init,
 	.destroy	= nft_bitmap_destroy,
-- 
cgit v1.2.3


From 0067d4b020ea07a58540acb2c5fcd3364bf326e0 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 13 Mar 2017 16:10:11 +0200
Subject: blk-mq: Fix tagset reinit in the presence of cpu hot-unplug

In case cpu was unplugged, we need to make sure not to assume
that the tags for that cpu are still allocated. so check
for null tags when reinitializing a tagset.

Reported-by: Yi Zhang <yizhan@redhat.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq-tag.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index e48bc2c72615..9d97bfc4d465 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -295,6 +295,9 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set)
 	for (i = 0; i < set->nr_hw_queues; i++) {
 		struct blk_mq_tags *tags = set->tags[i];
 
+		if (!tags)
+			continue;
+
 		for (j = 0; j < tags->nr_tags; j++) {
 			if (!tags->static_rqs[j])
 				continue;
-- 
cgit v1.2.3


From 4a3a485b1ed0e109718cc8c9d094fa0f552de9b2 Mon Sep 17 00:00:00 2001
From: Tahsin Erdogan <tahsin@google.com>
Date: Fri, 10 Mar 2017 12:09:49 -0800
Subject: writeback: fix memory leak in wb_queue_work()

When WB_registered flag is not set, wb_queue_work() skips queuing the
work, but does not perform the necessary clean up. In particular, if
work->auto_free is true, it should free the memory.

The leak condition can be reprouced by following these steps:

   mount /dev/sdb /mnt/sdb
   /* In qemu console: device_del sdb */
   umount /dev/sdb

Above will result in a wb_queue_work() call on an unregistered wb and
thus leak memory.

Reported-by: John Sperbeck <jsperbeck@google.com>
Signed-off-by: Tahsin Erdogan <tahsin@google.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 fs/fs-writeback.c | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ef600591d96f..63ee2940775c 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -173,19 +173,33 @@ static void wb_wakeup(struct bdi_writeback *wb)
 	spin_unlock_bh(&wb->work_lock);
 }
 
+static void finish_writeback_work(struct bdi_writeback *wb,
+				  struct wb_writeback_work *work)
+{
+	struct wb_completion *done = work->done;
+
+	if (work->auto_free)
+		kfree(work);
+	if (done && atomic_dec_and_test(&done->cnt))
+		wake_up_all(&wb->bdi->wb_waitq);
+}
+
 static void wb_queue_work(struct bdi_writeback *wb,
 			  struct wb_writeback_work *work)
 {
 	trace_writeback_queue(wb, work);
 
-	spin_lock_bh(&wb->work_lock);
-	if (!test_bit(WB_registered, &wb->state))
-		goto out_unlock;
 	if (work->done)
 		atomic_inc(&work->done->cnt);
-	list_add_tail(&work->list, &wb->work_list);
-	mod_delayed_work(bdi_wq, &wb->dwork, 0);
-out_unlock:
+
+	spin_lock_bh(&wb->work_lock);
+
+	if (test_bit(WB_registered, &wb->state)) {
+		list_add_tail(&work->list, &wb->work_list);
+		mod_delayed_work(bdi_wq, &wb->dwork, 0);
+	} else
+		finish_writeback_work(wb, work);
+
 	spin_unlock_bh(&wb->work_lock);
 }
 
@@ -1873,16 +1887,9 @@ static long wb_do_writeback(struct bdi_writeback *wb)
 
 	set_bit(WB_writeback_running, &wb->state);
 	while ((work = get_next_work_item(wb)) != NULL) {
-		struct wb_completion *done = work->done;
-
 		trace_writeback_exec(wb, work);
-
 		wrote += wb_writeback(wb, work);
-
-		if (work->auto_free)
-			kfree(work);
-		if (done && atomic_dec_and_test(&done->cnt))
-			wake_up_all(&wb->bdi->wb_waitq);
+		finish_writeback_work(wb, work);
 	}
 
 	/*
-- 
cgit v1.2.3


From 6f1f622019f95d79d6e2f8bb3781144ad0aff75f Mon Sep 17 00:00:00 2001
From: Kinglong Mee <kinglongmee@gmail.com>
Date: Mon, 6 Mar 2017 17:49:42 +0800
Subject: nfs4: fix a typo of NFS_ATTR_FATTR_GROUP_NAME

This typo cause a memory leak, and a bad client's group id.
unreferenced object 0xffff96d8073998d0 (size 8):
  comm "kworker/0:3", pid 34224, jiffies 4295361338 (age 761.752s)
  hex dump (first 8 bytes):
    30 00 39 07 d8 96 ff ff                          0.9.....
  backtrace:
    [<ffffffffb883212a>] kmemleak_alloc+0x4a/0xa0
    [<ffffffffb8237bc0>] __kmalloc+0x140/0x220
    [<ffffffffc05c921c>] xdr_stream_decode_string_dup+0x7c/0x110 [sunrpc]
    [<ffffffffc08edcf0>] decode_getfattr_attrs+0x940/0x1630 [nfsv4]
    [<ffffffffc08eea7b>] decode_getfattr_generic.constprop.108+0x9b/0x100 [nfsv4]
    [<ffffffffc08eebaf>] nfs4_xdr_dec_open+0xcf/0x100 [nfsv4]
    [<ffffffffc05bf9c7>] rpcauth_unwrap_resp+0xa7/0xe0 [sunrpc]
    [<ffffffffc05afc70>] call_decode+0x1e0/0x810 [sunrpc]
    [<ffffffffc05bc64d>] __rpc_execute+0x8d/0x420 [sunrpc]
    [<ffffffffc05bc9f2>] rpc_async_schedule+0x12/0x20 [sunrpc]
    [<ffffffffb80bb077>] process_one_work+0x197/0x430
    [<ffffffffb80bb35e>] worker_thread+0x4e/0x4a0
    [<ffffffffb80c1d41>] kthread+0x101/0x140
    [<ffffffffb8839a5c>] ret_from_fork+0x2c/0x40
    [<ffffffffffffffff>] 0xffffffffffffffff

Fixes: 686a816ab6 ("NFSv4: Clean up owner/group attribute decode")
Signed-off-by: Kinglong Mee <kinglongmee@gmail.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs4xdr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index f0369e362753..80ce289eea05 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3942,7 +3942,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
 		if (len <= 0)
 			goto out;
 		dprintk("%s: name=%s\n", __func__, group_name->data);
-		return NFS_ATTR_FATTR_OWNER_NAME;
+		return NFS_ATTR_FATTR_GROUP_NAME;
 	} else {
 		len = xdr_stream_decode_opaque_inline(xdr, (void **)&p,
 				XDR_MAX_NETOBJ);
-- 
cgit v1.2.3


From 366a1569bff3fe14abfdf9285e31e05e091745f5 Mon Sep 17 00:00:00 2001
From: Kinglong Mee <kinglongmee@gmail.com>
Date: Mon, 6 Mar 2017 22:29:14 +0800
Subject: NFSv4: fix a reference leak caused WARNING messages

Because nfs4_opendata_access() has close the state when access is denied,
so the state isn't leak.
Rather than revert the commit a974deee47, I'd like clean the strange state close.

[ 1615.094218] ------------[ cut here ]------------
[ 1615.094607] WARNING: CPU: 0 PID: 23702 at lib/list_debug.c:31 __list_add_valid+0x8e/0xa0
[ 1615.094913] list_add double add: new=ffff9d7901d9f608, prev=ffff9d7901d9f608, next=ffff9d7901ee8dd0.
[ 1615.095458] Modules linked in: nfsv4(E) nfs(E) nfsd(E) tun bridge stp llc fuse ip_set nfnetlink vmw_vsock_vmci_transport vsock f2fs snd_seq_midi snd_seq_midi_event fscrypto coretemp ppdev crct10dif_pclmul crc32_pclmul ghash_clmulni_intel intel_rapl_perf vmw_balloon snd_ens1371 joydev gameport snd_ac97_codec ac97_bus snd_seq snd_pcm snd_rawmidi snd_timer snd_seq_device snd soundcore nfit parport_pc parport acpi_cpufreq tpm_tis tpm_tis_core tpm i2c_piix4 vmw_vmci shpchp auth_rpcgss nfs_acl lockd(E) grace sunrpc(E) xfs libcrc32c vmwgfx drm_kms_helper ttm drm crc32c_intel mptspi e1000 serio_raw scsi_transport_spi mptscsih mptbase ata_generic pata_acpi fjes [last unloaded: nfs]
[ 1615.097663] CPU: 0 PID: 23702 Comm: fstest Tainted: G        W   E   4.11.0-rc1+ #517
[ 1615.098015] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015
[ 1615.098807] Call Trace:
[ 1615.099183]  dump_stack+0x63/0x86
[ 1615.099578]  __warn+0xcb/0xf0
[ 1615.099967]  warn_slowpath_fmt+0x5f/0x80
[ 1615.100370]  __list_add_valid+0x8e/0xa0
[ 1615.100760]  nfs4_put_state_owner+0x75/0xc0 [nfsv4]
[ 1615.101136]  __nfs4_close+0x109/0x140 [nfsv4]
[ 1615.101524]  nfs4_close_state+0x15/0x20 [nfsv4]
[ 1615.101949]  nfs4_close_context+0x21/0x30 [nfsv4]
[ 1615.102691]  __put_nfs_open_context+0xb8/0x110 [nfs]
[ 1615.103155]  put_nfs_open_context+0x10/0x20 [nfs]
[ 1615.103586]  nfs4_file_open+0x13b/0x260 [nfsv4]
[ 1615.103978]  do_dentry_open+0x20a/0x2f0
[ 1615.104369]  ? nfs4_copy_file_range+0x30/0x30 [nfsv4]
[ 1615.104739]  vfs_open+0x4c/0x70
[ 1615.105106]  ? may_open+0x5a/0x100
[ 1615.105469]  path_openat+0x623/0x1420
[ 1615.105823]  do_filp_open+0x91/0x100
[ 1615.106174]  ? __alloc_fd+0x3f/0x170
[ 1615.106568]  do_sys_open+0x130/0x220
[ 1615.106920]  ? __put_cred+0x3d/0x50
[ 1615.107256]  SyS_open+0x1e/0x20
[ 1615.107588]  entry_SYSCALL_64_fastpath+0x1a/0xa9
[ 1615.107922] RIP: 0033:0x7fab599069b0
[ 1615.108247] RSP: 002b:00007ffcf0600d78 EFLAGS: 00000246 ORIG_RAX: 0000000000000002
[ 1615.108575] RAX: ffffffffffffffda RBX: 00007fab59bcfae0 RCX: 00007fab599069b0
[ 1615.108896] RDX: 0000000000000200 RSI: 0000000000000200 RDI: 00007ffcf060255e
[ 1615.109211] RBP: 0000000000040010 R08: 0000000000000000 R09: 0000000000000016
[ 1615.109515] R10: 00000000000006a1 R11: 0000000000000246 R12: 0000000000041000
[ 1615.109806] R13: 0000000000040010 R14: 0000000000001000 R15: 0000000000002710
[ 1615.110152] ---[ end trace 96ed63b1306bf2f3 ]---

Fixes: a974deee47 ("NFSv4: Fix memory and state leak in...")
Signed-off-by: Kinglong Mee <kinglongmee@gmail.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs4proc.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1b183686c6d4..c1f5369cd339 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2258,8 +2258,6 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
 	if ((mask & ~cache.mask & (MAY_READ | MAY_EXEC)) == 0)
 		return 0;
 
-	/* even though OPEN succeeded, access is denied. Close the file */
-	nfs4_close_state(state, fmode);
 	return -EACCES;
 }
 
-- 
cgit v1.2.3


From aac66bf5f916f645bd57029490a72c3f91f2c274 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 6 Mar 2017 23:54:01 +0000
Subject: drm/i915: use correct node for handling cache domain eviction

It looks like we were incorrectly comparing vma->node against itself
instead of the target node, when evicting for a node on systems where we
need guard pages between regions with different cache domains. As a
consequence we can end up trying to needlessly evict neighbouring nodes,
even if they have the same cache domain, and if they were pinned we
would fail the eviction.

Fixes: 625d988acc28 ("drm/i915: Extract reserving space in the GTT to a helper")
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/20170306235414.23407-3-matthew.auld@intel.com
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
(cherry picked from commit fe65cbdbc97929e4a522716ed279a36783656142)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_evict.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index c181b1bb3d2c..3be2503aa042 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -293,12 +293,12 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 		 * those as well to make room for our guard pages.
 		 */
 		if (check_color) {
-			if (vma->node.start + vma->node.size == node->start) {
-				if (vma->node.color == node->color)
+			if (node->start + node->size == target->start) {
+				if (node->color == target->color)
 					continue;
 			}
-			if (vma->node.start == node->start + node->size) {
-				if (vma->node.color == node->color)
+			if (node->start == target->start + target->size) {
+				if (node->color == target->color)
 					continue;
 			}
 		}
-- 
cgit v1.2.3


From 3a0d137de035cc8c70194d9988ded61825b5ff8a Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Wed, 8 Mar 2017 13:00:07 +0100
Subject: drm/i915: Nuke skl_update_plane debug message from the pipe update
 critical section
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

printks are slow so we should not be doing them from the vblank evade
critical section. These could explain why we sometimes seem to
blow past our 100 usec deadline.

The problem has been there ever since commit c331879ce8ea ("drm/i915:
skylake sprite plane scaling using shared scalers.") but it may not have
been readily visible until commit e1edbd44e23b ("drm/i915: Complain
if we take too long under vblank evasion.") increased our chances
of noticing it.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1488974407-25175-1-git-send-email-maarten.lankhorst@linux.intel.com
Fixes: c331879ce8ea ("drm/i915: skylake sprite plane scaling using shared scalers")
Cc: <stable@vger.kernel.org> # v4.2+
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
[mlankhorst: Add missing tags, point to the correct offending commit]
(cherry picked from commit d38146b9ee16264ff9a88bf3391ab9f2f5af3646)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_sprite.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 9ef54688872a..9481ca9a3ae7 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -254,9 +254,6 @@ skl_update_plane(struct drm_plane *drm_plane,
 		int scaler_id = plane_state->scaler_id;
 		const struct intel_scaler *scaler;
 
-		DRM_DEBUG_KMS("plane = %d PS_PLANE_SEL(plane) = 0x%x\n",
-			      plane_id, PS_PLANE_SEL(plane_id));
-
 		scaler = &crtc_state->scaler_state.scalers[scaler_id];
 
 		I915_WRITE(SKL_PS_CTRL(pipe, scaler_id),
-- 
cgit v1.2.3


From 6aef660370a9c246956ba6d01eebd8063c4214cb Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Fri, 10 Mar 2017 09:32:49 +0000
Subject: drm/i915: Fix forcewake active domain tracking

In commit 003342a50021 ("drm/i915: Keep track of active
forcewake domains in a bitmask") I forgot to adjust the
newly introduce fw_domains_active state across reset.

This caused the assert_forcewakes_inactive to trigger
during suspend and resume if there were user held
forcewakes.

v2: Bitmask checks are required since vfuncs are not
    always present.

v3: Move bitmask tracking to get/put vfunc for simplicity.
    (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Fixes: 003342a50021 ("drm/i915: Keep track of active forcewake domains in a bitmask")
Testcase: igt/drv_suspend/forcewake
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: "Paneri, Praveen" <praveen.paneri@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: intel-gfx@lists.freedesktop.org
Cc: v4.10+ <stable@vger.kernel.org>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/20170310093249.4484-1-tvrtko.ursulin@linux.intel.com
(cherry picked from commit b8473050805f35add97f3ff57570d55a01808df5)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_uncore.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index abe08885a5ba..b7ff592b14f5 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -119,6 +119,8 @@ fw_domains_get(struct drm_i915_private *dev_priv, enum forcewake_domains fw_doma
 
 	for_each_fw_domain_masked(d, fw_domains, dev_priv)
 		fw_domain_wait_ack(d);
+
+	dev_priv->uncore.fw_domains_active |= fw_domains;
 }
 
 static void
@@ -130,6 +132,8 @@ fw_domains_put(struct drm_i915_private *dev_priv, enum forcewake_domains fw_doma
 		fw_domain_put(d);
 		fw_domain_posting_read(d);
 	}
+
+	dev_priv->uncore.fw_domains_active &= ~fw_domains;
 }
 
 static void
@@ -240,10 +244,8 @@ intel_uncore_fw_release_timer(struct hrtimer *timer)
 	if (WARN_ON(domain->wake_count == 0))
 		domain->wake_count++;
 
-	if (--domain->wake_count == 0) {
+	if (--domain->wake_count == 0)
 		dev_priv->uncore.funcs.force_wake_put(dev_priv, domain->mask);
-		dev_priv->uncore.fw_domains_active &= ~domain->mask;
-	}
 
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 
@@ -454,10 +456,8 @@ static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv,
 			fw_domains &= ~domain->mask;
 	}
 
-	if (fw_domains) {
+	if (fw_domains)
 		dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains);
-		dev_priv->uncore.fw_domains_active |= fw_domains;
-	}
 }
 
 /**
@@ -968,7 +968,6 @@ static noinline void ___force_wake_auto(struct drm_i915_private *dev_priv,
 		fw_domain_arm_timer(domain);
 
 	dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains);
-	dev_priv->uncore.fw_domains_active |= fw_domains;
 }
 
 static inline void __force_wake_auto(struct drm_i915_private *dev_priv,
-- 
cgit v1.2.3


From ce70df089143c49385b4f32f39d41fb50fbf6a7c Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Mon, 13 Mar 2017 08:22:13 +0300
Subject: mm, gup: fix typo in gup_p4d_range()

gup_p4d_range() should call gup_pud_range(), not itself.

[ This was not noticed on x86: this is the HAVE_GENERIC_RCU_GUP code
  used by arm[64] and powerpc    - Linus ]

Fixes: c2febafc6773 ("mm: convert generic code to 5-level paging")
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reported-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Reported-by: Anton Blanchard <anton@samba.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/gup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/gup.c b/mm/gup.c
index c74bad1bf6e8..04aa405350dc 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1455,7 +1455,7 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
 			if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
 					 P4D_SHIFT, next, write, pages, nr))
 				return 0;
-		} else if (!gup_p4d_range(p4d, addr, next, write, pages, nr))
+		} else if (!gup_pud_range(p4d, addr, next, write, pages, nr))
 			return 0;
 	} while (p4dp++, addr = next, addr != end);
 
-- 
cgit v1.2.3


From 04166f48d9593af4513ae06c0f966c0cee300a20 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 13 Mar 2017 13:24:03 +0100
Subject: Revert "netfilter: nf_tables: add flush field to struct nft_set_iter"

This reverts commit 1f48ff6c5393aa7fe290faf5d633164f105b0aa7.

This patch is not required anymore now that we keep a dummy list of
set elements in the bitmap set implementation, so revert this before
we forget this code has no clients.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 1 -
 net/netfilter/nf_tables_api.c     | 4 ----
 2 files changed, 5 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 70c5ca0c60b1..0136028652bd 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -232,7 +232,6 @@ struct nft_set_elem {
 struct nft_set;
 struct nft_set_iter {
 	u8		genmask;
-	bool		flush;
 	unsigned int	count;
 	unsigned int	skip;
 	int		err;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 5e0ccfd5bb37..434c739dfeca 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3145,7 +3145,6 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
 		iter.count	= 0;
 		iter.err	= 0;
 		iter.fn		= nf_tables_bind_check_setelem;
-		iter.flush	= false;
 
 		set->ops->walk(ctx, set, &iter);
 		if (iter.err < 0)
@@ -3399,7 +3398,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 	args.iter.count		= 0;
 	args.iter.err		= 0;
 	args.iter.fn		= nf_tables_dump_setelem;
-	args.iter.flush		= false;
 	set->ops->walk(&ctx, set, &args.iter);
 
 	nla_nest_end(skb, nest);
@@ -3963,7 +3961,6 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
 		struct nft_set_iter iter = {
 			.genmask	= genmask,
 			.fn		= nft_flush_set,
-			.flush		= true,
 		};
 		set->ops->walk(&ctx, set, &iter);
 
@@ -5114,7 +5111,6 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
 			iter.count	= 0;
 			iter.err	= 0;
 			iter.fn		= nf_tables_loop_check_setelem;
-			iter.flush	= false;
 
 			set->ops->walk(ctx, set, &iter);
 			if (iter.err < 0)
-- 
cgit v1.2.3


From c5f7c5a9a0f84c511a8a336491f9b8a3060b6517 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Mon, 6 Mar 2017 16:21:16 +0200
Subject: drivers, xen: convert grant_map.users from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 drivers/xen/gntdev.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index c77a0751a311..f3bf8f4e2d6c 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -36,6 +36,7 @@
 #include <linux/spinlock.h>
 #include <linux/slab.h>
 #include <linux/highmem.h>
+#include <linux/refcount.h>
 
 #include <xen/xen.h>
 #include <xen/grant_table.h>
@@ -86,7 +87,7 @@ struct grant_map {
 	int index;
 	int count;
 	int flags;
-	atomic_t users;
+	refcount_t users;
 	struct unmap_notify notify;
 	struct ioctl_gntdev_grant_ref *grants;
 	struct gnttab_map_grant_ref   *map_ops;
@@ -166,7 +167,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
 
 	add->index = 0;
 	add->count = count;
-	atomic_set(&add->users, 1);
+	refcount_set(&add->users, 1);
 
 	return add;
 
@@ -212,7 +213,7 @@ static void gntdev_put_map(struct gntdev_priv *priv, struct grant_map *map)
 	if (!map)
 		return;
 
-	if (!atomic_dec_and_test(&map->users))
+	if (!refcount_dec_and_test(&map->users))
 		return;
 
 	atomic_sub(map->count, &pages_mapped);
@@ -400,7 +401,7 @@ static void gntdev_vma_open(struct vm_area_struct *vma)
 	struct grant_map *map = vma->vm_private_data;
 
 	pr_debug("gntdev_vma_open %p\n", vma);
-	atomic_inc(&map->users);
+	refcount_inc(&map->users);
 }
 
 static void gntdev_vma_close(struct vm_area_struct *vma)
@@ -1004,7 +1005,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 		goto unlock_out;
 	}
 
-	atomic_inc(&map->users);
+	refcount_inc(&map->users);
 
 	vma->vm_ops = &gntdev_vmops;
 
-- 
cgit v1.2.3


From 44fee88cea43d3c2cac962e0439cb10a3cabff6d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 13 Mar 2017 15:57:12 +0100
Subject: x86/tsc: Fix ART for TSC_KNOWN_FREQ

Subhransu reported that convert_art_to_tsc() isn't working for him.

The ART to TSC relation is only set up for systems which use the refined
TSC calibration. Systems with known TSC frequency (available via CPUID 15)
are not using the refined calibration and therefor the ART to TSC relation
is never established.

Add the setup to the known frequency init path which skips ART
calibration. The init code needs to be duplicated as for systems which use
refined calibration the ART setup must be delayed until calibration has
been done.

The problem has been there since the ART support was introdduced, but only
detected now because Subhransu tested the first time on hardware which has
TSC frequency enumerated via CPUID 15.

Note for stable: The conditional has changed from TSC_RELIABLE to
     	 	 TSC_KNOWN_FREQUENCY.

[ tglx: Rewrote changelog and identified the proper 'Fixes' commit ]

Fixes: f9677e0f8308 ("x86/tsc: Always Running Timer (ART) correlated clocksource")
Reported-by: "Prusty, Subhransu S" <subhransu.s.prusty@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: stable@vger.kernel.org
Cc: christopher.s.hall@intel.com
Cc: kevin.b.stanton@intel.com
Cc: john.stultz@linaro.org
Cc: akataria@vmware.com
Link: http://lkml.kernel.org/r/20170313145712.GI3312@twins.programming.kicks-ass.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/tsc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 4f7a9833d8e5..c73a7f9e881a 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1333,6 +1333,8 @@ static int __init init_tsc_clocksource(void)
 	 * the refined calibration and directly register it as a clocksource.
 	 */
 	if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) {
+		if (boot_cpu_has(X86_FEATURE_ART))
+			art_related_clocksource = &clocksource_tsc;
 		clocksource_register_khz(&clocksource_tsc, tsc_khz);
 		return 0;
 	}
-- 
cgit v1.2.3


From 67e194007be08d071294456274dd53e0a04fdf90 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Mon, 13 Mar 2017 13:28:09 +0100
Subject: ipv6: make ECMP route replacement less greedy

Commit 27596472473a ("ipv6: fix ECMP route replacement") introduced a
loop that removes all siblings of an ECMP route that is being
replaced. However, this loop doesn't stop when it has replaced
siblings, and keeps removing other routes with a higher metric.
We also end up triggering the WARN_ON after the loop, because after
this nsiblings < 0.

Instead, stop the loop when we have taken care of all routes with the
same metric as the route being replaced.

  Reproducer:
  ===========
    #!/bin/sh

    ip netns add ns1
    ip netns add ns2
    ip -net ns1 link set lo up

    for x in 0 1 2 ; do
        ip link add veth$x netns ns2 type veth peer name eth$x netns ns1
        ip -net ns1 link set eth$x up
        ip -net ns2 link set veth$x up
    done

    ip -net ns1 -6 r a 2000::/64 nexthop via fe80::0 dev eth0 \
            nexthop via fe80::1 dev eth1 nexthop via fe80::2 dev eth2
    ip -net ns1 -6 r a 2000::/64 via fe80::42 dev eth0 metric 256
    ip -net ns1 -6 r a 2000::/64 via fe80::43 dev eth0 metric 2048

    echo "before replace, 3 routes"
    ip -net ns1 -6 r | grep -v '^fe80\|^ff00'
    echo

    ip -net ns1 -6 r c 2000::/64 nexthop via fe80::4 dev eth0 \
            nexthop via fe80::5 dev eth1 nexthop via fe80::6 dev eth2

    echo "after replace, only 2 routes, metric 2048 is gone"
    ip -net ns1 -6 r | grep -v '^fe80\|^ff00'

Fixes: 27596472473a ("ipv6: fix ECMP route replacement")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index e4266746e4a2..d4bf2c68a545 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -923,6 +923,8 @@ add:
 			ins = &rt->dst.rt6_next;
 			iter = *ins;
 			while (iter) {
+				if (iter->rt6i_metric > rt->rt6i_metric)
+					break;
 				if (rt6_qualify_for_ecmp(iter)) {
 					*ins = iter->dst.rt6_next;
 					fib6_purge_rt(iter, fn, info->nl_net);
-- 
cgit v1.2.3


From 68c32f9c2a36d410aa242e661506e5b2c2764179 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 13 Mar 2017 13:39:01 +0100
Subject: isdn/gigaset: fix NULL-deref at probe

Make sure to check the number of endpoints to avoid dereferencing a
NULL-pointer should a malicious device lack endpoints.

Fixes: cf7776dc05b8 ("[PATCH] isdn4linux: Siemens Gigaset drivers -
direct USB connection")
Cc: stable <stable@vger.kernel.org>	# 2.6.17
Cc: Hansjoerg Lipp <hjlipp@web.de>

Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/gigaset/bas-gigaset.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/isdn/gigaset/bas-gigaset.c b/drivers/isdn/gigaset/bas-gigaset.c
index 11e13c56126f..2da3ff650e1d 100644
--- a/drivers/isdn/gigaset/bas-gigaset.c
+++ b/drivers/isdn/gigaset/bas-gigaset.c
@@ -2317,6 +2317,9 @@ static int gigaset_probe(struct usb_interface *interface,
 		return -ENODEV;
 	}
 
+	if (hostif->desc.bNumEndpoints < 1)
+		return -ENODEV;
+
 	dev_info(&udev->dev,
 		 "%s: Device matched (Vendor: 0x%x, Product: 0x%x)\n",
 		 __func__, le16_to_cpu(udev->descriptor.idVendor),
-- 
cgit v1.2.3


From 6e526fdff7be4f13b24f929a04c0e9ae6761291e Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 13 Mar 2017 13:42:03 +0100
Subject: net: wimax/i2400m: fix NULL-deref at probe

Make sure to check the number of endpoints to avoid dereferencing a
NULL-pointer or accessing memory beyond the endpoint array should a
malicious device lack the expected endpoints.

The endpoints are specifically dereferenced in the i2400m_bootrom_init
path during probe (e.g. in i2400mu_tx_bulk_out).

Fixes: f398e4240fce ("i2400m/USB: probe/disconnect, dev init/shutdown
and reset backends")
Cc: Inaky Perez-Gonzalez <inaky@linux.intel.com>

Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wimax/i2400m/usb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c
index e7f5910a6519..f8eb66ef2944 100644
--- a/drivers/net/wimax/i2400m/usb.c
+++ b/drivers/net/wimax/i2400m/usb.c
@@ -467,6 +467,9 @@ int i2400mu_probe(struct usb_interface *iface,
 	struct i2400mu *i2400mu;
 	struct usb_device *usb_dev = interface_to_usbdev(iface);
 
+	if (iface->cur_altsetting->desc.bNumEndpoints < 4)
+		return -ENODEV;
+
 	if (usb_dev->speed != USB_SPEED_HIGH)
 		dev_err(dev, "device not connected as high speed\n");
 
-- 
cgit v1.2.3


From 0d443b70cc92d741cbc1dcbf1079897b3d8bc3cc Mon Sep 17 00:00:00 2001
From: Mike Travis <mike.travis@hpe.com>
Date: Tue, 7 Mar 2017 15:08:42 -0600
Subject: x86/platform: Remove warning message for duplicate NMI handlers

Remove the WARNING message associated with multiple NMI handlers as
there are at least two that are legitimate.  These are the KGDB and the
UV handlers and both want to be called if the NMI has not been claimed
by any other NMI handler.

Use of the UNKNOWN NMI call chain dramatically lowers the NMI call rate
when high frequency NMI tools are in use, notably the perf tools.  It is
required on systems that cannot sustain a high NMI call rate without
adversely affecting the system operation.

Signed-off-by: Mike Travis <mike.travis@hpe.com>
Reviewed-by: Dimitri Sivanich <dimitri.sivanich@hpe.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Russ Anderson <russ.anderson@hpe.com>
Cc: Frank Ramsay <frank.ramsay@hpe.com>
Cc: Tony Ernst <tony.ernst@hpe.com>
Link: http://lkml.kernel.org/r/20170307210841.730959611@asylum.americas.sgi.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/nmi.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index f088ea4c66e7..a723ae9440ab 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -166,11 +166,9 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
 	spin_lock_irqsave(&desc->lock, flags);
 
 	/*
-	 * most handlers of type NMI_UNKNOWN never return because
-	 * they just assume the NMI is theirs.  Just a sanity check
-	 * to manage expectations
+	 * Indicate if there are multiple registrations on the
+	 * internal NMI handler call chains (SERR and IO_CHECK).
 	 */
-	WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head));
 	WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head));
 	WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head));
 
-- 
cgit v1.2.3


From 79e49503efe53a8c51d8b695bedc8a346c5e4a87 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 13 Mar 2017 16:24:28 +0100
Subject: ipv6: avoid write to a possibly cloned skb

ip6_fragment, in case skb has a fraglist, checks if the
skb is cloned.  If it is, it will move to the 'slow path' and allocates
new skbs for each fragment.

However, right before entering the slowpath loop, it updates the
nexthdr value of the last ipv6 extension header to NEXTHDR_FRAGMENT,
to account for the fragment header that will be inserted in the new
ipv6-fragment skbs.

In case original skb is cloned this munges nexthdr value of another
skb.  Avoid this by doing the nexthdr update for each of the new fragment
skbs separately.

This was observed with tcpdump on a bridge device where netfilter ipv6
reassembly is active:  tcpdump shows malformed fragment headers as
the l4 header (icmpv6, tcp, etc). is decoded as a fragment header.

Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Reported-by: Andreas Karis <akaris@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index df42096e1f04..58f6288e9ba5 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -768,13 +768,14 @@ slow_path:
 	 *	Fragment the datagram.
 	 */
 
-	*prevhdr = NEXTHDR_FRAGMENT;
 	troom = rt->dst.dev->needed_tailroom;
 
 	/*
 	 *	Keep copying data until we run out.
 	 */
 	while (left > 0)	{
+		u8 *fragnexthdr_offset;
+
 		len = left;
 		/* IF: it doesn't fit, use 'mtu' - the data space left */
 		if (len > mtu)
@@ -819,6 +820,10 @@ slow_path:
 		 */
 		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
 
+		fragnexthdr_offset = skb_network_header(frag);
+		fragnexthdr_offset += prevhdr - skb_network_header(skb);
+		*fragnexthdr_offset = NEXTHDR_FRAGMENT;
+
 		/*
 		 *	Build fragment header.
 		 */
-- 
cgit v1.2.3


From a13b2082ece95247779b9995c4e91b4246bed023 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 13 Mar 2017 17:38:17 +0100
Subject: bridge: drop netfilter fake rtable unconditionally

Andreas reports kernel oops during rmmod of the br_netfilter module.
Hannes debugged the oops down to a NULL rt6info->rt6i_indev.

Problem is that br_netfilter has the nasty concept of adding a fake
rtable to skb->dst; this happens in a br_netfilter prerouting hook.

A second hook (in bridge LOCAL_IN) is supposed to remove these again
before the skb is handed up the stack.

However, on module unload hooks get unregistered which means an
skb could traverse the prerouting hook that attaches the fake_rtable,
while the 'fake rtable remove' hook gets removed from the hooklist
immediately after.

Fixes: 34666d467cbf1e2e3c7 ("netfilter: bridge: move br_netfilter out of the core")
Reported-by: Andreas Karis <akaris@redhat.com>
Debugged-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_input.c           |  1 +
 net/bridge/br_netfilter_hooks.c | 21 ---------------------
 2 files changed, 1 insertion(+), 21 deletions(-)

diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 236f34244dbe..013f2290bfa5 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -30,6 +30,7 @@ EXPORT_SYMBOL(br_should_route_hook);
 static int
 br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
+	br_drop_fake_rtable(skb);
 	return netif_receive_skb(skb);
 }
 
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 95087e6e8258..fa87fbd62bb7 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -521,21 +521,6 @@ static unsigned int br_nf_pre_routing(void *priv,
 }
 
 
-/* PF_BRIDGE/LOCAL_IN ************************************************/
-/* The packet is locally destined, which requires a real
- * dst_entry, so detach the fake one.  On the way up, the
- * packet would pass through PRE_ROUTING again (which already
- * took place when the packet entered the bridge), but we
- * register an IPv4 PRE_ROUTING 'sabotage' hook that will
- * prevent this from happening. */
-static unsigned int br_nf_local_in(void *priv,
-				   struct sk_buff *skb,
-				   const struct nf_hook_state *state)
-{
-	br_drop_fake_rtable(skb);
-	return NF_ACCEPT;
-}
-
 /* PF_BRIDGE/FORWARD *************************************************/
 static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
@@ -907,12 +892,6 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = {
 		.hooknum = NF_BR_PRE_ROUTING,
 		.priority = NF_BR_PRI_BRNF,
 	},
-	{
-		.hook = br_nf_local_in,
-		.pf = NFPROTO_BRIDGE,
-		.hooknum = NF_BR_LOCAL_IN,
-		.priority = NF_BR_PRI_BRNF,
-	},
 	{
 		.hook = br_nf_forward_ip,
 		.pf = NFPROTO_BRIDGE,
-- 
cgit v1.2.3


From c42f8218610aa09d7d3795e5810387673c1f84b6 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Thu, 9 Mar 2017 17:20:04 +0100
Subject: iio: sw-device: Fix config group initialization

Use the IS_ENABLED() helper macro to ensure that the configfs group is
initialized either when configfs is built-in or when configfs is built as a
module. Otherwise software device creation will result in undefined
behaviour when configfs is built as a module since the configfs group for
the device not properly initialized.

Similar to commit b2f0c09664b7 ("iio: sw-trigger: Fix config group
initialization").

Fixes: 0f3a8c3f34f7 ("iio: Add support for creating IIO devices via configfs")
Reported-by: Miguel Robles <miguel.robles@farole.net>
Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Acked-by: Daniel Baluta <daniel.baluta@gmail.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 include/linux/iio/sw_device.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/iio/sw_device.h b/include/linux/iio/sw_device.h
index 23ca41515527..fa7931933067 100644
--- a/include/linux/iio/sw_device.h
+++ b/include/linux/iio/sw_device.h
@@ -62,7 +62,7 @@ void iio_swd_group_init_type_name(struct iio_sw_device *d,
 				  const char *name,
 				  struct config_item_type *type)
 {
-#ifdef CONFIG_CONFIGFS_FS
+#if IS_ENABLED(CONFIG_CONFIGFS_FS)
 	config_group_init_type_name(&d->group, name, type);
 #endif
 }
-- 
cgit v1.2.3


From c836e5cf0d0880d6668966476c4ffe727f29f69a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 8 Mar 2017 13:24:21 +0200
Subject: x86/platform/intel-mid: Use common power off sequence

Intel Medfield may use common for Intel MID devices power sequence.
Remove unneded custom power off stub.

While here, remove function forward declaration.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20170308112422.67533-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/platform/intel-mid/mfld.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c
index e793fe509971..e42978d4deaf 100644
--- a/arch/x86/platform/intel-mid/mfld.c
+++ b/arch/x86/platform/intel-mid/mfld.c
@@ -17,16 +17,6 @@
 
 #include "intel_mid_weak_decls.h"
 
-static void penwell_arch_setup(void);
-/* penwell arch ops */
-static struct intel_mid_ops penwell_ops = {
-	.arch_setup = penwell_arch_setup,
-};
-
-static void mfld_power_off(void)
-{
-}
-
 static unsigned long __init mfld_calibrate_tsc(void)
 {
 	unsigned long fast_calibrate;
@@ -63,9 +53,12 @@ static unsigned long __init mfld_calibrate_tsc(void)
 static void __init penwell_arch_setup(void)
 {
 	x86_platform.calibrate_tsc = mfld_calibrate_tsc;
-	pm_power_off = mfld_power_off;
 }
 
+static struct intel_mid_ops penwell_ops = {
+	.arch_setup = penwell_arch_setup,
+};
+
 void *get_penwell_ops(void)
 {
 	return &penwell_ops;
-- 
cgit v1.2.3


From 859bb6d59066b96341020e0991f191631cabe59d Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 8 Mar 2017 13:24:22 +0200
Subject: x86/platform/intel-mid: Add power button support for Merrifield

Intel Merrifield platform has a Basin Cove PMIC to handle in particular
power button events. Add necessary bits to enable it.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20170308112422.67533-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/platform/intel-mid/device_libs/Makefile   |  1 +
 .../device_libs/platform_mrfld_power_btn.c         | 82 ++++++++++++++++++++++
 2 files changed, 83 insertions(+)
 create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c

diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile
index a7dbec4dce27..3dbde04febdc 100644
--- a/arch/x86/platform/intel-mid/device_libs/Makefile
+++ b/arch/x86/platform/intel-mid/device_libs/Makefile
@@ -26,5 +26,6 @@ obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_pcal9555a.o
 obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o
 # MISC Devices
 obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o
+obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_mrfld_power_btn.o
 obj-$(subst m,y,$(CONFIG_RTC_DRV_CMOS)) += platform_mrfld_rtc.o
 obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_mrfld_wdt.o
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c
new file mode 100644
index 000000000000..a6c3705a28ad
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c
@@ -0,0 +1,82 @@
+/*
+ * Intel Merrifield power button support
+ *
+ * (C) Copyright 2017 Intel Corporation
+ *
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/sfi.h>
+
+#include <asm/intel-mid.h>
+#include <asm/intel_scu_ipc.h>
+
+static struct resource mrfld_power_btn_resources[] = {
+	{
+		.flags		= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device mrfld_power_btn_dev = {
+	.name		= "msic_power_btn",
+	.id		= PLATFORM_DEVID_NONE,
+	.num_resources	= ARRAY_SIZE(mrfld_power_btn_resources),
+	.resource	= mrfld_power_btn_resources,
+};
+
+static int mrfld_power_btn_scu_status_change(struct notifier_block *nb,
+					     unsigned long code, void *data)
+{
+	if (code == SCU_DOWN) {
+		platform_device_unregister(&mrfld_power_btn_dev);
+		return 0;
+	}
+
+	return platform_device_register(&mrfld_power_btn_dev);
+}
+
+static struct notifier_block mrfld_power_btn_scu_notifier = {
+	.notifier_call	= mrfld_power_btn_scu_status_change,
+};
+
+static int __init register_mrfld_power_btn(void)
+{
+	if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER)
+		return -ENODEV;
+
+	/*
+	 * We need to be sure that the SCU IPC is ready before
+	 * PMIC power button device can be registered:
+	 */
+	intel_scu_notifier_add(&mrfld_power_btn_scu_notifier);
+
+	return 0;
+}
+arch_initcall(register_mrfld_power_btn);
+
+static void __init *mrfld_power_btn_platform_data(void *info)
+{
+	struct resource *res = mrfld_power_btn_resources;
+	struct sfi_device_table_entry *pentry = info;
+
+	res->start = res->end = pentry->irq;
+	return NULL;
+}
+
+static const struct devs_id mrfld_power_btn_dev_id __initconst = {
+	.name			= "bcove_power_btn",
+	.type			= SFI_DEV_TYPE_IPC,
+	.delay			= 1,
+	.msic			= 1,
+	.get_platform_data	= &mrfld_power_btn_platform_data,
+};
+
+sfi_device(mrfld_power_btn_dev_id);
-- 
cgit v1.2.3


From 9aea151f282df2b82a44fd7058a239334437c266 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 26 Feb 2017 01:02:09 +0100
Subject: ARM: dts: add the AB8500 clocks to the device tree

This adds the AB8500 clocks to the device tree using the new
bindings from the clk subsystem, making audio work again.

Cc: Lee Jones <lee.jones@linaro.org>
Cc: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 arch/arm/boot/dts/ste-dbx5x0.dtsi  | 19 +++++++++++++++++++
 arch/arm/boot/dts/ste-href.dtsi    |  9 ---------
 arch/arm/boot/dts/ste-snowball.dts |  9 ---------
 3 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/arch/arm/boot/dts/ste-dbx5x0.dtsi b/arch/arm/boot/dts/ste-dbx5x0.dtsi
index 82d8c4771293..162e1eb5373d 100644
--- a/arch/arm/boot/dts/ste-dbx5x0.dtsi
+++ b/arch/arm/boot/dts/ste-dbx5x0.dtsi
@@ -14,6 +14,7 @@
 #include <dt-bindings/mfd/dbx500-prcmu.h>
 #include <dt-bindings/arm/ux500_pm_domains.h>
 #include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/clock/ste-ab8500.h>
 #include "skeleton.dtsi"
 
 / {
@@ -603,6 +604,11 @@
 				interrupt-controller;
 				#interrupt-cells = <2>;
 
+				ab8500_clock: clock-controller {
+					compatible = "stericsson,ab8500-clk";
+					#clock-cells = <1>;
+				};
+
 				ab8500_gpio: ab8500-gpio {
 					compatible = "stericsson,ab8500-gpio";
 					gpio-controller;
@@ -686,6 +692,8 @@
 
 				ab8500-pwm {
 					compatible = "stericsson,ab8500-pwm";
+					clocks = <&ab8500_clock AB8500_SYSCLK_INT>;
+					clock-names = "intclk";
 				};
 
 				ab8500-debugfs {
@@ -700,6 +708,9 @@
 					V-AMIC2-supply = <&ab8500_ldo_anamic2_reg>;
 					V-DMIC-supply = <&ab8500_ldo_dmic_reg>;
 
+					clocks = <&ab8500_clock AB8500_SYSCLK_AUDIO>;
+					clock-names = "audioclk";
+
 					stericsson,earpeice-cmv = <950>; /* Units in mV. */
 				};
 
@@ -1095,6 +1106,14 @@
 			status = "disabled";
 		};
 
+		sound {
+			compatible = "stericsson,snd-soc-mop500";
+			stericsson,cpu-dai = <&msp1 &msp3>;
+			stericsson,audio-codec = <&codec>;
+			clocks = <&prcmu_clk PRCMU_SYSCLK>, <&ab8500_clock AB8500_SYSCLK_ULP>, <&ab8500_clock AB8500_SYSCLK_INT>;
+			clock-names = "sysclk", "ulpclk", "intclk";
+		};
+
 		msp0: msp@80123000 {
 			compatible = "stericsson,ux500-msp-i2s";
 			reg = <0x80123000 0x1000>;
diff --git a/arch/arm/boot/dts/ste-href.dtsi b/arch/arm/boot/dts/ste-href.dtsi
index f37f9e10713c..9e359e4f342e 100644
--- a/arch/arm/boot/dts/ste-href.dtsi
+++ b/arch/arm/boot/dts/ste-href.dtsi
@@ -186,15 +186,6 @@
 			status = "okay";
 		};
 
-		sound {
-			compatible = "stericsson,snd-soc-mop500";
-
-			stericsson,cpu-dai = <&msp1 &msp3>;
-			stericsson,audio-codec = <&codec>;
-			clocks = <&prcmu_clk PRCMU_SYSCLK>;
-			clock-names = "sysclk";
-		};
-
 		msp0: msp@80123000 {
 			pinctrl-names = "default";
 			pinctrl-0 = <&msp0_default_mode>;
diff --git a/arch/arm/boot/dts/ste-snowball.dts b/arch/arm/boot/dts/ste-snowball.dts
index dd5514def604..ade1d0d4e5f4 100644
--- a/arch/arm/boot/dts/ste-snowball.dts
+++ b/arch/arm/boot/dts/ste-snowball.dts
@@ -159,15 +159,6 @@
 				     "", "", "", "", "", "", "", "";
 		};
 
-		sound {
-			compatible = "stericsson,snd-soc-mop500";
-
-			stericsson,cpu-dai = <&msp1 &msp3>;
-			stericsson,audio-codec = <&codec>;
-			clocks = <&prcmu_clk PRCMU_SYSCLK>;
-			clock-names = "sysclk";
-		};
-
 		msp0: msp@80123000 {
 			pinctrl-names = "default";
 			pinctrl-0 = <&msp0_default_mode>;
-- 
cgit v1.2.3


From 6e7408acd04d06c04981c0c0fb5a2462b16fae4f Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sun, 12 Mar 2017 18:12:56 +0100
Subject: cpufreq: intel_pstate: Update pid_params.sample_rate_ns in
 pid_param_set()

Fix the debugfs interface for PID tuning to actually update
pid_params.sample_rate_ns on PID parameters updates, as changing
pid_params.sample_rate_ms via debugfs has no effect now.

Fixes: a4675fbc4a7a (cpufreq: intel_pstate: Replace timers with utilization update callbacks)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/intel_pstate.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 3d37219a0dd7..f9fe910f3b83 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -989,6 +989,7 @@ static void intel_pstate_update_policies(void)
 static int pid_param_set(void *data, u64 val)
 {
 	*(u32 *)data = val;
+	pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
 	intel_pstate_reset_all_pid();
 	return 0;
 }
-- 
cgit v1.2.3


From be3606ff739d1c1be36389f8737c577ad87e1f57 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date: Mon, 13 Mar 2017 19:33:37 +0300
Subject: x86/kasan: Fix boot with KASAN=y and PROFILE_ANNOTATED_BRANCHES=y

The kernel doesn't boot with both PROFILE_ANNOTATED_BRANCHES=y and KASAN=y
options selected. With branch profiling enabled we end up calling
ftrace_likely_update() before kasan_early_init(). ftrace_likely_update() is
built with KASAN instrumentation, so calling it before kasan has been
initialized leads to crash.

Use DISABLE_BRANCH_PROFILING define to make sure that we don't call
ftrace_likely_update() from early code before kasan_early_init().

Fixes: ef7f0d6a6ca8 ("x86_64: add KASan support")
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: kasan-dev@googlegroups.com
Cc: Alexander Potapenko <glider@google.com>
Cc: stable@vger.kernel.org
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: lkp@01.org
Cc: Dmitry Vyukov <dvyukov@google.com>
Link: http://lkml.kernel.org/r/20170313163337.1704-1-aryabinin@virtuozzo.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/head64.c    | 1 +
 arch/x86/mm/kasan_init_64.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 54a2372f5dbb..b5785c197e53 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -4,6 +4,7 @@
  *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
  */
 
+#define DISABLE_BRANCH_PROFILING
 #include <linux/init.h>
 #include <linux/linkage.h>
 #include <linux/types.h>
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 8d63d7a104c3..4c90cfdc128b 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -1,3 +1,4 @@
+#define DISABLE_BRANCH_PROFILING
 #define pr_fmt(fmt) "kasan: " fmt
 #include <linux/bootmem.h>
 #include <linux/kasan.h>
-- 
cgit v1.2.3


From 91864f5852f9996210fad400cf70fb85af091243 Mon Sep 17 00:00:00 2001
From: Andrey Vagin <avagin@openvz.org>
Date: Sun, 12 Mar 2017 21:36:18 -0700
Subject: net: use net->count to check whether a netns is alive or not
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous idea was to check whether a net namespace is in
net_exit_list or not. It doesn't work, because net->exit_list is used in
__register_pernet_operations and __unregister_pernet_operations where
all namespaces are added to a temporary list to make cleanup in a error
case, so list_empty(&net->exit_list) always returns false.

Reported-by: Mantas Mikulėnas <grawity@gmail.com>
Fixes: 002d8a1a6c11 ("net: skip genenerating uevents for network namespaces that are exiting")
Signed-off-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net-sysfs.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 3945821e9c1f..65ea0ff4017c 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -953,7 +953,7 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
 	while (--i >= new_num) {
 		struct kobject *kobj = &dev->_rx[i].kobj;
 
-		if (!list_empty(&dev_net(dev)->exit_list))
+		if (!atomic_read(&dev_net(dev)->count))
 			kobj->uevent_suppress = 1;
 		if (dev->sysfs_rx_queue_group)
 			sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
@@ -1371,7 +1371,7 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
 	while (--i >= new_num) {
 		struct netdev_queue *queue = dev->_tx + i;
 
-		if (!list_empty(&dev_net(dev)->exit_list))
+		if (!atomic_read(&dev_net(dev)->count))
 			queue->kobj.uevent_suppress = 1;
 #ifdef CONFIG_BQL
 		sysfs_remove_group(&queue->kobj, &dql_group);
@@ -1558,7 +1558,7 @@ void netdev_unregister_kobject(struct net_device *ndev)
 {
 	struct device *dev = &(ndev->dev);
 
-	if (!list_empty(&dev_net(ndev)->exit_list))
+	if (!atomic_read(&dev_net(ndev)->count))
 		dev_set_uevent_suppress(dev, 1);
 
 	kobject_get(&dev->kobj);
-- 
cgit v1.2.3


From c80498e36d4ef3e24599d363c622fbf22a1293cc Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 13 Mar 2017 16:24:03 +0100
Subject: vxlan: fix ovs support

The required changes in the function vxlan_dev_create() were missing
in commit 8bcdc4f3a20b.
The vxlan device is not registered anymore after this patch and the error
path causes an stack dump:
 WARNING: CPU: 3 PID: 1498 at net/core/dev.c:6713 rollback_registered_many+0x9d/0x3f0

Fixes: 8bcdc4f3a20b ("vxlan: add changelink support")
CC: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c | 73 +++++++++++++++++++++++++++++------------------------
 1 file changed, 40 insertions(+), 33 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index e375560cc74e..bdb6ae16d4a8 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2976,6 +2976,44 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
 	return 0;
 }
 
+static int __vxlan_dev_create(struct net *net, struct net_device *dev,
+			      struct vxlan_config *conf)
+{
+	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+	struct vxlan_dev *vxlan = netdev_priv(dev);
+	int err;
+
+	err = vxlan_dev_configure(net, dev, conf, false);
+	if (err)
+		return err;
+
+	dev->ethtool_ops = &vxlan_ethtool_ops;
+
+	/* create an fdb entry for a valid default destination */
+	if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) {
+		err = vxlan_fdb_create(vxlan, all_zeros_mac,
+				       &vxlan->default_dst.remote_ip,
+				       NUD_REACHABLE | NUD_PERMANENT,
+				       NLM_F_EXCL | NLM_F_CREATE,
+				       vxlan->cfg.dst_port,
+				       vxlan->default_dst.remote_vni,
+				       vxlan->default_dst.remote_vni,
+				       vxlan->default_dst.remote_ifindex,
+				       NTF_SELF);
+		if (err)
+			return err;
+	}
+
+	err = register_netdevice(dev);
+	if (err) {
+		vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni);
+		return err;
+	}
+
+	list_add(&vxlan->next, &vn->vxlan_list);
+	return 0;
+}
+
 static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
 			 struct net_device *dev, struct vxlan_config *conf,
 			 bool changelink)
@@ -3172,8 +3210,6 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
 static int vxlan_newlink(struct net *src_net, struct net_device *dev,
 			 struct nlattr *tb[], struct nlattr *data[])
 {
-	struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
-	struct vxlan_dev *vxlan = netdev_priv(dev);
 	struct vxlan_config conf;
 	int err;
 
@@ -3181,36 +3217,7 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
 	if (err)
 		return err;
 
-	err = vxlan_dev_configure(src_net, dev, &conf, false);
-	if (err)
-		return err;
-
-	dev->ethtool_ops = &vxlan_ethtool_ops;
-
-	/* create an fdb entry for a valid default destination */
-	if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) {
-		err = vxlan_fdb_create(vxlan, all_zeros_mac,
-				       &vxlan->default_dst.remote_ip,
-				       NUD_REACHABLE | NUD_PERMANENT,
-				       NLM_F_EXCL | NLM_F_CREATE,
-				       vxlan->cfg.dst_port,
-				       vxlan->default_dst.remote_vni,
-				       vxlan->default_dst.remote_vni,
-				       vxlan->default_dst.remote_ifindex,
-				       NTF_SELF);
-		if (err)
-			return err;
-	}
-
-	err = register_netdevice(dev);
-	if (err) {
-		vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni);
-		return err;
-	}
-
-	list_add(&vxlan->next, &vn->vxlan_list);
-
-	return 0;
+	return __vxlan_dev_create(src_net, dev, &conf);
 }
 
 static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
@@ -3440,7 +3447,7 @@ struct net_device *vxlan_dev_create(struct net *net, const char *name,
 	if (IS_ERR(dev))
 		return dev;
 
-	err = vxlan_dev_configure(net, dev, conf, false);
+	err = __vxlan_dev_create(net, dev, conf);
 	if (err < 0) {
 		free_netdev(dev);
 		return ERR_PTR(err);
-- 
cgit v1.2.3


From 09498087f922bb623b66db9e89c6fd11a3799867 Mon Sep 17 00:00:00 2001
From: Heiko Stuebner <heiko@sntech.de>
Date: Thu, 9 Mar 2017 07:41:16 +0100
Subject: serial: 8250_dw: Honor clk_round_rate errors in dw8250_set_termios

clk_round_rate returns a signed long and may possibly return errors
in it, for example if there is no possible rate.

Till now dw8250_set_termios ignored any error, the signednes and would
just use the value as input to clk_set_rate. This of course falls apart
if there is an actual error, so check for errors and only try to set
a rate if the value is actually valid.

This turned up on some Rockchip platforms after commit
6a171b299379 ("serial: 8250_dw: Allow hardware flow control to be used")
enabled set_termios callback in all cases, not only ACPI.

Fixes: 6a171b299379 ("serial: 8250_dw: Allow hardware flow control to be used")
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_dw.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c
index 6ee55a2d47bb..223ac234ddb2 100644
--- a/drivers/tty/serial/8250/8250_dw.c
+++ b/drivers/tty/serial/8250/8250_dw.c
@@ -257,7 +257,7 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios,
 {
 	unsigned int baud = tty_termios_baud_rate(termios);
 	struct dw8250_data *d = p->private_data;
-	unsigned int rate;
+	long rate;
 	int ret;
 
 	if (IS_ERR(d->clk) || !old)
@@ -265,7 +265,10 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios,
 
 	clk_disable_unprepare(d->clk);
 	rate = clk_round_rate(d->clk, baud * 16);
-	ret = clk_set_rate(d->clk, rate);
+	if (rate < 0)
+		ret = rate;
+	else
+		ret = clk_set_rate(d->clk, rate);
 	clk_prepare_enable(d->clk);
 
 	if (!ret)
-- 
cgit v1.2.3


From b15bfbe6427712d1b992bf806a6df9c05002a0a4 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Sat, 4 Mar 2017 13:09:58 +0000
Subject: serial: 8250_dw: Fix breakage when HAVE_CLK=n

Commit 6a171b299379 ("serial: 8250_dw: Allow hardware flow control to be
used") recently broke the 8250_dw driver on platforms which don't select
HAVE_CLK, as dw8250_set_termios() gets confused by the behaviour of the
fallback HAVE_CLK=n clock API in linux/clk.h which pretends everything
is fine but returns (valid) NULL clocks and 0 HZ clock rates.

That 0 rate is written into the uartclk resulting in a crash at boot,
e.g. on Cavium Octeon III based UTM-8 we get something like this:

1180000000800.serial: ttyS0 at MMIO 0x1180000000800 (irq = 41, base_baud = 25000000) is a OCTEON
------------[ cut here ]------------
WARNING: CPU: 2 PID: 1 at drivers/tty/serial/serial_core.c:441 uart_get_baud_rate+0xfc/0x1f0
...
Call Trace:
...
[<ffffffff8149c2e4>] uart_get_baud_rate+0xfc/0x1f0
[<ffffffff814a5098>] serial8250_do_set_termios+0xb0/0x440
[<ffffffff8149c710>] uart_set_options+0xe8/0x190
[<ffffffff814a6cdc>] serial8250_console_setup+0x84/0x158
[<ffffffff814a11ec>] univ8250_console_setup+0x54/0x70
[<ffffffff811901a0>] register_console+0x1c8/0x418
[<ffffffff8149f004>] uart_add_one_port+0x434/0x4b0
[<ffffffff814a1af8>] serial8250_register_8250_port+0x2d8/0x440
[<ffffffff814aa620>] dw8250_probe+0x388/0x5e8
...

The clock API is defined such that NULL is a valid clock handle so it
wouldn't be right to check explicitly for NULL. Instead treat a
clk_round_rate() return value of 0 as an error which prevents uartclk
being overwritten.

Fixes: 6a171b299379 ("serial: 8250_dw: Allow hardware flow control to be used")
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: David Daney <david.daney@cavium.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: linux-serial@vger.kernel.org
Cc: linux-clk@vger.kernel.org
Cc: linux-mips@linux-mips.org
Cc: bcm-kernel-feedback-list@broadcom.com
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Jason Uy <jason.uy@broadcom.com>
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_dw.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c
index 223ac234ddb2..e65808c482f1 100644
--- a/drivers/tty/serial/8250/8250_dw.c
+++ b/drivers/tty/serial/8250/8250_dw.c
@@ -267,6 +267,8 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios,
 	rate = clk_round_rate(d->clk, baud * 16);
 	if (rate < 0)
 		ret = rate;
+	else if (rate == 0)
+		ret = -ENOENT;
 	else
 		ret = clk_set_rate(d->clk, rate);
 	clk_prepare_enable(d->clk);
-- 
cgit v1.2.3


From 542ed784671d4678406c77ed6dd01593a0cdbea1 Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@codeaurora.org>
Date: Tue, 28 Feb 2017 14:30:33 -0600
Subject: tty: acpi/spcr: QDF2400 E44 checks for wrong OEM revision

For Qualcomm Technologies QDF2400 SOCs that are affected by erratum E44,
the ACPI oem_revision field is actually set to 1, not 0.

Fixes: d8a4995bcea1 ("tty: pl011: Work around QDF2400 E44 stuck BUSY bit")

Tested-by: Manoj Iyer <manoj.iyer@canonical.com>
Signed-off-by: Timur Tabi <timur@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/spcr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/spcr.c b/drivers/acpi/spcr.c
index 01c94669a2b0..3afa8c1fa127 100644
--- a/drivers/acpi/spcr.c
+++ b/drivers/acpi/spcr.c
@@ -30,7 +30,7 @@ static bool qdf2400_erratum_44_present(struct acpi_table_header *h)
 		return true;
 
 	if (!memcmp(h->oem_table_id, "QDF2400 ", ACPI_OEM_TABLE_ID_SIZE) &&
-			h->oem_revision == 0)
+			h->oem_revision == 1)
 		return true;
 
 	return false;
-- 
cgit v1.2.3


From 3f8ed54aee491bbb83656592c2d0ad7b78d045ca Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Mon, 13 Mar 2017 18:30:12 -0700
Subject: cpufreq: intel_pstate: Correct frequency setting in the HWP mode

In the functions intel_pstate_hwp_set(), min/max range from HWP capability
MSR along with max_perf_pct and min_perf_pct, is used to set the HWP
request MSR. In some cases this doesn't result in the correct HWP max/min
in HWP request.

For example: In the following case:

HWP capabilities from MSR 0x771
0x70a1220

Here cpufreq min/max frequencies from above MSR dump are 700MHz and 3.2GHz
respectively.

This will result in
hwp_min = 0x07
hwp_max = 0x20

To limit max frequency to 2GHz:

perf_limits->max_perf_pct = 63 (2GHz as a percent of 3.2GHz rounded up)

With the current calculation:
adj_range = max_perf_pct * range / 100;
adj_range = 63 * (32 - 7) / 100
adj_range = 15

max = hw_min + adj_range;
max = 7 + 15 = 22

This will result in HWP request of 0x160f, which will result in a
frequency cap of 2.2GHz not 2GHz.

The problem with the above calculation is that hwp_min of 7 is treated
as 0% in the range. But max_perf_pct is calculated with respect to minimum
as 0 and max as 3.2GHz or hwp_max, so adding hwp_min to it will result in
more than the desired.

Since the min_perf_pct and max_perf_pct is already a percent of max
frequency or hwp_max, this min/max HWP request value can be calculated
directly applying these percentage to hwp_max.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index f9fe910f3b83..ee12641ee010 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -845,7 +845,7 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
 
 static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
 {
-	int min, hw_min, max, hw_max, cpu, range, adj_range;
+	int min, hw_min, max, hw_max, cpu;
 	struct perf_limits *perf_limits = limits;
 	u64 value, cap;
 
@@ -863,20 +863,17 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
 			hw_max = HWP_GUARANTEED_PERF(cap);
 		else
 			hw_max = HWP_HIGHEST_PERF(cap);
-		range = hw_max - hw_min;
 
 		max_perf_pct = perf_limits->max_perf_pct;
 		min_perf_pct = perf_limits->min_perf_pct;
+		min = hw_max * min_perf_pct / 100;
 
 		rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
-		adj_range = min_perf_pct * range / 100;
-		min = hw_min + adj_range;
+
 		value &= ~HWP_MIN_PERF(~0L);
 		value |= HWP_MIN_PERF(min);
 
-		adj_range = max_perf_pct * range / 100;
-		max = hw_min + adj_range;
-
+		max = hw_max * max_perf_pct / 100;
 		value &= ~HWP_MAX_PERF(~0L);
 		value |= HWP_MAX_PERF(max);
 
-- 
cgit v1.2.3


From 64ff64b90e62c860772fd0be50b7cfcef1d8a9b2 Mon Sep 17 00:00:00 2001
From: Shivasharan S <shivasharan.srikanteshwara@broadcom.com>
Date: Fri, 10 Mar 2017 03:22:12 -0800
Subject: scsi: megaraid_sas: enable intx only if msix request fails

Without this fix, driver will enable INTx Interrupt pin even though
MSI-x vectors are enabled. See below lspci output. DisINTx is unset for
MSIx setup.

lspci -s 85:00.0 -vvv |grep INT |grep Control
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
Stepping- SERR+ FastB2B- DisINTx-

After applying this fix, driver will enable INTx Interrupt pin only if
Legacy interrupt method is required.  See below lspci output. DisINTx is
set for MSIx setup.  lspci -s 85:00.0 -vvv |grep INT |grep Control
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
Stepping- SERR+ FastB2B- DisINTx+

Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com>
Reviewed-by: Tomas Henzl <thenzl@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/megaraid/megaraid_sas_base.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 7ac9a9ee9bd4..016ffcebaf66 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -5034,10 +5034,12 @@ megasas_setup_irqs_msix(struct megasas_instance *instance, u8 is_probe)
 					 &instance->irq_context[j]);
 			/* Retry irq register for IO_APIC*/
 			instance->msix_vectors = 0;
-			if (is_probe)
+			if (is_probe) {
+				pci_free_irq_vectors(instance->pdev);
 				return megasas_setup_irqs_ioapic(instance);
-			else
+			} else {
 				return -1;
+			}
 		}
 	}
 	return 0;
@@ -5277,9 +5279,11 @@ static int megasas_init_fw(struct megasas_instance *instance)
 			MPI2_REPLY_POST_HOST_INDEX_OFFSET);
 	}
 
-	i = pci_alloc_irq_vectors(instance->pdev, 1, 1, PCI_IRQ_LEGACY);
-	if (i < 0)
-		goto fail_setup_irqs;
+	if (!instance->msix_vectors) {
+		i = pci_alloc_irq_vectors(instance->pdev, 1, 1, PCI_IRQ_LEGACY);
+		if (i < 0)
+			goto fail_setup_irqs;
+	}
 
 	dev_info(&instance->pdev->dev,
 		"firmware supports msix\t: (%d)", fw_msix_count);
-- 
cgit v1.2.3


From 49524b3c6e12375627ddd870613fcc6b24909898 Mon Sep 17 00:00:00 2001
From: Shivasharan S <shivasharan.srikanteshwara@broadcom.com>
Date: Fri, 10 Mar 2017 03:22:13 -0800
Subject: scsi: megaraid_sas: add correct return type check for ldio hint logic
 for raid1

Return value check of atomic_dec_if_positive is required as it returns
old value minus one.  Without this fix, driver will send small ios to
firmware path and that will be a performance issue.

Not critical, but good to have r1_ldio_hint as default value in sdev
private.

Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com>
Signed-off-by: Shivasharan S <shivasharan.srikanteshwara@broadcom.com>
Reviewed-by: Tomas Henzl <thenzl@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/megaraid/megaraid_sas_base.c   | 3 +++
 drivers/scsi/megaraid/megaraid_sas_fusion.c | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 016ffcebaf66..0016f12cc563 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -1963,6 +1963,9 @@ scan_target:
 	if (!mr_device_priv_data)
 		return -ENOMEM;
 	sdev->hostdata = mr_device_priv_data;
+
+	atomic_set(&mr_device_priv_data->r1_ldio_hint,
+		   instance->r1_ldio_hint_default);
 	return 0;
 }
 
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index 29650ba669da..ebd746e2d97c 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -2338,7 +2338,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance,
 				fp_possible = false;
 				atomic_dec(&instance->fw_outstanding);
 			} else if ((scsi_buff_len > MR_LARGE_IO_MIN_SIZE) ||
-				   atomic_dec_if_positive(&mrdev_priv->r1_ldio_hint)) {
+				   (atomic_dec_if_positive(&mrdev_priv->r1_ldio_hint) > 0)) {
 				fp_possible = false;
 				atomic_dec(&instance->fw_outstanding);
 				if (scsi_buff_len > MR_LARGE_IO_MIN_SIZE)
-- 
cgit v1.2.3


From 874d025da667d19b728141437ccbefe9dbaf9e7b Mon Sep 17 00:00:00 2001
From: Shivasharan S <shivasharan.srikanteshwara@broadcom.com>
Date: Fri, 10 Mar 2017 03:22:14 -0800
Subject: scsi: megaraid_sas: raid6 also require cpuSel check same as raid5

Without this fix, raid6 performance will not be optimal.

Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com>
Signed-off-by: Shivasharan S <shivasharan.srikanteshwara@broadcom.com>
Reviewed-by: Tomas Henzl <thenzl@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/megaraid/megaraid_sas_fusion.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index ebd746e2d97c..f990ab4d45e1 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -2159,7 +2159,7 @@ megasas_set_raidflag_cpu_affinity(union RAID_CONTEXT_UNION *praid_context,
 				cpu_sel = MR_RAID_CTX_CPUSEL_1;
 
 			if (is_stream_detected(rctx_g35) &&
-			    (raid->level == 5) &&
+			    ((raid->level == 5) || (raid->level == 6)) &&
 			    (raid->writeMode == MR_RL_WRITE_THROUGH_MODE) &&
 			    (cpu_sel == MR_RAID_CTX_CPUSEL_FCFS))
 				cpu_sel = MR_RAID_CTX_CPUSEL_0;
-- 
cgit v1.2.3


From 22487b66594f18f2a7c211f3ab8bb02dec74d37b Mon Sep 17 00:00:00 2001
From: Shivasharan S <shivasharan.srikanteshwara@broadcom.com>
Date: Fri, 10 Mar 2017 03:22:15 -0800
Subject: scsi: megaraid_sas: Driver version upgrade

Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com>
Signed-off-by: Shivasharan S <shivasharan.srikanteshwara@broadcom.com>
Reviewed-by: Tomas Henzl <thenzl@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/megaraid/megaraid_sas.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h
index e7e5974e1a2c..2b209bbb4c91 100644
--- a/drivers/scsi/megaraid/megaraid_sas.h
+++ b/drivers/scsi/megaraid/megaraid_sas.h
@@ -35,8 +35,8 @@
 /*
  * MegaRAID SAS Driver meta data
  */
-#define MEGASAS_VERSION				"07.701.16.00-rc1"
-#define MEGASAS_RELDATE				"February 2, 2017"
+#define MEGASAS_VERSION				"07.701.17.00-rc1"
+#define MEGASAS_RELDATE				"March 2, 2017"
 
 /*
  * Device IDs
-- 
cgit v1.2.3


From 02bb56ddc6711639c549d81c7b9f6d845da243a9 Mon Sep 17 00:00:00 2001
From: Zhao Qiang <qiang.zhao@nxp.com>
Date: Tue, 14 Mar 2017 09:38:33 +0800
Subject: ucc/hdlc: fix two little issue

1. modify bd_status from u32 to u16 in function hdlc_rx_done,
because bd_status register is 16bits
2. write bd_length register before writing bd_status register

Signed-off-by: Zhao Qiang <qiang.zhao@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/fsl_ucc_hdlc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c
index a5045b5279d7..6742ae605660 100644
--- a/drivers/net/wan/fsl_ucc_hdlc.c
+++ b/drivers/net/wan/fsl_ucc_hdlc.c
@@ -381,8 +381,8 @@ static netdev_tx_t ucc_hdlc_tx(struct sk_buff *skb, struct net_device *dev)
 	/* set bd status and length */
 	bd_status = (bd_status & T_W_S) | T_R_S | T_I_S | T_L_S | T_TC_S;
 
-	iowrite16be(bd_status, &bd->status);
 	iowrite16be(skb->len, &bd->length);
+	iowrite16be(bd_status, &bd->status);
 
 	/* Move to next BD in the ring */
 	if (!(bd_status & T_W_S))
@@ -457,7 +457,7 @@ static int hdlc_rx_done(struct ucc_hdlc_private *priv, int rx_work_limit)
 	struct sk_buff *skb;
 	hdlc_device *hdlc = dev_to_hdlc(dev);
 	struct qe_bd *bd;
-	u32 bd_status;
+	u16 bd_status;
 	u16 length, howmany = 0;
 	u8 *bdbuffer;
 	int i;
-- 
cgit v1.2.3


From 45caeaa5ac0b4b11784ac6f932c0ad4c6b67cda0 Mon Sep 17 00:00:00 2001
From: Jon Maxwell <jmaxwell37@gmail.com>
Date: Fri, 10 Mar 2017 16:40:33 +1100
Subject: dccp/tcp: fix routing redirect race
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As Eric Dumazet pointed out this also needs to be fixed in IPv6.
v2: Contains the IPv6 tcp/Ipv6 dccp patches as well.

We have seen a few incidents lately where a dst_enty has been freed
with a dangling TCP socket reference (sk->sk_dst_cache) pointing to that
dst_entry. If the conditions/timings are right a crash then ensues when the
freed dst_entry is referenced later on. A Common crashing back trace is:

 #8 [] page_fault at ffffffff8163e648
    [exception RIP: __tcp_ack_snd_check+74]
.
.
 #9 [] tcp_rcv_established at ffffffff81580b64
#10 [] tcp_v4_do_rcv at ffffffff8158b54a
#11 [] tcp_v4_rcv at ffffffff8158cd02
#12 [] ip_local_deliver_finish at ffffffff815668f4
#13 [] ip_local_deliver at ffffffff81566bd9
#14 [] ip_rcv_finish at ffffffff8156656d
#15 [] ip_rcv at ffffffff81566f06
#16 [] __netif_receive_skb_core at ffffffff8152b3a2
#17 [] __netif_receive_skb at ffffffff8152b608
#18 [] netif_receive_skb at ffffffff8152b690
#19 [] vmxnet3_rq_rx_complete at ffffffffa015eeaf [vmxnet3]
#20 [] vmxnet3_poll_rx_only at ffffffffa015f32a [vmxnet3]
#21 [] net_rx_action at ffffffff8152bac2
#22 [] __do_softirq at ffffffff81084b4f
#23 [] call_softirq at ffffffff8164845c
#24 [] do_softirq at ffffffff81016fc5
#25 [] irq_exit at ffffffff81084ee5
#26 [] do_IRQ at ffffffff81648ff8

Of course it may happen with other NIC drivers as well.

It's found the freed dst_entry here:

 224 static bool tcp_in_quickack_mode(struct sock *sk)↩
 225 {↩
 226 ▹       const struct inet_connection_sock *icsk = inet_csk(sk);↩
 227 ▹       const struct dst_entry *dst = __sk_dst_get(sk);↩
 228 ↩
 229 ▹       return (dst && dst_metric(dst, RTAX_QUICKACK)) ||↩
 230 ▹       ▹       (icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong);↩
 231 }↩

But there are other backtraces attributed to the same freed dst_entry in
netfilter code as well.

All the vmcores showed 2 significant clues:

- Remote hosts behind the default gateway had always been redirected to a
different gateway. A rtable/dst_entry will be added for that host. Making
more dst_entrys with lower reference counts. Making this more probable.

- All vmcores showed a postitive LockDroppedIcmps value, e.g:

LockDroppedIcmps                  267

A closer look at the tcp_v4_err() handler revealed that do_redirect() will run
regardless of whether user space has the socket locked. This can result in a
race condition where the same dst_entry cached in sk->sk_dst_entry can be
decremented twice for the same socket via:

do_redirect()->__sk_dst_check()-> dst_release().

Which leads to the dst_entry being prematurely freed with another socket
pointing to it via sk->sk_dst_cache and a subsequent crash.

To fix this skip do_redirect() if usespace has the socket locked. Instead let
the redirect take place later when user space does not have the socket
locked.

The dccp/IPv6 code is very similar in this respect, so fixing it there too.

As Eric Garver pointed out the following commit now invalidates routes. Which
can set the dst->obsolete flag so that ipv4_dst_check() returns null and
triggers the dst_release().

Fixes: ceb3320610d6 ("ipv4: Kill routes during PMTU/redirect updates.")
Cc: Eric Garver <egarver@redhat.com>
Cc: Hannes Sowa <hsowa@redhat.com>
Signed-off-by: Jon Maxwell <jmaxwell37@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ipv4.c     | 3 ++-
 net/dccp/ipv6.c     | 8 +++++---
 net/ipv4/tcp_ipv4.c | 3 ++-
 net/ipv6/tcp_ipv6.c | 8 +++++---
 4 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 409d0cfd3447..b99168b0fabf 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -289,7 +289,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 
 	switch (type) {
 	case ICMP_REDIRECT:
-		dccp_do_redirect(skb, sk);
+		if (!sock_owned_by_user(sk))
+			dccp_do_redirect(skb, sk);
 		goto out;
 	case ICMP_SOURCE_QUENCH:
 		/* Just silently ignore these. */
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 233b57367758..d9b6a4e403e7 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -122,10 +122,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	np = inet6_sk(sk);
 
 	if (type == NDISC_REDIRECT) {
-		struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
+		if (!sock_owned_by_user(sk)) {
+			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
 
-		if (dst)
-			dst->ops->redirect(dst, sk, skb);
+			if (dst)
+				dst->ops->redirect(dst, sk, skb);
+		}
 		goto out;
 	}
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8f3ec1365497..575e19dcc017 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -431,7 +431,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 
 	switch (type) {
 	case ICMP_REDIRECT:
-		do_redirect(icmp_skb, sk);
+		if (!sock_owned_by_user(sk))
+			do_redirect(icmp_skb, sk);
 		goto out;
 	case ICMP_SOURCE_QUENCH:
 		/* Just silently ignore these. */
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 60a5295a7de6..49fa2e8c3fa9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -391,10 +391,12 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	np = inet6_sk(sk);
 
 	if (type == NDISC_REDIRECT) {
-		struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
+		if (!sock_owned_by_user(sk)) {
+			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
 
-		if (dst)
-			dst->ops->redirect(dst, sk, skb);
+			if (dst)
+				dst->ops->redirect(dst, sk, skb);
+		}
 		goto out;
 	}
 
-- 
cgit v1.2.3


From b20e2d54789c6acbf6bd0efdbec2cf5fa4d90ef1 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 13 Mar 2017 00:00:26 +0100
Subject: tun: fix premature POLLOUT notification on tun devices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

aszlig observed failing ssh tunnels (-w) during initialization since
commit cc9da6cc4f56e0 ("ipv6: addrconf: use stable address generator for
ARPHRD_NONE"). We already had reports that the mentioned commit breaks
Juniper VPN connections. I can't clearly say that the Juniper VPN client
has the same problem, but it is worth a try to hint to this patch.

Because of the early generation of link local addresses, the kernel now
can start asking for routers on the local subnet much earlier than usual.
Those router solicitation packets arrive inside the ssh channels and
should be transmitted to the tun fd before the configuration scripts
might have upped the interface and made it ready for transmission.

ssh polls on the interface and receives back a POLL_OUT. It tries to send
the earily router solicitation packet to the tun interface.  Unfortunately
it hasn't been up'ed yet by config scripts, thus failing with -EIO. ssh
doesn't retry again and considers the tun interface broken forever.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=121131
Fixes: cc9da6cc4f56 ("ipv6: addrconf: use stable address generator for ARPHRD_NONE")
Cc: Bjørn Mork <bjorn@mork.no>
Reported-by: Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
Cc: Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
Reported-by: Jonas Lippuner <jonas@lippuner.ca>
Cc: Jonas Lippuner <jonas@lippuner.ca>
Reported-by: aszlig <aszlig@redmoonstudios.org>
Cc: aszlig <aszlig@redmoonstudios.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index f58b7d850114..34cc3c590aa5 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -822,7 +822,18 @@ static void tun_net_uninit(struct net_device *dev)
 /* Net device open. */
 static int tun_net_open(struct net_device *dev)
 {
+	struct tun_struct *tun = netdev_priv(dev);
+	int i;
+
 	netif_tx_start_all_queues(dev);
+
+	for (i = 0; i < tun->numqueues; i++) {
+		struct tun_file *tfile;
+
+		tfile = rtnl_dereference(tun->tfiles[i]);
+		tfile->socket.sk->sk_write_space(tfile->socket.sk);
+	}
+
 	return 0;
 }
 
@@ -1103,9 +1114,10 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
 	if (!skb_array_empty(&tfile->tx_array))
 		mask |= POLLIN | POLLRDNORM;
 
-	if (sock_writeable(sk) ||
-	    (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
-	     sock_writeable(sk)))
+	if (tun->dev->flags & IFF_UP &&
+	    (sock_writeable(sk) ||
+	     (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
+	      sock_writeable(sk))))
 		mask |= POLLOUT | POLLWRNORM;
 
 	if (tun->dev->reg_state != NETREG_REGISTERED)
-- 
cgit v1.2.3


From 72ef9c4125c7b257e3a714d62d778ab46583d6a3 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 13 Mar 2017 00:01:30 +0100
Subject: dccp: fix memory leak during tear-down of unsuccessful connection
 request

This patch fixes a memory leak, which happens if the connection request
is not fulfilled between parsing the DCCP options and handling the SYN
(because e.g. the backlog is full), because we forgot to free the
list of ack vectors.

Reported-by: Jianwen Ji <jiji@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid2.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index f053198e730c..5e3a7302f774 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -749,6 +749,7 @@ static void ccid2_hc_tx_exit(struct sock *sk)
 	for (i = 0; i < hc->tx_seqbufc; i++)
 		kfree(hc->tx_seqbuf[i]);
 	hc->tx_seqbufc = 0;
+	dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
 }
 
 static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
-- 
cgit v1.2.3


From b0addd3fa6bcd119be9428996d5d4522479ab240 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 13 Mar 2017 13:47:48 +0100
Subject: USB: idmouse: fix NULL-deref at probe

Make sure to check the number of endpoints to avoid dereferencing a
NULL-pointer should a malicious device lack endpoints.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/idmouse.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/misc/idmouse.c b/drivers/usb/misc/idmouse.c
index 8b9fd7534f69..502bfe30a077 100644
--- a/drivers/usb/misc/idmouse.c
+++ b/drivers/usb/misc/idmouse.c
@@ -347,6 +347,9 @@ static int idmouse_probe(struct usb_interface *interface,
 	if (iface_desc->desc.bInterfaceClass != 0x0A)
 		return -ENODEV;
 
+	if (iface_desc->desc.bNumEndpoints < 1)
+		return -ENODEV;
+
 	/* allocate memory for our device state and initialize it */
 	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
 	if (dev == NULL)
-- 
cgit v1.2.3


From 1dc56c52d2484be09c7398a5207d6b11a4256be9 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 13 Mar 2017 13:47:49 +0100
Subject: USB: lvtest: fix NULL-deref at probe

Make sure to check the number of endpoints to avoid dereferencing a
NULL-pointer should the probed device lack endpoints.

Note that this driver does not bind to any devices by default.

Fixes: ce21bfe603b3 ("USB: Add LVS Test device driver")
Cc: stable <stable@vger.kernel.org>     # 3.17
Cc: Pratyush Anand <pratyush.anand@gmail.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/lvstest.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/misc/lvstest.c b/drivers/usb/misc/lvstest.c
index 77176511658f..d3d124753266 100644
--- a/drivers/usb/misc/lvstest.c
+++ b/drivers/usb/misc/lvstest.c
@@ -366,6 +366,10 @@ static int lvs_rh_probe(struct usb_interface *intf,
 
 	hdev = interface_to_usbdev(intf);
 	desc = intf->cur_altsetting;
+
+	if (desc->desc.bNumEndpoints < 1)
+		return -ENODEV;
+
 	endpoint = &desc->endpoint[0].desc;
 
 	/* valid only for SS root hub */
-- 
cgit v1.2.3


From f259ca3eed6e4b79ac3d5c5c9fb259fb46e86217 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 13 Mar 2017 13:47:50 +0100
Subject: USB: uss720: fix NULL-deref at probe

Make sure to check the number of endpoints to avoid dereferencing a
NULL-pointer or accessing memory beyond the endpoint array should a
malicious device lack the expected endpoints.

Note that the endpoint access that causes the NULL-deref is currently
only used for debugging purposes during probe so the oops only happens
when dynamic debugging is enabled. This means the driver could be
rewritten to continue to accept device with only two endpoints, should
such devices exist.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/uss720.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c
index e45a3a680db8..07014cad6dbe 100644
--- a/drivers/usb/misc/uss720.c
+++ b/drivers/usb/misc/uss720.c
@@ -709,6 +709,11 @@ static int uss720_probe(struct usb_interface *intf,
 
 	interface = intf->cur_altsetting;
 
+	if (interface->desc.bNumEndpoints < 3) {
+		usb_put_dev(usbdev);
+		return -ENODEV;
+	}
+
 	/*
 	 * Allocate parport interface 
 	 */
-- 
cgit v1.2.3


From 03ace948a4eb89d1cf51c06afdfc41ebca5fdb27 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 13 Mar 2017 13:47:51 +0100
Subject: USB: wusbcore: fix NULL-deref at probe

Make sure to check the number of endpoints to avoid dereferencing a
NULL-pointer or accessing memory beyond the endpoint array should a
malicious device lack the expected endpoints.

This specifically fixes the NULL-pointer dereference when probing HWA HC
devices.

Fixes: df3654236e31 ("wusb: add the Wire Adapter (WA) core")
Cc: stable <stable@vger.kernel.org>     # 2.6.28
Cc: Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
Cc: David Vrabel <david.vrabel@csr.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/wusbcore/wa-hc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/wusbcore/wa-hc.c b/drivers/usb/wusbcore/wa-hc.c
index 252c7bd9218a..d01496fd27fe 100644
--- a/drivers/usb/wusbcore/wa-hc.c
+++ b/drivers/usb/wusbcore/wa-hc.c
@@ -39,6 +39,9 @@ int wa_create(struct wahc *wa, struct usb_interface *iface,
 	int result;
 	struct device *dev = &iface->dev;
 
+	if (iface->cur_altsetting->desc.bNumEndpoints < 3)
+		return -ENODEV;
+
 	result = wa_rpipes_create(wa);
 	if (result < 0)
 		goto error_rpipes_create;
-- 
cgit v1.2.3


From daf229b15907fbfdb6ee183aac8ca428cb57e361 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 13 Mar 2017 13:47:52 +0100
Subject: uwb: hwa-rc: fix NULL-deref at probe

Make sure to check the number of endpoints to avoid dereferencing a
NULL-pointer should a malicious device lack endpoints.

Note that the dereference happens in the start callback which is called
during probe.

Fixes: de520b8bd552 ("uwb: add HWA radio controller driver")
Cc: stable <stable@vger.kernel.org>     # 2.6.28
Cc: Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
Cc: David Vrabel <david.vrabel@csr.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/uwb/hwa-rc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/uwb/hwa-rc.c b/drivers/uwb/hwa-rc.c
index 0aa6c3c29d17..35a1e777b449 100644
--- a/drivers/uwb/hwa-rc.c
+++ b/drivers/uwb/hwa-rc.c
@@ -823,6 +823,9 @@ static int hwarc_probe(struct usb_interface *iface,
 	struct hwarc *hwarc;
 	struct device *dev = &iface->dev;
 
+	if (iface->cur_altsetting->desc.bNumEndpoints < 1)
+		return -ENODEV;
+
 	result = -ENOMEM;
 	uwb_rc = uwb_rc_alloc();
 	if (uwb_rc == NULL) {
-- 
cgit v1.2.3


From 4ce362711d78a4999011add3115b8f4b0bc25e8c Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 13 Mar 2017 13:47:53 +0100
Subject: uwb: i1480-dfu: fix NULL-deref at probe

Make sure to check the number of endpoints to avoid dereferencing a
NULL-pointer should a malicious device lack endpoints.

Note that the dereference happens in the cmd and wait_init_done
callbacks which are called during probe.

Fixes: 1ba47da52712 ("uwb: add the i1480 DFU driver")
Cc: stable <stable@vger.kernel.org>     # 2.6.28
Cc: Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
Cc: David Vrabel <david.vrabel@csr.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/uwb/i1480/dfu/usb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/uwb/i1480/dfu/usb.c b/drivers/uwb/i1480/dfu/usb.c
index 2bfc846ac071..6345e85822a4 100644
--- a/drivers/uwb/i1480/dfu/usb.c
+++ b/drivers/uwb/i1480/dfu/usb.c
@@ -362,6 +362,9 @@ int i1480_usb_probe(struct usb_interface *iface, const struct usb_device_id *id)
 				 result);
 	}
 
+	if (iface->cur_altsetting->desc.bNumEndpoints < 1)
+		return -ENODEV;
+
 	result = -ENOMEM;
 	i1480_usb = kzalloc(sizeof(*i1480_usb), GFP_KERNEL);
 	if (i1480_usb == NULL) {
-- 
cgit v1.2.3


From 3243367b209faed5c320a4e5f9a565ee2a2ba958 Mon Sep 17 00:00:00 2001
From: Samuel Thibault <samuel.thibault@ens-lyon.org>
Date: Mon, 13 Mar 2017 20:50:08 +0100
Subject: usb-core: Add LINEAR_FRAME_INTR_BINTERVAL USB quirk

Some USB 2.0 devices erroneously report millisecond values in
bInterval. The generic config code manages to catch most of them,
but in some cases it's not completely enough.

The case at stake here is a USB 2.0 braille device, which wants to
announce 10ms and thus sets bInterval to 10, but with the USB 2.0
computation that yields to 64ms.  It happens that one can type fast
enough to reach this interval and get the device buffers overflown,
leading to problematic latencies.  The generic config code does not
catch this case because the 64ms is considered a sane enough value.

This change thus adds a USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL quirk
to mark devices which actually report milliseconds in bInterval,
and marks Vario Ultra devices as needing it.

Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/config.c  | 10 ++++++++++
 drivers/usb/core/quirks.c  |  8 ++++++++
 include/linux/usb/quirks.h |  6 ++++++
 3 files changed, 24 insertions(+)

diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
index 25dbd8c7aec7..4be52c602e9b 100644
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -280,6 +280,16 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
 
 			/*
 			 * Adjust bInterval for quirked devices.
+			 */
+			/*
+			 * This quirk fixes bIntervals reported in ms.
+			 */
+			if (to_usb_device(ddev)->quirks &
+				USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL) {
+				n = clamp(fls(d->bInterval) + 3, i, j);
+				i = j = n;
+			}
+			/*
 			 * This quirk fixes bIntervals reported in
 			 * linear microframes.
 			 */
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 24f9f98968a5..96b21b0dac1e 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -170,6 +170,14 @@ static const struct usb_device_id usb_quirk_list[] = {
 	/* M-Systems Flash Disk Pioneers */
 	{ USB_DEVICE(0x08ec, 0x1000), .driver_info = USB_QUIRK_RESET_RESUME },
 
+	/* Baum Vario Ultra */
+	{ USB_DEVICE(0x0904, 0x6101), .driver_info =
+			USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL },
+	{ USB_DEVICE(0x0904, 0x6102), .driver_info =
+			USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL },
+	{ USB_DEVICE(0x0904, 0x6103), .driver_info =
+			USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL },
+
 	/* Keytouch QWERTY Panel keyboard */
 	{ USB_DEVICE(0x0926, 0x3333), .driver_info =
 			USB_QUIRK_CONFIG_INTF_STRINGS },
diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h
index 1d0043dc34e4..de2a722fe3cf 100644
--- a/include/linux/usb/quirks.h
+++ b/include/linux/usb/quirks.h
@@ -50,4 +50,10 @@
 /* device can't handle Link Power Management */
 #define USB_QUIRK_NO_LPM			BIT(10)
 
+/*
+ * Device reports its bInterval as linear frames instead of the
+ * USB 2.0 calculation.
+ */
+#define USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL	BIT(11)
+
 #endif /* __LINUX_USB_QUIRKS_H */
-- 
cgit v1.2.3


From 0090114d336a9604aa2d90bc83f20f7cd121b76c Mon Sep 17 00:00:00 2001
From: Bin Liu <b-liu@ti.com>
Date: Fri, 10 Mar 2017 14:43:35 -0600
Subject: usb: musb: cppi41: don't check early-TX-interrupt for Isoch transfer

The CPPI 4.1 driver polls register to workaround the premature TX
interrupt issue, but it causes audio playback underrun when triggered in
Isoch transfers.

Isoch doesn't do back-to-back transfers, the TX should be done by the
time the next transfer is scheduled. So skip this polling workaround for
Isoch transfer.

Fixes: a655f481d83d6 ("usb: musb: musb_cppi41: handle pre-mature TX complete interrupt")
Cc: <stable@vger.kernel.org> #4.1+
Reported-by: Alexandre Bailon <abailon@baylibre.com>
Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Tested-by: Alexandre Bailon <abailon@baylibre.com>
Signed-off-by: Bin Liu <b-liu@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/musb_cppi41.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/musb/musb_cppi41.c b/drivers/usb/musb/musb_cppi41.c
index 00e272bfee39..355655f8a3fb 100644
--- a/drivers/usb/musb/musb_cppi41.c
+++ b/drivers/usb/musb/musb_cppi41.c
@@ -238,8 +238,27 @@ static void cppi41_dma_callback(void *private_data,
 			transferred < cppi41_channel->packet_sz)
 		cppi41_channel->prog_len = 0;
 
-	if (cppi41_channel->is_tx)
-		empty = musb_is_tx_fifo_empty(hw_ep);
+	if (cppi41_channel->is_tx) {
+		u8 type;
+
+		if (is_host_active(musb))
+			type = hw_ep->out_qh->type;
+		else
+			type = hw_ep->ep_in.type;
+
+		if (type == USB_ENDPOINT_XFER_ISOC)
+			/*
+			 * Don't use the early-TX-interrupt workaround below
+			 * for Isoch transfter. Since Isoch are periodic
+			 * transfer, by the time the next transfer is
+			 * scheduled, the current one should be done already.
+			 *
+			 * This avoids audio playback underrun issue.
+			 */
+			empty = true;
+		else
+			empty = musb_is_tx_fifo_empty(hw_ep);
+	}
 
 	if (!cppi41_channel->is_tx || empty) {
 		cppi41_trans_done(cppi41_channel);
-- 
cgit v1.2.3


From 6b7ad496084e3d8ffa844a02e0f7f76f3eb82203 Mon Sep 17 00:00:00 2001
From: Bin Liu <b-liu@ti.com>
Date: Fri, 10 Mar 2017 14:43:36 -0600
Subject: usb: musb: dsps: fix iounmap in error and exit paths

Cleanly iounmap the pointer in error and exit paths.

Signed-off-by: Bin Liu <b-liu@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/musb_dsps.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c
index 7c047c4a2565..9c7ee26ef388 100644
--- a/drivers/usb/musb/musb_dsps.c
+++ b/drivers/usb/musb/musb_dsps.c
@@ -933,7 +933,7 @@ static int dsps_probe(struct platform_device *pdev)
 	if (usb_get_dr_mode(&pdev->dev) == USB_DR_MODE_PERIPHERAL) {
 		ret = dsps_setup_optional_vbus_irq(pdev, glue);
 		if (ret)
-			return ret;
+			goto err_iounmap;
 	}
 
 	platform_set_drvdata(pdev, glue);
@@ -946,6 +946,8 @@ static int dsps_probe(struct platform_device *pdev)
 
 err:
 	pm_runtime_disable(&pdev->dev);
+err_iounmap:
+	iounmap(glue->usbss_base);
 	return ret;
 }
 
@@ -956,6 +958,7 @@ static int dsps_remove(struct platform_device *pdev)
 	platform_device_unregister(glue->musb);
 
 	pm_runtime_disable(&pdev->dev);
+	iounmap(glue->usbss_base);
 
 	return 0;
 }
-- 
cgit v1.2.3


From bc1e2154542071e3cfe1734b143af9b8bdacf8bd Mon Sep 17 00:00:00 2001
From: Bin Liu <b-liu@ti.com>
Date: Fri, 10 Mar 2017 14:43:37 -0600
Subject: usb: musb: fix possible spinlock deadlock

The DSPS glue calls del_timer_sync() in its musb_platform_disable()
implementation, which requires the caller to not hold a lock. But
musb_remove() calls musb_platform_disable() will musb->lock held. This
could causes spinlock deadlock.

So change musb_remove() to call musb_platform_disable() without holds
musb->lock. This doesn't impact the musb_platform_disable implementation
in other glue drivers.

root@am335x-evm:~# modprobe -r musb-dsps
[  126.134879] musb-hdrc musb-hdrc.1: remove, state 1
[  126.140465] usb usb2: USB disconnect, device number 1
[  126.146178] usb 2-1: USB disconnect, device number 2
[  126.416985] musb-hdrc musb-hdrc.1: USB bus 2 deregistered
[  126.423943]
[  126.425525] ======================================================
[  126.431997] [ INFO: possible circular locking dependency detected ]
[  126.438564] 4.11.0-rc1-00003-g1557f13bca04-dirty #77 Not tainted
[  126.444852] -------------------------------------------------------
[  126.451414] modprobe/778 is trying to acquire lock:
[  126.456523]  (((&glue->timer))){+.-...}, at: [<c01b8788>] del_timer_sync+0x0/0xd0
[  126.464403]
[  126.464403] but task is already holding lock:
[  126.470511]  (&(&musb->lock)->rlock){-.-...}, at: [<bf30b7f8>] musb_remove+0x50/0x1
30 [musb_hdrc]
[  126.479965]
[  126.479965] which lock already depends on the new lock.
[  126.479965]
[  126.488531]
[  126.488531] the existing dependency chain (in reverse order) is:
[  126.496368]
[  126.496368] -> #1 (&(&musb->lock)->rlock){-.-...}:
[  126.502968]        otg_timer+0x80/0xec [musb_dsps]
[  126.507990]        call_timer_fn+0xb4/0x390
[  126.512372]        expire_timers+0xf0/0x1fc
[  126.516754]        run_timer_softirq+0x80/0x178
[  126.521511]        __do_softirq+0xc4/0x554
[  126.525802]        irq_exit+0xe8/0x158
[  126.529735]        __handle_domain_irq+0x58/0xb8
[  126.534583]        __irq_usr+0x54/0x80
[  126.538507]
[  126.538507] -> #0 (((&glue->timer))){+.-...}:
[  126.544636]        del_timer_sync+0x40/0xd0
[  126.549066]        musb_remove+0x6c/0x130 [musb_hdrc]
[  126.554370]        platform_drv_remove+0x24/0x3c
[  126.559206]        device_release_driver_internal+0x14c/0x1e0
[  126.565225]        bus_remove_device+0xd8/0x108
[  126.569970]        device_del+0x1e4/0x308
[  126.574170]        platform_device_del+0x24/0x8c
[  126.579006]        platform_device_unregister+0xc/0x20
[  126.584394]        dsps_remove+0x14/0x30 [musb_dsps]
[  126.589595]        platform_drv_remove+0x24/0x3c
[  126.594432]        device_release_driver_internal+0x14c/0x1e0
[  126.600450]        driver_detach+0x38/0x6c
[  126.604740]        bus_remove_driver+0x4c/0xa0
[  126.609407]        SyS_delete_module+0x11c/0x1e4
[  126.614252]        __sys_trace_return+0x0/0x10

Fixes: ea2f35c01d5ea ("usb: musb: Fix sleeping function called from invalid context for hdrc glue")
Cc: <stable@vger.kernel.org> #4.9+
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Bin Liu <b-liu@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/musb_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index d8bae6ca8904..0c3664ab705e 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -2490,8 +2490,8 @@ static int musb_remove(struct platform_device *pdev)
 	musb_host_cleanup(musb);
 	musb_gadget_cleanup(musb);
 
-	spin_lock_irqsave(&musb->lock, flags);
 	musb_platform_disable(musb);
+	spin_lock_irqsave(&musb->lock, flags);
 	musb_disable_interrupts(musb);
 	musb_writeb(musb->mregs, MUSB_DEVCTL, 0);
 	spin_unlock_irqrestore(&musb->lock, flags);
-- 
cgit v1.2.3


From 0043c1dfbec7b6e2427409059b05347d6f51aa9f Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Wed, 8 Feb 2017 09:24:25 +0000
Subject: serial: st-asc: Use new GPIOD API to obtain RTS pin

The commits mentioned below adapt the GPIO API to allow more information
to be passed directly through devm_get_gpiod_from_child() in the first
instance.  This facilitates the removal of subsequent calls, such as
gpiod_direction_output().  This patch firstly moves to utilise the new
API and secondly removes the now superfluous call do set the direction.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Suggested-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
[Also drop the header file dummies that only this driver was using]
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/tty/serial/st-asc.c   | 11 ++++++-----
 include/linux/gpio/consumer.h | 16 ----------------
 2 files changed, 6 insertions(+), 21 deletions(-)

diff --git a/drivers/tty/serial/st-asc.c b/drivers/tty/serial/st-asc.c
index bcf1d33e6ffe..c334bcc59c64 100644
--- a/drivers/tty/serial/st-asc.c
+++ b/drivers/tty/serial/st-asc.c
@@ -575,12 +575,13 @@ static void asc_set_termios(struct uart_port *port, struct ktermios *termios,
 			pinctrl_select_state(ascport->pinctrl,
 					     ascport->states[NO_HW_FLOWCTRL]);
 
-			gpiod =	devm_get_gpiod_from_child(port->dev, "rts",
-							  &np->fwnode);
-			if (!IS_ERR(gpiod)) {
-				gpiod_direction_output(gpiod, 0);
+			gpiod = devm_fwnode_get_gpiod_from_child(port->dev,
+								 "rts",
+								 &np->fwnode,
+								 GPIOD_OUT_LOW,
+								 np->name);
+			if (!IS_ERR(gpiod))
 				ascport->rts = gpiod;
-			}
 		}
 	}
 
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 2484b2fcc6eb..933d93656605 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -143,15 +143,6 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev,
 						struct fwnode_handle *child,
 						enum gpiod_flags flags,
 						const char *label);
-/* FIXME: delete this helper when users are switched over */
-static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev,
-			  const char *con_id, struct fwnode_handle *child)
-{
-	return devm_fwnode_get_index_gpiod_from_child(dev, con_id,
-						      0, child,
-						      GPIOD_ASIS,
-						      "?");
-}
 
 #else /* CONFIG_GPIOLIB */
 
@@ -444,13 +435,6 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev,
 	return ERR_PTR(-ENOSYS);
 }
 
-/* FIXME: delete this when all users are switched over */
-static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev,
-			  const char *con_id, struct fwnode_handle *child)
-{
-	return ERR_PTR(-ENOSYS);
-}
-
 #endif /* CONFIG_GPIOLIB */
 
 static inline
-- 
cgit v1.2.3


From 6e9f44eaaef0df7b846e9316fa9ca72a02025d44 Mon Sep 17 00:00:00 2001
From: Dan Williams <dcbw@redhat.com>
Date: Thu, 9 Mar 2017 11:32:28 -0600
Subject: USB: serial: option: add Quectel UC15, UC20, EC21, and EC25 modems

Add Quectel UC15, UC20, EC21, and EC25.  The EC20 is handled by
qcserial due to a USB VID/PID conflict with an existing Acer
device.

Signed-off-by: Dan Williams <dcbw@redhat.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/option.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 42cc72e54c05..af67a0de6b5d 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -233,6 +233,14 @@ static void option_instat_callback(struct urb *urb);
 #define BANDRICH_PRODUCT_1012			0x1012
 
 #define QUALCOMM_VENDOR_ID			0x05C6
+/* These Quectel products use Qualcomm's vendor ID */
+#define QUECTEL_PRODUCT_UC20			0x9003
+#define QUECTEL_PRODUCT_UC15			0x9090
+
+#define QUECTEL_VENDOR_ID			0x2c7c
+/* These Quectel products use Quectel's vendor ID */
+#define QUECTEL_PRODUCT_EC21			0x0121
+#define QUECTEL_PRODUCT_EC25			0x0125
 
 #define CMOTECH_VENDOR_ID			0x16d8
 #define CMOTECH_PRODUCT_6001			0x6001
@@ -1161,7 +1169,14 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */
 	{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */
 	{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */
-	{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9003), /* Quectel UC20 */
+	/* Quectel products using Qualcomm vendor ID */
+	{ USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC15)},
+	{ USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC20),
+	  .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
+	/* Quectel products using Quectel vendor ID */
+	{ USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21),
+	  .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
+	{ USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC25),
 	  .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
 	{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) },
 	{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) },
-- 
cgit v1.2.3


From 60b89f1928af80b546b5c3fd8714a62f6f4b8844 Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre@microchip.com>
Date: Tue, 14 Mar 2017 09:38:04 +0100
Subject: ARM: at91: pm: cpu_idle: switch DDR to power-down mode

On some DDR controllers, compatible with the sama5d3 one,
the sequence to enter/exit/re-enter the self-refresh mode adds
more constrains than what is currently written in the at91_idle
driver. An actual access to the DDR chip is needed between exit
and re-enter of this mode which is somehow difficult to implement.
This sequence can completely hang the SoC. It is particularly
experienced on parts which embed a L2 cache if the code run
between IDLE calls fits in it...

Moreover, as the intention is to enter and exit pretty rapidly
from IDLE, the power-down mode is a good candidate.

So now we use power-down instead of self-refresh. As we can
simplify the code for sama5d3 compatible DDR controllers,
we instantiate a new sama5d3_ddr_standby() function.

Signed-off-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Cc: <stable@vger.kernel.org> # v4.1+
Fixes: 017b5522d5e3 ("ARM: at91: Add new binding for sama5d3-ddramc")
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 arch/arm/mach-at91/pm.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
index 3d89b7905bd9..a277981f414d 100644
--- a/arch/arm/mach-at91/pm.c
+++ b/arch/arm/mach-at91/pm.c
@@ -289,6 +289,22 @@ static void at91_ddr_standby(void)
 		at91_ramc_write(1, AT91_DDRSDRC_LPR, saved_lpr1);
 }
 
+static void sama5d3_ddr_standby(void)
+{
+	u32 lpr0;
+	u32 saved_lpr0;
+
+	saved_lpr0 = at91_ramc_read(0, AT91_DDRSDRC_LPR);
+	lpr0 = saved_lpr0 & ~AT91_DDRSDRC_LPCB;
+	lpr0 |= AT91_DDRSDRC_LPCB_POWER_DOWN;
+
+	at91_ramc_write(0, AT91_DDRSDRC_LPR, lpr0);
+
+	cpu_do_idle();
+
+	at91_ramc_write(0, AT91_DDRSDRC_LPR, saved_lpr0);
+}
+
 /* We manage both DDRAM/SDRAM controllers, we need more than one value to
  * remember.
  */
@@ -323,7 +339,7 @@ static const struct of_device_id const ramc_ids[] __initconst = {
 	{ .compatible = "atmel,at91rm9200-sdramc", .data = at91rm9200_standby },
 	{ .compatible = "atmel,at91sam9260-sdramc", .data = at91sam9_sdram_standby },
 	{ .compatible = "atmel,at91sam9g45-ddramc", .data = at91_ddr_standby },
-	{ .compatible = "atmel,sama5d3-ddramc", .data = at91_ddr_standby },
+	{ .compatible = "atmel,sama5d3-ddramc", .data = sama5d3_ddr_standby },
 	{ /*sentinel*/ }
 };
 
-- 
cgit v1.2.3


From da9a796f5475b4d3a339083af719982b7ab4a12b Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 13 Mar 2017 16:59:57 +0000
Subject: drm/i915: Split GEM resetting into 3 phases

Currently we do a reset prepare/finish around the call to reset the GPU,
but it looks like we need a later stage after the hw has been
reinitialised to allow GEM to restart itself. Start by splitting the 2
GEM phases into 3:

  prepare - before the reset, check if GEM recovered, then stop GEM

  reset - after the reset, update GEM bookkeeping

  finish - after the re-initialisation following the reset, restart GEM

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170208143033.11651-2-chris@chris-wilson.co.uk
Link: http://patchwork.freedesktop.org/patch/msgid/20170313165958.13970-1-chris@chris-wilson.co.uk
(cherry picked from commit d80270931314a88d79d9bd5e0a5df93c12196375)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c | 3 ++-
 drivers/gpu/drm/i915/i915_drv.h | 1 +
 drivers/gpu/drm/i915/i915_gem.c | 7 ++++++-
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index e703556eba99..2093d203665d 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1788,7 +1788,7 @@ void i915_reset(struct drm_i915_private *dev_priv)
 		goto error;
 	}
 
-	i915_gem_reset_finish(dev_priv);
+	i915_gem_reset(dev_priv);
 	intel_overlay_reset(dev_priv);
 
 	/* Ok, now get things going again... */
@@ -1811,6 +1811,7 @@ void i915_reset(struct drm_i915_private *dev_priv)
 		goto error;
 	}
 
+	i915_gem_reset_finish(dev_priv);
 	i915_queue_hangcheck(dev_priv);
 
 wakeup:
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7febe6eecf72..1d20c2d00f42 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3342,6 +3342,7 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
 }
 
 int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
+void i915_gem_reset(struct drm_i915_private *dev_priv);
 void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
 void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 10777da73039..27fe5a9315f0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2834,7 +2834,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
 	engine->reset_hw(engine, request);
 }
 
-void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
+void i915_gem_reset(struct drm_i915_private *dev_priv)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
@@ -2856,6 +2856,11 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
 	}
 }
 
+void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
+{
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+}
+
 static void nop_submit_request(struct drm_i915_gem_request *request)
 {
 	dma_fence_set_error(&request->fence, -EIO);
-- 
cgit v1.2.3


From 4565bf58d45b8aded94e446666839955cdb5030c Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 13 Mar 2017 16:59:58 +0000
Subject: drm/i915: Disable engine->irq_tasklet around resets

When we restart the engines, and we have active requests, a request on
the first engine may complete and queue a request to the second engine
before we try to restart the second engine. That queueing of the
request may race with the engine to restart, and so may corrupt the
current state. Disabling the engine->irq_tasklet prevents the two paths
from writing into ELSP simultaneously (and modifyin the execlists_port[]
at the same time).

Include fixup 1d309634bcf4 ("drm/i915: Kill the tasklet then disable")

Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests")
Testcase: igt/gem_exec_fence/await-hang
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170208143033.11651-3-chris@chris-wilson.co.uk
Link: http://patchwork.freedesktop.org/patch/msgid/20170313165958.13970-2-chris@chris-wilson.co.uk
(cherry picked from commit 1f7b847d72c3583df5048d83bd945d0c2c524c28)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 27fe5a9315f0..1051fdf37d20 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2719,7 +2719,16 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
 	for_each_engine(engine, dev_priv, id) {
 		struct drm_i915_gem_request *request;
 
+		/* Prevent request submission to the hardware until we have
+		 * completed the reset in i915_gem_reset_finish(). If a request
+		 * is completed by one engine, it may then queue a request
+		 * to a second via its engine->irq_tasklet *just* as we are
+		 * calling engine->init_hw() and also writing the ELSP.
+		 * Turning off the engine->irq_tasklet until the reset is over
+		 * prevents the race.
+		 */
 		tasklet_kill(&engine->irq_tasklet);
+		tasklet_disable(&engine->irq_tasklet);
 
 		if (engine_stalled(engine)) {
 			request = i915_gem_find_active_request(engine);
@@ -2858,7 +2867,13 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
 
 void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
 {
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
+	for_each_engine(engine, dev_priv, id)
+		tasklet_enable(&engine->irq_tasklet);
 }
 
 static void nop_submit_request(struct drm_i915_gem_request *request)
-- 
cgit v1.2.3


From 35a3abfd198e6c69a6644784bb09a2d951fc6b21 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 13 Mar 2017 17:02:31 +0000
Subject: drm/i915: Only enable hotplug interrupts if the display interrupts
 are enabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In order to prevent accessing the hpd registers outside of the display
power wells, we should refrain from writing to the registers before the
display interrupts are enabled.

[    4.740136] WARNING: CPU: 1 PID: 221 at drivers/gpu/drm/i915/intel_uncore.c:795 __unclaimed_reg_debug+0x44/0x50 [i915]
[    4.740155] Unclaimed read from register 0x1e1110
[    4.740168] Modules linked in: i915(+) intel_gtt drm_kms_helper prime_numbers
[    4.740190] CPU: 1 PID: 221 Comm: systemd-udevd Not tainted 4.10.0-rc6+ #384
[    4.740203] Hardware name:                  /        , BIOS PYBSWCEL.86A.0027.2015.0507.1758 05/07/2015
[    4.740220] Call Trace:
[    4.740236]  dump_stack+0x4d/0x6f
[    4.740251]  __warn+0xc1/0xe0
[    4.740265]  warn_slowpath_fmt+0x4a/0x50
[    4.740281]  ? insert_work+0x77/0xc0
[    4.740355]  ? fwtable_write32+0x90/0x130 [i915]
[    4.740431]  __unclaimed_reg_debug+0x44/0x50 [i915]
[    4.740507]  fwtable_read32+0xd8/0x130 [i915]
[    4.740575]  i915_hpd_irq_setup+0xa5/0x100 [i915]
[    4.740649]  intel_hpd_init+0x68/0x80 [i915]
[    4.740716]  i915_driver_load+0xe19/0x1380 [i915]
[    4.740784]  i915_pci_probe+0x32/0x90 [i915]
[    4.740799]  pci_device_probe+0x8b/0xf0
[    4.740815]  driver_probe_device+0x2b6/0x450
[    4.740828]  __driver_attach+0xda/0xe0
[    4.740841]  ? driver_probe_device+0x450/0x450
[    4.740853]  bus_for_each_dev+0x5b/0x90
[    4.740865]  driver_attach+0x19/0x20
[    4.740878]  bus_add_driver+0x166/0x260
[    4.740892]  driver_register+0x5b/0xd0
[    4.740906]  ? 0xffffffffa0166000
[    4.740920]  __pci_register_driver+0x47/0x50
[    4.740985]  i915_init+0x5c/0x5e [i915]
[    4.740999]  do_one_initcall+0x3e/0x160
[    4.741015]  ? __vunmap+0x7c/0xc0
[    4.741029]  ? kmem_cache_alloc+0xcf/0x120
[    4.741045]  do_init_module+0x55/0x1c4
[    4.741060]  load_module+0x1f3f/0x25b0
[    4.741073]  ? __symbol_put+0x40/0x40
[    4.741086]  ? kernel_read_file+0x100/0x190
[    4.741100]  SYSC_finit_module+0xbc/0xf0
[    4.741112]  SyS_finit_module+0x9/0x10
[    4.741125]  entry_SYSCALL_64_fastpath+0x17/0x98
[    4.741135] RIP: 0033:0x7f8559a140f9
[    4.741145] RSP: 002b:00007fff7509a3e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
[    4.741161] RAX: ffffffffffffffda RBX: 00007f855aba02d1 RCX: 00007f8559a140f9
[    4.741172] RDX: 0000000000000000 RSI: 000055b6db0914f0 RDI: 0000000000000011
[    4.741183] RBP: 0000000000020000 R08: 0000000000000000 R09: 000000000000000e
[    4.741193] R10: 0000000000000011 R11: 0000000000000246 R12: 000055b6db0854d0
[    4.741204] R13: 000055b6db091150 R14: 0000000000000000 R15: 000055b6db035924

v2: Set dev_priv->display_irqs_enabled to true for all platforms other
than vlv/chv that manually control the display power domain.

Fixes: 19625e85c6ec ("drm/i915: Enable polling when we don't have hpd")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97798
Suggested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Lyude <cpaul@redhat.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Hans de Goede <jwrdegoede@fedoraproject.org>
Cc: stable@vger.kernel.org
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170215131547.5064-1-chris@chris-wilson.co.uk
Link: http://patchwork.freedesktop.org/patch/msgid/20170313170231.18633-1-chris@chris-wilson.co.uk
(cherry picked from commit 262fd485ac6b476479f41f00bb104f6a1766ae66)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c      | 10 ++++++++++
 drivers/gpu/drm/i915/intel_hotplug.c | 14 ++++++++------
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index e6ffef2f707a..4fc8973744b4 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -4266,6 +4266,16 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
 	if (!IS_GEN2(dev_priv))
 		dev->vblank_disable_immediate = true;
 
+	/* Most platforms treat the display irq block as an always-on
+	 * power domain. vlv/chv can disable it at runtime and need
+	 * special care to avoid writing any of the display block registers
+	 * outside of the power domain. We defer setting up the display irqs
+	 * in this case to the runtime pm.
+	 */
+	dev_priv->display_irqs_enabled = true;
+	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+		dev_priv->display_irqs_enabled = false;
+
 	dev->driver->get_vblank_timestamp = i915_get_vblank_timestamp;
 	dev->driver->get_scanout_position = i915_get_crtc_scanoutpos;
 
diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c
index b62e3f8ad415..54208bef7a83 100644
--- a/drivers/gpu/drm/i915/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/intel_hotplug.c
@@ -219,7 +219,7 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work)
 			}
 		}
 	}
-	if (dev_priv->display.hpd_irq_setup)
+	if (dev_priv->display_irqs_enabled && dev_priv->display.hpd_irq_setup)
 		dev_priv->display.hpd_irq_setup(dev_priv);
 	spin_unlock_irq(&dev_priv->irq_lock);
 
@@ -425,7 +425,7 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv,
 		}
 	}
 
-	if (storm_detected)
+	if (storm_detected && dev_priv->display_irqs_enabled)
 		dev_priv->display.hpd_irq_setup(dev_priv);
 	spin_unlock(&dev_priv->irq_lock);
 
@@ -471,10 +471,12 @@ void intel_hpd_init(struct drm_i915_private *dev_priv)
 	 * Interrupt setup is already guaranteed to be single-threaded, this is
 	 * just to make the assert_spin_locked checks happy.
 	 */
-	spin_lock_irq(&dev_priv->irq_lock);
-	if (dev_priv->display.hpd_irq_setup)
-		dev_priv->display.hpd_irq_setup(dev_priv);
-	spin_unlock_irq(&dev_priv->irq_lock);
+	if (dev_priv->display_irqs_enabled && dev_priv->display.hpd_irq_setup) {
+		spin_lock_irq(&dev_priv->irq_lock);
+		if (dev_priv->display_irqs_enabled)
+			dev_priv->display.hpd_irq_setup(dev_priv);
+		spin_unlock_irq(&dev_priv->irq_lock);
+	}
 }
 
 static void i915_hpd_poll_init_work(struct work_struct *work)
-- 
cgit v1.2.3


From 0f5418e564ac6452b9086295646e602a9addc4bf Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 13 Mar 2017 17:04:33 +0000
Subject: drm/i915: Drop support for I915_EXEC_CONSTANTS_* execbuf parameters.

This patch makes the I915_PARAM_HAS_EXEC_CONSTANTS getparam return 0
(indicating the optional feature is not supported), and makes execbuf
always return -EINVAL if the flags are used.

Apparently, no userspace ever shipped which used this optional feature:
I checked the git history of Mesa, xf86-video-intel, libva, and Beignet,
and there were zero commits showing a use of these flags.  Kernel commit
72bfa19c8deb4 apparently introduced the feature prematurely.  According
to Chris, the intention was to use this in cairo-drm, but "the use was
broken for gen6", so I don't think it ever happened.

'relative_constants_mode' has always been tracked per-device, but this
has actually been wrong ever since hardware contexts were introduced, as
the INSTPM register is saved (and automatically restored) as part of the
render ring context. The software per-device value could therefore get
out of sync with the hardware per-context value.  This meant that using
them is actually unsafe: a client which tried to use them could damage
the state of other clients, causing the GPU to interpret their BO
offsets as absolute pointers, leading to bogus memory reads.

These flags were also never ported to execlist mode, making them no-ops
on Gen9+ (which requires execlists), and Gen8 in the default mode.

On Gen8+, userspace can write these registers directly, achieving the
same effect.  On Gen6-7.5, it likely makes sense to extend the command
parser to support them.  I don't think anyone wants this on Gen4-5.

Based on a patch by Dave Gordon.

v3: Return -ENODEV for the getparam, as this is what we do for other
    obsolete features.  Suggested by Chris Wilson.

Cc: stable@vger.kernel.org
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92448
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/20170215093446.21291-1-kenneth@whitecape.org
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/20170313170433.26843-1-chris@chris-wilson.co.uk
(cherry picked from commit ef0f411f51475f4eebf9fc1b19a85be698af19ff)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c            |  4 +--
 drivers/gpu/drm/i915/i915_drv.h            |  2 --
 drivers/gpu/drm/i915/i915_gem.c            |  2 --
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 52 ++----------------------------
 4 files changed, 3 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 2093d203665d..6cd78bb2064d 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -248,6 +248,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_IRQ_ACTIVE:
 	case I915_PARAM_ALLOW_BATCHBUFFER:
 	case I915_PARAM_LAST_DISPATCH:
+	case I915_PARAM_HAS_EXEC_CONSTANTS:
 		/* Reject all old ums/dri params. */
 		return -ENODEV;
 	case I915_PARAM_CHIPSET_ID:
@@ -274,9 +275,6 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_BSD2:
 		value = !!dev_priv->engine[VCS2];
 		break;
-	case I915_PARAM_HAS_EXEC_CONSTANTS:
-		value = INTEL_GEN(dev_priv) >= 4;
-		break;
 	case I915_PARAM_HAS_LLC:
 		value = HAS_LLC(dev_priv);
 		break;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1d20c2d00f42..80be09831a52 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2064,8 +2064,6 @@ struct drm_i915_private {
 
 	const struct intel_device_info info;
 
-	int relative_constants_mode;
-
 	void __iomem *regs;
 
 	struct intel_uncore uncore;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1051fdf37d20..67b1fc5a0331 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4694,8 +4694,6 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
 	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
 	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
 
-	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
-
 	init_waitqueue_head(&dev_priv->pending_flip_queue);
 
 	dev_priv->mm.interruptible = true;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index d02cfaefe1c8..30e0675fd7da 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1408,10 +1408,7 @@ execbuf_submit(struct i915_execbuffer_params *params,
 	       struct drm_i915_gem_execbuffer2 *args,
 	       struct list_head *vmas)
 {
-	struct drm_i915_private *dev_priv = params->request->i915;
 	u64 exec_start, exec_len;
-	int instp_mode;
-	u32 instp_mask;
 	int ret;
 
 	ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
@@ -1422,56 +1419,11 @@ execbuf_submit(struct i915_execbuffer_params *params,
 	if (ret)
 		return ret;
 
-	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
-	instp_mask = I915_EXEC_CONSTANTS_MASK;
-	switch (instp_mode) {
-	case I915_EXEC_CONSTANTS_REL_GENERAL:
-	case I915_EXEC_CONSTANTS_ABSOLUTE:
-	case I915_EXEC_CONSTANTS_REL_SURFACE:
-		if (instp_mode != 0 && params->engine->id != RCS) {
-			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
-			return -EINVAL;
-		}
-
-		if (instp_mode != dev_priv->relative_constants_mode) {
-			if (INTEL_INFO(dev_priv)->gen < 4) {
-				DRM_DEBUG("no rel constants on pre-gen4\n");
-				return -EINVAL;
-			}
-
-			if (INTEL_INFO(dev_priv)->gen > 5 &&
-			    instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
-				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
-				return -EINVAL;
-			}
-
-			/* The HW changed the meaning on this bit on gen6 */
-			if (INTEL_INFO(dev_priv)->gen >= 6)
-				instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
-		}
-		break;
-	default:
-		DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
+	if (args->flags & I915_EXEC_CONSTANTS_MASK) {
+		DRM_DEBUG("I915_EXEC_CONSTANTS_* unsupported\n");
 		return -EINVAL;
 	}
 
-	if (params->engine->id == RCS &&
-	    instp_mode != dev_priv->relative_constants_mode) {
-		struct intel_ring *ring = params->request->ring;
-
-		ret = intel_ring_begin(params->request, 4);
-		if (ret)
-			return ret;
-
-		intel_ring_emit(ring, MI_NOOP);
-		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit_reg(ring, INSTPM);
-		intel_ring_emit(ring, instp_mask << 16 | instp_mode);
-		intel_ring_advance(ring);
-
-		dev_priv->relative_constants_mode = instp_mode;
-	}
-
 	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
 		ret = i915_reset_gen7_sol_offsets(params->request);
 		if (ret)
-- 
cgit v1.2.3


From 8f68d591d4765b2e1ce9d916ac7bc5583285c4ad Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 13 Mar 2017 17:06:17 +0000
Subject: drm/i915: Stop using RP_DOWN_EI on Baytrail

On Baytrail, we manually calculate busyness over the evaluation interval
to avoid issues with miscaluations with RC6 enabled. However, it turns
out that the DOWN_EI interrupt generator is completely bust - it
operates in two modes, continuous or never. Neither of which are
conducive to good behaviour. Stop unmask the DOWN_EI interrupt and just
compute everything from the UP_EI which does seem to correspond to the
desired interval.

v2: Fixup gen6_rps_pm_mask() as well
v3: Inline vlv_c0_above() to combine the now identical elapsed
calculation for up/down and simplify the threshold testing

Fixes: 43cf3bf084ba ("drm/i915: Improved w/a for rps on Baytrail")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: <stable@vger.kernel.org> # v4.1+
Link: http://patchwork.freedesktop.org/patch/msgid/20170309211232.28878-1-chris@chris-wilson.co.uk
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170313170617.31564-1-chris@chris-wilson.co.uk
(cherry picked from commit e0e8c7cb6eb68e9256de2d8cbeb481d3701c05ac)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h |  2 +-
 drivers/gpu/drm/i915/i915_irq.c | 73 ++++++++++++++++-------------------------
 drivers/gpu/drm/i915/intel_pm.c |  5 +--
 3 files changed, 32 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 80be09831a52..1e53c31b6826 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1325,7 +1325,7 @@ struct intel_gen6_power_mgmt {
 	unsigned boosts;
 
 	/* manual wa residency calculations */
-	struct intel_rps_ei up_ei, down_ei;
+	struct intel_rps_ei ei;
 
 	/*
 	 * Protects RPS/RC6 register access and PCU communication.
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 4fc8973744b4..b6c886ac901b 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1046,68 +1046,51 @@ static void vlv_c0_read(struct drm_i915_private *dev_priv,
 	ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
 }
 
-static bool vlv_c0_above(struct drm_i915_private *dev_priv,
-			 const struct intel_rps_ei *old,
-			 const struct intel_rps_ei *now,
-			 int threshold)
-{
-	u64 time, c0;
-	unsigned int mul = 100;
-
-	if (old->cz_clock == 0)
-		return false;
-
-	if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
-		mul <<= 8;
-
-	time = now->cz_clock - old->cz_clock;
-	time *= threshold * dev_priv->czclk_freq;
-
-	/* Workload can be split between render + media, e.g. SwapBuffers
-	 * being blitted in X after being rendered in mesa. To account for
-	 * this we need to combine both engines into our activity counter.
-	 */
-	c0 = now->render_c0 - old->render_c0;
-	c0 += now->media_c0 - old->media_c0;
-	c0 *= mul * VLV_CZ_CLOCK_TO_MILLI_SEC;
-
-	return c0 >= time;
-}
-
 void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
 {
-	vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
-	dev_priv->rps.up_ei = dev_priv->rps.down_ei;
+	memset(&dev_priv->rps.ei, 0, sizeof(dev_priv->rps.ei));
 }
 
 static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
 {
+	const struct intel_rps_ei *prev = &dev_priv->rps.ei;
 	struct intel_rps_ei now;
 	u32 events = 0;
 
-	if ((pm_iir & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) == 0)
+	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
 		return 0;
 
 	vlv_c0_read(dev_priv, &now);
 	if (now.cz_clock == 0)
 		return 0;
 
-	if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
-		if (!vlv_c0_above(dev_priv,
-				  &dev_priv->rps.down_ei, &now,
-				  dev_priv->rps.down_threshold))
-			events |= GEN6_PM_RP_DOWN_THRESHOLD;
-		dev_priv->rps.down_ei = now;
-	}
+	if (prev->cz_clock) {
+		u64 time, c0;
+		unsigned int mul;
 
-	if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
-		if (vlv_c0_above(dev_priv,
-				 &dev_priv->rps.up_ei, &now,
-				 dev_priv->rps.up_threshold))
-			events |= GEN6_PM_RP_UP_THRESHOLD;
-		dev_priv->rps.up_ei = now;
+		mul = VLV_CZ_CLOCK_TO_MILLI_SEC * 100; /* scale to threshold% */
+		if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
+			mul <<= 8;
+
+		time = now.cz_clock - prev->cz_clock;
+		time *= dev_priv->czclk_freq;
+
+		/* Workload can be split between render + media,
+		 * e.g. SwapBuffers being blitted in X after being rendered in
+		 * mesa. To account for this we need to combine both engines
+		 * into our activity counter.
+		 */
+		c0 = now.render_c0 - prev->render_c0;
+		c0 += now.media_c0 - prev->media_c0;
+		c0 *= mul;
+
+		if (c0 > time * dev_priv->rps.up_threshold)
+			events = GEN6_PM_RP_UP_THRESHOLD;
+		else if (c0 < time * dev_priv->rps.down_threshold)
+			events = GEN6_PM_RP_DOWN_THRESHOLD;
 	}
 
+	dev_priv->rps.ei = now;
 	return events;
 }
 
@@ -4228,7 +4211,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
 	/* Let's track the enabled rps events */
 	if (IS_VALLEYVIEW(dev_priv))
 		/* WaGsvRC0ResidencyMethod:vlv */
-		dev_priv->pm_rps_events = GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED;
+		dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
 	else
 		dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 940bab22d464..6a29784d2b41 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4928,8 +4928,9 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
 {
 	u32 mask = 0;
 
+	/* We use UP_EI_EXPIRED interupts for both up/down in manual mode */
 	if (val > dev_priv->rps.min_freq_softlimit)
-		mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
+		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
 	if (val < dev_priv->rps.max_freq_softlimit)
 		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
 
@@ -5039,7 +5040,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
 {
 	mutex_lock(&dev_priv->rps.hw_lock);
 	if (dev_priv->rps.enabled) {
-		if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED))
+		if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
 			gen6_rps_reset_ei(dev_priv);
 		I915_WRITE(GEN6_PMINTRMSK,
 			   gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
-- 
cgit v1.2.3


From abf8315f71dc5a2ee56fb60830dcb2861982dc91 Mon Sep 17 00:00:00 2001
From: Jyri Sarha <jsarha@ti.com>
Date: Tue, 31 Jan 2017 16:18:42 +0200
Subject: drm/tilcdc: Fix hardcoded fail-return value in tilcdc_crtc_create()

Fix badly hardcoded return return value under fail-label. All goto
branches to the label set the "ret"-variable accordingly.

Signed-off-by: Jyri Sarha <jsarha@ti.com>
Reviewed-by: Gabriel Krisman Bertazi <krisman@collabora.co.uk>
---
 drivers/gpu/drm/tilcdc/tilcdc_crtc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
index f80bf9385e41..abcbcd9f5851 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
@@ -1036,5 +1036,5 @@ int tilcdc_crtc_create(struct drm_device *dev)
 
 fail:
 	tilcdc_crtc_destroy(crtc);
-	return -ENOMEM;
+	return ret;
 }
-- 
cgit v1.2.3


From 11abbc9f39e002a2b25657e00abac8056cb39e93 Mon Sep 17 00:00:00 2001
From: Jyri Sarha <jsarha@ti.com>
Date: Wed, 1 Mar 2017 10:30:28 +0200
Subject: drm/tilcdc: Set framebuffer DMA address to HW only if CRTC is enabled

Touching HW while clocks are off is a serious error and for instance
breaks suspend functionality. After this patch tilcdc_crtc_update_fb()
always updates the primary plane's framebuffer pointer, increases fb's
reference count and stores vblank event. tilcdc_crtc_update_fb() only
writes the fb's DMA address to HW if the crtc is enabled, as
tilcdc_crtc_enable() takes care of writing the address on enable.

This patch also refactors the tilcdc_crtc_update_fb() a bit. Number of
subsequent small changes had made it almost unreadable. There should
be no other functional changes but checking the CRTC's enable
state. However, the locking goes a bit differently and some of the
redundant checks have been removed in this new version.

The enable_lock should be enough to protect the access to
tilcdc_crtc->enabled. The irq_lock protects the access to last_vblank
and next_fb. The check for vrefresh and last_vblank being valid is
redundant, as the vrefresh should be always valid if the CRTC is
enabled and now last_vblank should be too, because it is initialized
to current time when CRTC raster is enabled. If for some reason the
values are not correctly initialized the division by zero warning is
quite appropriate.

Signed-off-by: Jyri Sarha <jsarha@ti.com>
Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
---
 drivers/gpu/drm/tilcdc/tilcdc_crtc.c | 35 +++++++++++++++++++++++------------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
index abcbcd9f5851..d745e8b50fb8 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
@@ -464,6 +464,7 @@ static void tilcdc_crtc_enable(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
 	struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc);
+	unsigned long flags;
 
 	WARN_ON(!drm_modeset_is_locked(&crtc->mutex));
 	mutex_lock(&tilcdc_crtc->enable_lock);
@@ -484,7 +485,17 @@ static void tilcdc_crtc_enable(struct drm_crtc *crtc)
 	tilcdc_write_mask(dev, LCDC_RASTER_CTRL_REG,
 			  LCDC_PALETTE_LOAD_MODE(DATA_ONLY),
 			  LCDC_PALETTE_LOAD_MODE_MASK);
+
+	/* There is no real chance for a race here as the time stamp
+	 * is taken before the raster DMA is started. The spin-lock is
+	 * taken to have a memory barrier after taking the time-stamp
+	 * and to avoid a context switch between taking the stamp and
+	 * enabling the raster.
+	 */
+	spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags);
+	tilcdc_crtc->last_vblank = ktime_get();
 	tilcdc_set(dev, LCDC_RASTER_CTRL_REG, LCDC_RASTER_ENABLE);
+	spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags);
 
 	drm_crtc_vblank_on(crtc);
 
@@ -539,7 +550,6 @@ static void tilcdc_crtc_off(struct drm_crtc *crtc, bool shutdown)
 	}
 
 	drm_flip_work_commit(&tilcdc_crtc->unref_work, priv->wq);
-	tilcdc_crtc->last_vblank = 0;
 
 	tilcdc_crtc->enabled = false;
 	mutex_unlock(&tilcdc_crtc->enable_lock);
@@ -602,7 +612,6 @@ int tilcdc_crtc_update_fb(struct drm_crtc *crtc,
 {
 	struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
-	unsigned long flags;
 
 	WARN_ON(!drm_modeset_is_locked(&crtc->mutex));
 
@@ -614,28 +623,30 @@ int tilcdc_crtc_update_fb(struct drm_crtc *crtc,
 	drm_framebuffer_reference(fb);
 
 	crtc->primary->fb = fb;
+	tilcdc_crtc->event = event;
 
-	spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags);
+	mutex_lock(&tilcdc_crtc->enable_lock);
 
-	if (crtc->hwmode.vrefresh && ktime_to_ns(tilcdc_crtc->last_vblank)) {
+	if (tilcdc_crtc->enabled) {
+		unsigned long flags;
 		ktime_t next_vblank;
 		s64 tdiff;
 
-		next_vblank = ktime_add_us(tilcdc_crtc->last_vblank,
-			1000000 / crtc->hwmode.vrefresh);
+		spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags);
 
+		next_vblank = ktime_add_us(tilcdc_crtc->last_vblank,
+					   1000000 / crtc->hwmode.vrefresh);
 		tdiff = ktime_to_us(ktime_sub(next_vblank, ktime_get()));
 
 		if (tdiff < TILCDC_VBLANK_SAFETY_THRESHOLD_US)
 			tilcdc_crtc->next_fb = fb;
-	}
-
-	if (tilcdc_crtc->next_fb != fb)
-		set_scanout(crtc, fb);
+		else
+			set_scanout(crtc, fb);
 
-	tilcdc_crtc->event = event;
+		spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags);
+	}
 
-	spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags);
+	mutex_unlock(&tilcdc_crtc->enable_lock);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 1b2e5ea0b7061be3ffdcd85918c2f428edace4ba Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sun, 12 Feb 2017 17:19:59 +0000
Subject: drm/i915: Always call i915_gem_reset_finish() following
 i915_gem_reset_prepare()

As i915_gem_reset_finish() undoes the steps from
i915_gem_reset_prepare() to leave the system in a fully-working state,
e.g. to be able to free the breadcrumb signal threads, make sure that we
always call it even on the error path.

Fixes: da9a796f5475 ("drm/i915: Split GEM resetting into 3 phases")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/20170212172002.23072-2-chris@chris-wilson.co.uk
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
(cherry picked from commit 8d613c539c74fa9055f88f4116196d7c820bd98f)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 6cd78bb2064d..1c75402a59c1 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1809,10 +1809,10 @@ void i915_reset(struct drm_i915_private *dev_priv)
 		goto error;
 	}
 
-	i915_gem_reset_finish(dev_priv);
 	i915_queue_hangcheck(dev_priv);
 
 wakeup:
+	i915_gem_reset_finish(dev_priv);
 	enable_irq(dev_priv->drm.irq);
 	wake_up_bit(&error->flags, I915_RESET_IN_PROGRESS);
 	return;
-- 
cgit v1.2.3


From 318465adace471387cfd5f768daa64bb4c623695 Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Mon, 13 Feb 2017 19:02:39 +0800
Subject: dt-bindings: rockchip-dw-mshc: rename RK1108 to RV1108

Rockchip finally named the SOC as RV1108, so change it.

Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt
index ea9c1c9607f6..520d61dad6dd 100644
--- a/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt
+++ b/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt
@@ -13,7 +13,7 @@ Required Properties:
 	- "rockchip,rk2928-dw-mshc": for Rockchip RK2928 and following,
 							before RK3288
 	- "rockchip,rk3288-dw-mshc": for Rockchip RK3288
-	- "rockchip,rk1108-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RK1108
+	- "rockchip,rv1108-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RV1108
 	- "rockchip,rk3036-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RK3036
 	- "rockchip,rk3368-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RK3368
 	- "rockchip,rk3399-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RK3399
-- 
cgit v1.2.3


From 16681037e75ce08f2980ac5dbb03414429c7a55d Mon Sep 17 00:00:00 2001
From: Anssi Hannula <anssi.hannula@bitwise.fi>
Date: Mon, 13 Feb 2017 14:06:10 +0200
Subject: mmc: sdhci-of-arasan: fix incorrect timeout clock

sdhci_arasan_get_timeout_clock() divides the frequency it has with (1 <<
(13 + divisor)).

However, the divisor is not some Arasan-specific value, but instead is
just the Data Timeout Counter Value from the SDHCI Timeout Control
Register.

Applying it here like this is wrong as the sdhci driver already takes
that value into account when calculating timeouts, and in fact it *sets*
that register value based on how long a timeout is wanted.

Additionally, sdhci core interprets the .get_timeout_clock callback
return value as if it were read from hardware registers, i.e. the unit
should be kHz or MHz depending on SDHCI_TIMEOUT_CLK_UNIT capability bit.
This bit is set at least on the tested Zynq-7000 SoC.

With the tested hardware (SDHCI_TIMEOUT_CLK_UNIT set) this results in
too high a timeout clock rate being reported, causing the core to use
longer-than-needed timeouts. Additionally, on a partitioned MMC
(therefore having erase_group_def bit set) mmc_calc_max_discard()
disables discard support as it looks like controller does not support
the long timeouts needed for that.

Do not apply the extra divisor and return the timeout clock in the
expected unit.

Tested with a Zynq-7000 SoC and a partitioned Toshiba THGBMAG5A1JBAWR
eMMC card.

Signed-off-by: Anssi Hannula <anssi.hannula@bitwise.fi>
Fixes: e3ec3a3d11ad ("mmc: arasan: Add driver for Arasan SDHCI")
Cc: <stable@vger.kernel.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-of-arasan.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c
index 410a55b1c25f..1cfd7f900339 100644
--- a/drivers/mmc/host/sdhci-of-arasan.c
+++ b/drivers/mmc/host/sdhci-of-arasan.c
@@ -28,13 +28,9 @@
 #include "sdhci-pltfm.h"
 #include <linux/of.h>
 
-#define SDHCI_ARASAN_CLK_CTRL_OFFSET	0x2c
 #define SDHCI_ARASAN_VENDOR_REGISTER	0x78
 
 #define VENDOR_ENHANCED_STROBE		BIT(0)
-#define CLK_CTRL_TIMEOUT_SHIFT		16
-#define CLK_CTRL_TIMEOUT_MASK		(0xf << CLK_CTRL_TIMEOUT_SHIFT)
-#define CLK_CTRL_TIMEOUT_MIN_EXP	13
 
 #define PHY_CLK_TOO_SLOW_HZ		400000
 
@@ -163,15 +159,15 @@ static int sdhci_arasan_syscon_write(struct sdhci_host *host,
 
 static unsigned int sdhci_arasan_get_timeout_clock(struct sdhci_host *host)
 {
-	u32 div;
 	unsigned long freq;
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 
-	div = readl(host->ioaddr + SDHCI_ARASAN_CLK_CTRL_OFFSET);
-	div = (div & CLK_CTRL_TIMEOUT_MASK) >> CLK_CTRL_TIMEOUT_SHIFT;
+	/* SDHCI timeout clock is in kHz */
+	freq = DIV_ROUND_UP(clk_get_rate(pltfm_host->clk), 1000);
 
-	freq = clk_get_rate(pltfm_host->clk);
-	freq /= 1 << (CLK_CTRL_TIMEOUT_MIN_EXP + div);
+	/* or in MHz */
+	if (host->caps & SDHCI_TIMEOUT_CLK_UNIT)
+		freq = DIV_ROUND_UP(freq, 1000);
 
 	return freq;
 }
-- 
cgit v1.2.3


From 9c31b087348cb2b5e668261f2eee2f224b3780b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Mon, 13 Feb 2017 19:58:18 +0200
Subject: drm/i915: Reject HDMI 12bpc if the sink doesn't indicate support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Check that the sink really declared 12bpc support before we enable it.
This should not actually never happen since it's mandatory for HDMI
sinks to support 12bpc if they support any deep color modes. But
reality disagrees with the theory and there are actually sinks in
the wild that violate the spec.

v2: Fix the output_types check
    Update commit message to state that these things are in fact real

Cc: stable@vger.kernel.org
Cc: Nicholas Sielicki <nicholas.sielicki@gmail.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99250
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170213175818.24958-1-ville.syrjala@linux.intel.com
Reviewed-by: Shashank Sharma <shashank.sharma@intel.com>
(cherry picked from commit c750bdd3e7e204cc88b32806c3864487a03cd84b)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_hdmi.c | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index ebae2bd83918..24b2fa5b6282 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -1298,16 +1298,34 @@ intel_hdmi_mode_valid(struct drm_connector *connector,
 
 static bool hdmi_12bpc_possible(struct intel_crtc_state *crtc_state)
 {
-	struct drm_device *dev = crtc_state->base.crtc->dev;
+	struct drm_i915_private *dev_priv =
+		to_i915(crtc_state->base.crtc->dev);
+	struct drm_atomic_state *state = crtc_state->base.state;
+	struct drm_connector_state *connector_state;
+	struct drm_connector *connector;
+	int i;
 
-	if (HAS_GMCH_DISPLAY(to_i915(dev)))
+	if (HAS_GMCH_DISPLAY(dev_priv))
 		return false;
 
 	/*
 	 * HDMI 12bpc affects the clocks, so it's only possible
 	 * when not cloning with other encoder types.
 	 */
-	return crtc_state->output_types == 1 << INTEL_OUTPUT_HDMI;
+	if (crtc_state->output_types != 1 << INTEL_OUTPUT_HDMI)
+		return false;
+
+	for_each_connector_in_state(state, connector, connector_state, i) {
+		const struct drm_display_info *info = &connector->display_info;
+
+		if (connector_state->crtc != crtc_state->base.crtc)
+			continue;
+
+		if ((info->edid_hdmi_dc_modes & DRM_EDID_HDMI_DC_36) == 0)
+			return false;
+	}
+
+	return true;
 }
 
 bool intel_hdmi_compute_config(struct intel_encoder *encoder,
-- 
cgit v1.2.3


From 773dc118756b1f38766063e90e582016be868f09 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 1 Mar 2017 14:11:47 -0800
Subject: mmc: core: Fix access to HS400-ES devices

HS400-ES devices fail to initialize with the following error messages.

mmc1: power class selection to bus width 8 ddr 0 failed
mmc1: error -110 whilst initialising MMC card

This was seen on Samsung Chromebook Plus. Code analysis points to
commit 3d4ef329757c ("mmc: core: fix multi-bit bus width without
high-speed mode"), which attempts to set the bus width for all but
HS200 devices unconditionally. However, for HS400-ES, the bus width
is already selected.

Cc: Anssi Hannula <anssi.hannula@bitwise.fi>
Cc: Douglas Anderson <dianders@chromium.org>
Cc: Brian Norris <briannorris@chromium.org>
Fixes: 3d4ef329757c ("mmc: core: fix multi-bit bus width ...")
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Shawn Lin <shawn.lin@rock-chip.com>
Tested-by: Heiko Stuebner <heiko@sntech.de>
Cc: <stable@vger.kernel.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/mmc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 7fd722868875..b502601df228 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1730,7 +1730,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 		err = mmc_select_hs400(card);
 		if (err)
 			goto free_card;
-	} else {
+	} else if (!mmc_card_hs400es(card)) {
 		/* Select the desired bus width optionally */
 		err = mmc_select_bus_width(card);
 		if (err > 0 && mmc_card_hs(card)) {
-- 
cgit v1.2.3


From 2602b740e45cc64feb55d5a9ee8db744ab3becbb Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 13 Mar 2017 14:36:32 +0200
Subject: mmc: block: Fix is_waiting_last_req set incorrectly

Commit 15520111500c ("mmc: core: Further fix thread wake-up") allowed a
queue to release the host with is_waiting_last_req set to true. A queue
waiting to claim the host will not reset it, which can result in the
queue getting stuck in a loop.

Fixes: 15520111500c ("mmc: core: Further fix thread wake-up")
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: stable@vger.kernel.org # v4.10+
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/block.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 1621fa08e206..e59107ca512a 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -1817,6 +1817,7 @@ void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 		mmc_blk_issue_flush(mq, req);
 	} else {
 		mmc_blk_issue_rw_rq(mq, req);
+		card->host->context_info.is_waiting_last_req = false;
 	}
 
 out:
-- 
cgit v1.2.3


From 8ecc34448e24e9e8a8f3a9b37be70b43c6af5288 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 13 Mar 2017 14:36:33 +0200
Subject: mmc: block: Fix cmd error reset failure path

Commit 4e1f780032c5 ("mmc: block: break out mmc_blk_rw_cmd_abort()")
assumed the request had not completed, but in one case it had. Fix that.

Fixes: 4e1f780032c5 ("mmc: block: break out mmc_blk_rw_cmd_abort()")
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/block.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index e59107ca512a..05afefcfb611 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -1701,7 +1701,8 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
 		case MMC_BLK_CMD_ERR:
 			req_pending = mmc_blk_rw_cmd_err(md, card, brq, old_req, req_pending);
 			if (mmc_blk_reset(md, card->host, type)) {
-				mmc_blk_rw_cmd_abort(card, old_req);
+				if (req_pending)
+					mmc_blk_rw_cmd_abort(card, old_req);
 				mmc_blk_rw_try_restart(mq, new_req);
 				return;
 			}
-- 
cgit v1.2.3


From 0977762f6d15f13caccc20d71a5dec47d098907d Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Mon, 13 Mar 2017 13:44:35 -0700
Subject: md/r5cache: fix set_syndrome_sources() for data in cache

Before this patch, device InJournal will be included in prexor
(SYNDROME_SRC_WANT_DRAIN) but not in reconstruct (SYNDROME_SRC_WRITTEN). So it
will break parity calculation. With srctype == SYNDROME_SRC_WRITTEN, we need
include both dev with non-null ->written and dev with R5_InJournal. This fixes
logic in 1e6d690(md/r5cache: caching phase of r5cache)

Cc: stable@vger.kernel.org (v4.10+)
Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 6bfedfcf41c1..ed5cd705b985 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1401,7 +1401,8 @@ static int set_syndrome_sources(struct page **srcs,
 		     (test_bit(R5_Wantdrain, &dev->flags) ||
 		      test_bit(R5_InJournal, &dev->flags))) ||
 		    (srctype == SYNDROME_SRC_WRITTEN &&
-		     dev->written)) {
+		     (dev->written ||
+		      test_bit(R5_InJournal, &dev->flags)))) {
 			if (test_bit(R5_InJournal, &dev->flags))
 				srcs[slot] = sh->dev[i].orig_page;
 			else
-- 
cgit v1.2.3


From 9c62110454b088b4914ffe375c2dbc19643eac34 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Tue, 14 Mar 2017 11:51:59 -0600
Subject: blk-mq-sched: don't run the queue async from
 blk_mq_try_issue_directly()

If we have scheduling enabled, we jump directly to insert-and-run.
That's fine, but we run the queue async and we don't pass in information
on whether we can block from this context or not. Fixup both these
cases.

Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 159187a28d66..a4546f060e80 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1434,7 +1434,8 @@ static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
 	return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true);
 }
 
-static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie)
+static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie,
+				      bool may_sleep)
 {
 	struct request_queue *q = rq->q;
 	struct blk_mq_queue_data bd = {
@@ -1475,7 +1476,7 @@ static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie)
 	}
 
 insert:
-	blk_mq_sched_insert_request(rq, false, true, true, false);
+	blk_mq_sched_insert_request(rq, false, true, false, may_sleep);
 }
 
 /*
@@ -1569,11 +1570,11 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 		if (!(data.hctx->flags & BLK_MQ_F_BLOCKING)) {
 			rcu_read_lock();
-			blk_mq_try_issue_directly(old_rq, &cookie);
+			blk_mq_try_issue_directly(old_rq, &cookie, false);
 			rcu_read_unlock();
 		} else {
 			srcu_idx = srcu_read_lock(&data.hctx->queue_rq_srcu);
-			blk_mq_try_issue_directly(old_rq, &cookie);
+			blk_mq_try_issue_directly(old_rq, &cookie, true);
 			srcu_read_unlock(&data.hctx->queue_rq_srcu, srcu_idx);
 		}
 		goto done;
-- 
cgit v1.2.3


From 8c53ad2139137dd4bf506a2c2b888de3816e8f75 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Mon, 13 Mar 2017 15:14:08 +0800
Subject: drm/amd/powerplay: fix copy error in smu7_clockpoweragting.c

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c
index 8cf71f3c6d0e..261b828ad590 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c
@@ -178,7 +178,7 @@ int smu7_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate)
 	if (bgate) {
 		cgs_set_powergating_state(hwmgr->device,
 						AMD_IP_BLOCK_TYPE_VCE,
-						AMD_PG_STATE_UNGATE);
+						AMD_PG_STATE_GATE);
 		cgs_set_clockgating_state(hwmgr->device,
 				AMD_IP_BLOCK_TYPE_VCE,
 				AMD_CG_STATE_GATE);
-- 
cgit v1.2.3


From 11353b9d10392e79e32603d2178e75feb25eaf0d Mon Sep 17 00:00:00 2001
From: Zhilong Liu <zlliu@suse.com>
Date: Tue, 14 Mar 2017 15:52:26 +0800
Subject: md/raid1: fix a trivial typo in comments

raid1.c: fix a trivial typo in comments of freeze_array().

Cc: Jack Wang <jack.wang.usish@gmail.com>
Cc: Guoqing Jiang <gqjiang@suse.com>
Cc: John Stoffel <john@stoffel.org>
Acked-by: Coly Li <colyli@suse.de>
Signed-off-by: Zhilong Liu <zlliu@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index c33e96e33b8e..a34f58772022 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1027,7 +1027,7 @@ static int get_unqueued_pending(struct r1conf *conf)
 static void freeze_array(struct r1conf *conf, int extra)
 {
 	/* Stop sync I/O and normal I/O and wait for everything to
-	 * go quite.
+	 * go quiet.
 	 * This is called in two situations:
 	 * 1) management command handlers (reshape, remove disk, quiesce).
 	 * 2) one normal I/O request failed.
-- 
cgit v1.2.3


From dc434e056fe1dada20df7ba07f32739d3a701adf Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 14 Mar 2017 16:06:45 +0100
Subject: cpu/hotplug: Serialize callback invocations proper

The setup/remove_state/instance() functions in the hotplug core code are
serialized against concurrent CPU hotplug, but unfortunately not serialized
against themself.

As a consequence a concurrent invocation of these function results in
corruption of the callback machinery because two instances try to invoke
callbacks on remote cpus at the same time. This results in missing callback
invocations and initiator threads waiting forever on the completion.

The obvious solution to replace get_cpu_online() with cpu_hotplug_begin()
is not possible because at least one callsite calls into these functions
from a get_online_cpu() locked region.

Extend the protection scope of the cpuhp_state_mutex from solely protecting
the state arrays to cover the callback invocation machinery as well.

Fixes: 5b7aa87e0482 ("cpu/hotplug: Implement setup/removal interface")
Reported-and-tested-by: Bart Van Assche <Bart.VanAssche@sandisk.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: hpa@zytor.com
Cc: mingo@kernel.org
Cc: akpm@linux-foundation.org
Cc: torvalds@linux-foundation.org
Link: http://lkml.kernel.org/r/20170314150645.g4tdyoszlcbajmna@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/cpu.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index f7c063239fa5..37b223e4fc05 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1335,26 +1335,21 @@ static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
 	struct cpuhp_step *sp;
 	int ret = 0;
 
-	mutex_lock(&cpuhp_state_mutex);
-
 	if (state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN) {
 		ret = cpuhp_reserve_state(state);
 		if (ret < 0)
-			goto out;
+			return ret;
 		state = ret;
 	}
 	sp = cpuhp_get_step(state);
-	if (name && sp->name) {
-		ret = -EBUSY;
-		goto out;
-	}
+	if (name && sp->name)
+		return -EBUSY;
+
 	sp->startup.single = startup;
 	sp->teardown.single = teardown;
 	sp->name = name;
 	sp->multi_instance = multi_instance;
 	INIT_HLIST_HEAD(&sp->list);
-out:
-	mutex_unlock(&cpuhp_state_mutex);
 	return ret;
 }
 
@@ -1428,6 +1423,7 @@ int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
 		return -EINVAL;
 
 	get_online_cpus();
+	mutex_lock(&cpuhp_state_mutex);
 
 	if (!invoke || !sp->startup.multi)
 		goto add_node;
@@ -1447,16 +1443,14 @@ int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
 		if (ret) {
 			if (sp->teardown.multi)
 				cpuhp_rollback_install(cpu, state, node);
-			goto err;
+			goto unlock;
 		}
 	}
 add_node:
 	ret = 0;
-	mutex_lock(&cpuhp_state_mutex);
 	hlist_add_head(node, &sp->list);
+unlock:
 	mutex_unlock(&cpuhp_state_mutex);
-
-err:
 	put_online_cpus();
 	return ret;
 }
@@ -1491,6 +1485,7 @@ int __cpuhp_setup_state(enum cpuhp_state state,
 		return -EINVAL;
 
 	get_online_cpus();
+	mutex_lock(&cpuhp_state_mutex);
 
 	ret = cpuhp_store_callbacks(state, name, startup, teardown,
 				    multi_instance);
@@ -1524,6 +1519,7 @@ int __cpuhp_setup_state(enum cpuhp_state state,
 		}
 	}
 out:
+	mutex_unlock(&cpuhp_state_mutex);
 	put_online_cpus();
 	/*
 	 * If the requested state is CPUHP_AP_ONLINE_DYN, return the
@@ -1547,6 +1543,8 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state,
 		return -EINVAL;
 
 	get_online_cpus();
+	mutex_lock(&cpuhp_state_mutex);
+
 	if (!invoke || !cpuhp_get_teardown_cb(state))
 		goto remove;
 	/*
@@ -1563,7 +1561,6 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state,
 	}
 
 remove:
-	mutex_lock(&cpuhp_state_mutex);
 	hlist_del(node);
 	mutex_unlock(&cpuhp_state_mutex);
 	put_online_cpus();
@@ -1571,6 +1568,7 @@ remove:
 	return 0;
 }
 EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
+
 /**
  * __cpuhp_remove_state - Remove the callbacks for an hotplug machine state
  * @state:	The state to remove
@@ -1589,6 +1587,7 @@ void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
 
 	get_online_cpus();
 
+	mutex_lock(&cpuhp_state_mutex);
 	if (sp->multi_instance) {
 		WARN(!hlist_empty(&sp->list),
 		     "Error: Removing state %d which has instances left.\n",
@@ -1613,6 +1612,7 @@ void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
 	}
 remove:
 	cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
+	mutex_unlock(&cpuhp_state_mutex);
 	put_online_cpus();
 }
 EXPORT_SYMBOL(__cpuhp_remove_state);
-- 
cgit v1.2.3


From 37c343b4f4e70e9dc328ab04903c0ec8d154c1a4 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Tue, 14 Mar 2017 08:58:08 -0400
Subject: net: Resend IGMP memberships upon peer notification.

When we notify peers of potential changes,  it's also good to update
IGMP memberships.  For example, during VM migration, updating IGMP
memberships will redirect existing multicast streams to the VM at the
new location.

Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/dev.c b/net/core/dev.c
index 8637b2b71f3d..7869ae3837ca 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1304,6 +1304,7 @@ void netdev_notify_peers(struct net_device *dev)
 {
 	rtnl_lock();
 	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
+	call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev);
 	rtnl_unlock();
 }
 EXPORT_SYMBOL(netdev_notify_peers);
-- 
cgit v1.2.3


From f004ec065b4879d6bc9ba0211af2169b3ce3097f Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Tue, 14 Mar 2017 14:00:00 +0100
Subject: mlxsw: reg: Fix SPVM max record count

The num_rec field is 8 bit, so the maximal count number is 255. This
fixes vlans not being enabled for wider ranges than 255.

Fixes: b2e345f9a454 ("mlxsw: reg: Add Switch Port VID and Switch Port VLAN Membership registers definitions")
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 0899e2d310e2..65e19422b80a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -769,7 +769,7 @@ static inline void mlxsw_reg_spvid_pack(char *payload, u8 local_port, u16 pvid)
 #define MLXSW_REG_SPVM_ID 0x200F
 #define MLXSW_REG_SPVM_BASE_LEN 0x04 /* base length, without records */
 #define MLXSW_REG_SPVM_REC_LEN 0x04 /* record length */
-#define MLXSW_REG_SPVM_REC_MAX_COUNT 256
+#define MLXSW_REG_SPVM_REC_MAX_COUNT 255
 #define MLXSW_REG_SPVM_LEN (MLXSW_REG_SPVM_BASE_LEN +	\
 		    MLXSW_REG_SPVM_REC_LEN * MLXSW_REG_SPVM_REC_MAX_COUNT)
 
-- 
cgit v1.2.3


From e9093b1183bbac462d2caef3eac165778c0b1bf1 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Tue, 14 Mar 2017 14:00:01 +0100
Subject: mlxsw: reg: Fix SPVMLR max record count

The num_rec field is 8 bit, so the maximal count number is 255.
This fixes vlans learning not being enabled for wider ranges than 255.

Fixes: a4feea74cd7a ("mlxsw: reg: Add Switch Port VLAN MAC Learning register definition")
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 65e19422b80a..d9616daf8a70 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -1702,7 +1702,7 @@ static inline void mlxsw_reg_sfmr_pack(char *payload,
 #define MLXSW_REG_SPVMLR_ID 0x2020
 #define MLXSW_REG_SPVMLR_BASE_LEN 0x04 /* base length, without records */
 #define MLXSW_REG_SPVMLR_REC_LEN 0x04 /* record length */
-#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 256
+#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 255
 #define MLXSW_REG_SPVMLR_LEN (MLXSW_REG_SPVMLR_BASE_LEN + \
 			      MLXSW_REG_SPVMLR_REC_LEN * \
 			      MLXSW_REG_SPVMLR_REC_MAX_COUNT)
-- 
cgit v1.2.3


From f3e48119b97f56fb09310c95d49da122a27003d7 Mon Sep 17 00:00:00 2001
From: Ram Amrani <Ram.Amrani@cavium.com>
Date: Tue, 14 Mar 2017 15:25:58 +0200
Subject: qed: Align CIDs according to DORQ requirement

The Doorbell HW block can be configured at a granularity
of 16 x CIDs, so we need to make sure that the actual number
of CIDs configured would be a multiplication of 16.

Today, when RoCE is enabled - given that the number is unaligned,
doorbelling the higher CIDs would fail to reach the firmware and
would eventually timeout.

Fixes: dbb799c39717 ("qed: Initialize hardware for new protocols")
Signed-off-by: Ram Amrani <Ram.Amrani@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_cxt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index d42d03df751a..7e3a6fed3da6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -422,8 +422,9 @@ static void qed_cxt_set_proto_cid_count(struct qed_hwfn *p_hwfn,
 		u32 page_sz = p_mgr->clients[ILT_CLI_CDUC].p_size.val;
 		u32 cxt_size = CONN_CXT_SIZE(p_hwfn);
 		u32 elems_per_page = ILT_PAGE_IN_BYTES(page_sz) / cxt_size;
+		u32 align = elems_per_page * DQ_RANGE_ALIGN;
 
-		p_conn->cid_count = roundup(p_conn->cid_count, elems_per_page);
+		p_conn->cid_count = roundup(p_conn->cid_count, align);
 	}
 }
 
-- 
cgit v1.2.3


From 3ef310a7d99216e0fbdff29f0cb13bc54180373a Mon Sep 17 00:00:00 2001
From: Tomer Tayar <Tomer.Tayar@cavium.com>
Date: Tue, 14 Mar 2017 15:25:59 +0200
Subject: qed: Prevent creation of too-big u32-chains

Current Logic would allow the creation of a chain with U32_MAX + 1
elements, when the actual maximum supported by the driver infrastructure
is U32_MAX.

Fixes: a91eb52abb50 ("qed: Revisit chain implementation")
Signed-off-by: Tomer Tayar <Tomer.Tayar@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_dev.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index e2a081ceaf52..e518f914eab1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -2389,9 +2389,8 @@ qed_chain_alloc_sanity_check(struct qed_dev *cdev,
 	 * size/capacity fields are of a u32 type.
 	 */
 	if ((cnt_type == QED_CHAIN_CNT_TYPE_U16 &&
-	     chain_size > 0x10000) ||
-	    (cnt_type == QED_CHAIN_CNT_TYPE_U32 &&
-	     chain_size > 0x100000000ULL)) {
+	     chain_size > ((u32)U16_MAX + 1)) ||
+	    (cnt_type == QED_CHAIN_CNT_TYPE_U32 && chain_size > U32_MAX)) {
 		DP_NOTICE(cdev,
 			  "The actual chain size (0x%llx) is larger than the maximal possible value\n",
 			  chain_size);
-- 
cgit v1.2.3


From 752ecb2da11124a948567076b60767dc8034cfa5 Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Tue, 14 Mar 2017 15:26:00 +0200
Subject: qed: Fix mapping leak on LL2 rx flow

When receiving an Rx LL2 packet, qed fails to unmap the previous buffer.

Fixes: 0a7fb11c23c0 ("qed: Add Light L2 support");
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_ll2.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index 5fb34db377c8..29ae5ec2ac2d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -211,6 +211,8 @@ static void qed_ll2b_complete_rx_packet(struct qed_hwfn *p_hwfn,
 	/* If need to reuse or there's no replacement buffer, repost this */
 	if (rc)
 		goto out_post;
+	dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr,
+			 cdev->ll2->rx_size, DMA_FROM_DEVICE);
 
 	skb = build_skb(buffer->data, 0);
 	if (!skb) {
-- 
cgit v1.2.3


From 4621ceb279d065151eb940ce8a4728b10c0646c7 Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Tue, 14 Mar 2017 15:26:01 +0200
Subject: qed: Free previous connections when releasing iSCSI

Fixes: fc831825f99e ("qed: Add support for hardware offloaded iSCSI")
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_iscsi.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
index 3a44d6b395fa..7e73b05eeab8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
@@ -786,6 +786,23 @@ static void qed_iscsi_release_connection(struct qed_hwfn *p_hwfn,
 	spin_unlock_bh(&p_hwfn->p_iscsi_info->lock);
 }
 
+void qed_iscsi_free_connection(struct qed_hwfn *p_hwfn,
+			       struct qed_iscsi_conn *p_conn)
+{
+	qed_chain_free(p_hwfn->cdev, &p_conn->xhq);
+	qed_chain_free(p_hwfn->cdev, &p_conn->uhq);
+	qed_chain_free(p_hwfn->cdev, &p_conn->r2tq);
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+			  sizeof(struct tcp_upload_params),
+			  p_conn->tcp_upload_params_virt_addr,
+			  p_conn->tcp_upload_params_phys_addr);
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+			  sizeof(struct scsi_terminate_extra_params),
+			  p_conn->queue_cnts_virt_addr,
+			  p_conn->queue_cnts_phys_addr);
+	kfree(p_conn);
+}
+
 struct qed_iscsi_info *qed_iscsi_alloc(struct qed_hwfn *p_hwfn)
 {
 	struct qed_iscsi_info *p_iscsi_info;
@@ -807,6 +824,17 @@ void qed_iscsi_setup(struct qed_hwfn *p_hwfn,
 void qed_iscsi_free(struct qed_hwfn *p_hwfn,
 		    struct qed_iscsi_info *p_iscsi_info)
 {
+	struct qed_iscsi_conn *p_conn = NULL;
+
+	while (!list_empty(&p_hwfn->p_iscsi_info->free_list)) {
+		p_conn = list_first_entry(&p_hwfn->p_iscsi_info->free_list,
+					  struct qed_iscsi_conn, list_entry);
+		if (p_conn) {
+			list_del(&p_conn->list_entry);
+			qed_iscsi_free_connection(p_hwfn, p_conn);
+		}
+	}
+
 	kfree(p_iscsi_info);
 }
 
-- 
cgit v1.2.3


From 1df2adedcce17ad4a39fba74f0e2b611f797fe10 Mon Sep 17 00:00:00 2001
From: Ram Amrani <Ram.Amrani@cavium.com>
Date: Tue, 14 Mar 2017 15:26:02 +0200
Subject: qed: Fix interrupt flags on Rx LL2

Before iterating over the the LL2 Rx ring, the ring's
spinlock is taken via spin_lock_irqsave().
The actual processing of the packet [including handling
by the protocol driver] is done without said lock,
so qed releases the spinlock and re-claims it afterwards.

Problem is that the final spin_lock_irqrestore() at the end
of the iteration uses the original flags saved from the
initial irqsave() instead of the flags from the most recent
irqsave(). So it's possible that the interrupt status would
be incorrect at the end of the processing.

Fixes: 0a7fb11c23c0 ("qed: Add Light L2 support");
CC: Ram Amrani <Ram.Amrani@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_ll2.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index 29ae5ec2ac2d..0d3cef409c96 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -476,7 +476,7 @@ qed_ll2_rxq_completion_gsi(struct qed_hwfn *p_hwfn,
 static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn,
 				      struct qed_ll2_info *p_ll2_conn,
 				      union core_rx_cqe_union *p_cqe,
-				      unsigned long lock_flags,
+				      unsigned long *p_lock_flags,
 				      bool b_last_cqe)
 {
 	struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue;
@@ -497,10 +497,10 @@ static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn,
 			  "Mismatch between active_descq and the LL2 Rx chain\n");
 	list_add_tail(&p_pkt->list_entry, &p_rx->free_descq);
 
-	spin_unlock_irqrestore(&p_rx->lock, lock_flags);
+	spin_unlock_irqrestore(&p_rx->lock, *p_lock_flags);
 	qed_ll2b_complete_rx_packet(p_hwfn, p_ll2_conn->my_id,
 				    p_pkt, &p_cqe->rx_cqe_fp, b_last_cqe);
-	spin_lock_irqsave(&p_rx->lock, lock_flags);
+	spin_lock_irqsave(&p_rx->lock, *p_lock_flags);
 
 	return 0;
 }
@@ -540,7 +540,8 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie)
 			break;
 		case CORE_RX_CQE_TYPE_REGULAR:
 			rc = qed_ll2_rxq_completion_reg(p_hwfn, p_ll2_conn,
-							cqe, flags, b_last_cqe);
+							cqe, &flags,
+							b_last_cqe);
 			break;
 		default:
 			rc = -EIO;
-- 
cgit v1.2.3


From db31d330e8b0ad8926725eb2af6f6422c07ca7ab Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Tue, 14 Mar 2017 15:26:03 +0200
Subject: qed: Correct out-of-bound access in OOO history

Need to set the number of entries in database, otherwise the logic
would quickly surpass the array.

Fixes: 1d6cff4fca43 ("qed: Add iSCSI out of order packet handling")
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_ooo.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_ooo.c b/drivers/net/ethernet/qlogic/qed/qed_ooo.c
index 7d731c6cb892..378afce58b3f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ooo.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ooo.c
@@ -159,6 +159,8 @@ struct qed_ooo_info *qed_ooo_alloc(struct qed_hwfn *p_hwfn)
 	if (!p_ooo_info->ooo_history.p_cqes)
 		goto no_history_mem;
 
+	p_ooo_info->ooo_history.num_of_cqes = QED_MAX_NUM_OOO_HISTORY_ENTRIES;
+
 	return p_ooo_info;
 
 no_history_mem:
-- 
cgit v1.2.3


From 6b116b1d6a521a1907b3c18cb7a8592a655f660c Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Tue, 14 Mar 2017 15:26:04 +0200
Subject: qed: Enable iSCSI Out-of-Order

Missing in the initial submission, qed fails to propagate qedi's
request to enable OOO to firmware.

Fixes: fc831825f99e ("qed: Add support for hardware offloaded iSCSI")
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_iscsi.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
index 7e73b05eeab8..098766f7fe88 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
@@ -190,6 +190,9 @@ qed_sp_iscsi_func_start(struct qed_hwfn *p_hwfn,
 	p_init->num_sq_pages_in_ring = p_params->num_sq_pages_in_ring;
 	p_init->num_r2tq_pages_in_ring = p_params->num_r2tq_pages_in_ring;
 	p_init->num_uhq_pages_in_ring = p_params->num_uhq_pages_in_ring;
+	p_init->ooo_enable = p_params->ooo_enable;
+	p_init->ll2_rx_queue_id = p_hwfn->hw_info.resc_start[QED_LL2_QUEUE] +
+				  p_params->ll2_ooo_queue_id;
 	p_init->func_params.log_page_size = p_params->log_page_size;
 	val = p_params->num_tasks;
 	p_init->func_params.num_tasks = cpu_to_le16(val);
-- 
cgit v1.2.3


From d434936e4cbb10181463622962f30b989d3e9e19 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 14 Mar 2017 13:12:53 +0100
Subject: mm, x86: Fix native_pud_clear build error

We still get a build error in random configurations, after this has been
modified a few times:

In file included from include/linux/mm.h:68:0,
                 from include/linux/suspend.h:8,
                 from arch/x86/kernel/asm-offsets.c:12:
arch/x86/include/asm/pgtable.h:66:26: error: redefinition of 'native_pud_clear'
 #define pud_clear(pud)   native_pud_clear(pud)

My interpretation is that the build error comes from a typo in __PAGETABLE_PUD_FOLDED,
so fix that typo now, and remove the incorrect #ifdef around the native_pud_clear
definition.

Fixes: 3e761a42e19c ("mm, x86: fix HIGHMEM64 && PARAVIRT build config for native_pud_clear()")
Fixes: a00cc7d9dd93 ("mm, x86: add support for PUD-sized transparent hugepages")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Dave Jiang <dave.jiang@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Thomas Garnier <thgarnie@google.com>
Link: http://lkml.kernel.org/r/20170314121330.182155-1-arnd@arndb.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/pgtable-3level.h | 3 ---
 arch/x86/include/asm/pgtable.h        | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 72277b1028a5..50d35e3185f5 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -121,12 +121,9 @@ static inline void native_pmd_clear(pmd_t *pmd)
 	*(tmp + 1) = 0;
 }
 
-#if !defined(CONFIG_SMP) || (defined(CONFIG_HIGHMEM64G) && \
-		defined(CONFIG_PARAVIRT))
 static inline void native_pud_clear(pud_t *pudp)
 {
 }
-#endif
 
 static inline void pud_clear(pud_t *pudp)
 {
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 1cfb36b8c024..585ee0d42d18 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -62,7 +62,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
 # define set_pud(pudp, pud)		native_set_pud(pudp, pud)
 #endif
 
-#ifndef __PAGETABLE_PMD_FOLDED
+#ifndef __PAGETABLE_PUD_FOLDED
 #define pud_clear(pud)			native_pud_clear(pud)
 #endif
 
-- 
cgit v1.2.3


From c236c8e95a3d395b0494e7108f0d41cf36ec107c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Sat, 4 Mar 2017 10:27:18 +0100
Subject: futex: Fix potential use-after-free in FUTEX_REQUEUE_PI

While working on the futex code, I stumbled over this potential
use-after-free scenario. Dmitry triggered it later with syzkaller.

pi_mutex is a pointer into pi_state, which we drop the reference on in
unqueue_me_pi(). So any access to that pointer after that is bad.

Since other sites already do rt_mutex_unlock() with hb->lock held, see
for example futex_lock_pi(), simply move the unlock before
unqueue_me_pi().

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Darren Hart <dvhart@linux.intel.com>
Cc: juri.lelli@arm.com
Cc: bigeasy@linutronix.de
Cc: xlpang@redhat.com
Cc: rostedt@goodmis.org
Cc: mathieu.desnoyers@efficios.com
Cc: jdesfossez@efficios.com
Cc: dvhart@infradead.org
Cc: bristot@redhat.com
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20170304093558.801744246@infradead.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/futex.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index 229a744b1781..3a4775fd7468 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2815,7 +2815,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 {
 	struct hrtimer_sleeper timeout, *to = NULL;
 	struct rt_mutex_waiter rt_waiter;
-	struct rt_mutex *pi_mutex = NULL;
 	struct futex_hash_bucket *hb;
 	union futex_key key2 = FUTEX_KEY_INIT;
 	struct futex_q q = futex_q_init;
@@ -2907,6 +2906,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 			spin_unlock(q.lock_ptr);
 		}
 	} else {
+		struct rt_mutex *pi_mutex;
+
 		/*
 		 * We have been woken up by futex_unlock_pi(), a timeout, or a
 		 * signal.  futex_unlock_pi() will not destroy the lock_ptr nor
@@ -2930,18 +2931,19 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 		if (res)
 			ret = (res < 0) ? res : 0;
 
+		/*
+		 * If fixup_pi_state_owner() faulted and was unable to handle
+		 * the fault, unlock the rt_mutex and return the fault to
+		 * userspace.
+		 */
+		if (ret && rt_mutex_owner(pi_mutex) == current)
+			rt_mutex_unlock(pi_mutex);
+
 		/* Unqueue and drop the lock. */
 		unqueue_me_pi(&q);
 	}
 
-	/*
-	 * If fixup_pi_state_owner() faulted and was unable to handle the
-	 * fault, unlock the rt_mutex and return the fault to userspace.
-	 */
-	if (ret == -EFAULT) {
-		if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
-			rt_mutex_unlock(pi_mutex);
-	} else if (ret == -EINTR) {
+	if (ret == -EINTR) {
 		/*
 		 * We've already been requeued, but cannot restart by calling
 		 * futex_lock_pi() directly. We could restart this syscall, but
-- 
cgit v1.2.3


From 9bbb25afeb182502ca4f2c4f3f88af0681b34cae Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Sat, 4 Mar 2017 10:27:19 +0100
Subject: futex: Add missing error handling to FUTEX_REQUEUE_PI

Thomas spotted that fixup_pi_state_owner() can return errors and we
fail to unlock the rt_mutex in that case.

Reported-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Darren Hart <dvhart@linux.intel.com>
Cc: juri.lelli@arm.com
Cc: bigeasy@linutronix.de
Cc: xlpang@redhat.com
Cc: rostedt@goodmis.org
Cc: mathieu.desnoyers@efficios.com
Cc: jdesfossez@efficios.com
Cc: dvhart@infradead.org
Cc: bristot@redhat.com
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20170304093558.867401760@infradead.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/futex.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/futex.c b/kernel/futex.c
index 3a4775fd7468..45858ec73941 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2898,6 +2898,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 		if (q.pi_state && (q.pi_state->owner != current)) {
 			spin_lock(q.lock_ptr);
 			ret = fixup_pi_state_owner(uaddr2, &q, current);
+			if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current)
+				rt_mutex_unlock(&q.pi_state->pi_mutex);
 			/*
 			 * Drop the reference to the pi state which
 			 * the requeue_pi() code acquired for us.
-- 
cgit v1.2.3


From 87a6b2975f0d340c75b7488d22d61d2f98fb8abf Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 13 Mar 2017 23:27:47 -0500
Subject: x86/unwind: Fix last frame check for aligned function stacks

Pavel Machek reported the following warning on x86-32:

  WARNING: kernel stack frame pointer at f50cdf98 in swapper/2:0 has bad value   (null)

The warning is caused by the unwinder not realizing that it reached the
end of the stack, due to an unusual prologue which gcc sometimes
generates for aligned stacks.  The prologue is based on a gcc feature
called the Dynamic Realign Argument Pointer (DRAP).  It's almost always
enabled for aligned stacks when -maccumulate-outgoing-args isn't set.

This issue is similar to the one fixed by the following commit:

  8023e0e2a48d ("x86/unwind: Adjust last frame check for aligned function stacks")

... but that fix was specific to x86-64.

Make the fix more generic to cover x86-32 as well, and also ensure that
the return address referred to by the frame pointer is a copy of the
original return address.

Fixes: acb4608ad186 ("x86/unwind: Create stack frames for saved syscall registers")
Reported-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/50d4924db716c264b14f1633037385ec80bf89d2.1489465609.git.jpoimboe@redhat.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/unwind_frame.c | 36 ++++++++++++++++++++++++++++++------
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index 478d15dbaee4..08339262b666 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -82,19 +82,43 @@ static size_t regs_size(struct pt_regs *regs)
 	return sizeof(*regs);
 }
 
+#ifdef CONFIG_X86_32
+#define GCC_REALIGN_WORDS 3
+#else
+#define GCC_REALIGN_WORDS 1
+#endif
+
 static bool is_last_task_frame(struct unwind_state *state)
 {
-	unsigned long bp = (unsigned long)state->bp;
-	unsigned long regs = (unsigned long)task_pt_regs(state->task);
+	unsigned long *last_bp = (unsigned long *)task_pt_regs(state->task) - 2;
+	unsigned long *aligned_bp = last_bp - GCC_REALIGN_WORDS;
 
 	/*
 	 * We have to check for the last task frame at two different locations
 	 * because gcc can occasionally decide to realign the stack pointer and
-	 * change the offset of the stack frame by a word in the prologue of a
-	 * function called by head/entry code.
+	 * change the offset of the stack frame in the prologue of a function
+	 * called by head/entry code.  Examples:
+	 *
+	 * <start_secondary>:
+	 *      push   %edi
+	 *      lea    0x8(%esp),%edi
+	 *      and    $0xfffffff8,%esp
+	 *      pushl  -0x4(%edi)
+	 *      push   %ebp
+	 *      mov    %esp,%ebp
+	 *
+	 * <x86_64_start_kernel>:
+	 *      lea    0x8(%rsp),%r10
+	 *      and    $0xfffffffffffffff0,%rsp
+	 *      pushq  -0x8(%r10)
+	 *      push   %rbp
+	 *      mov    %rsp,%rbp
+	 *
+	 * Note that after aligning the stack, it pushes a duplicate copy of
+	 * the return address before pushing the frame pointer.
 	 */
-	return bp == regs - FRAME_HEADER_SIZE ||
-	       bp == regs - FRAME_HEADER_SIZE - sizeof(long);
+	return (state->bp == last_bp ||
+		(state->bp == aligned_bp && *(aligned_bp+1) == *(last_bp+1)));
 }
 
 /*
-- 
cgit v1.2.3


From 49ec8f5b6ae3ab60385492cad900ffc8a523c895 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 14 Mar 2017 15:20:53 +0100
Subject: x86/intel_rdt: Put group node in rdtgroup_kn_unlock

The rdtgroup_kn_unlock waits for the last user to release and put its
node. But it's calling kernfs_put on the node which calls the
rdtgroup_kn_unlock, which might not be the group's directory node, but
another group's file node.

This race could be easily reproduced by running 2 instances
of following script:

  mount -t resctrl resctrl /sys/fs/resctrl/
  pushd /sys/fs/resctrl/
  mkdir krava
  echo "krava" > krava/schemata
  rmdir krava
  popd
  umount  /sys/fs/resctrl

It triggers the slub debug error message with following command
line config: slub_debug=,kernfs_node_cache.

Call kernfs_put on the group's node to fix it.

Fixes: 60cf5e101fd4 ("x86/intel_rdt: Add mkdir to resctrl file system")
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Shaohua Li <shli@fb.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1489501253-20248-1-git-send-email-jolsa@kernel.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index c05509d38b1f..9ac2a5cdd9c2 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -727,7 +727,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn)
 	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
 	    (rdtgrp->flags & RDT_DELETED)) {
 		kernfs_unbreak_active_protection(kn);
-		kernfs_put(kn);
+		kernfs_put(rdtgrp->kn);
 		kfree(rdtgrp);
 	} else {
 		kernfs_unbreak_active_protection(kn);
-- 
cgit v1.2.3


From 655d9ca9ac075da1ef2a45012ba48a39f6eb1f58 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 14 Mar 2017 22:27:11 +0100
Subject: drm: amd: remove broken include path

The AMD ACP driver adds "-I../acp -I../acp/include" to the gcc command
line, which makes no sense, since these are evaluated relative to the
build directory. When we build with "make W=1", they instead cause
a warning:

cc1: error: ../acp/: No such file or directory [-Werror=missing-include-dirs]
cc1: error: ../acp/include: No such file or directory [-Werror=missing-include-dirs]
cc1: all warnings being treated as errors
../scripts/Makefile.build:289: recipe for target 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.o' failed
../scripts/Makefile.build:289: recipe for target 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.o' failed
../scripts/Makefile.build:289: recipe for target 'drivers/gpu/drm/amd/amdgpu/amdgpu_kms.o' failed

This removes the subdir-ccflags variable that evidently did not
serve any purpose here.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/acp/Makefile | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/acp/Makefile b/drivers/gpu/drm/amd/acp/Makefile
index 8363cb57915b..8a08e81ee90d 100644
--- a/drivers/gpu/drm/amd/acp/Makefile
+++ b/drivers/gpu/drm/amd/acp/Makefile
@@ -3,6 +3,4 @@
 # of AMDSOC/AMDGPU drm driver.
 # It provides the HW control for ACP related functionalities.
 
-subdir-ccflags-y += -I$(AMDACPPATH)/ -I$(AMDACPPATH)/include
-
 AMD_ACP_FILES := $(AMDACPPATH)/acp_hw.o
-- 
cgit v1.2.3


From 630a04e79dd41ff746b545d4fc052e0abb836120 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Wed, 15 Mar 2017 00:24:25 -0700
Subject: xfs: verify inline directory data forks

When we're reading or writing the data fork of an inline directory,
check the contents to make sure we're not overflowing buffers or eating
garbage data.  xfs/348 corrupts an inline symlink into an inline
directory, triggering a buffer overflow bug.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
v2: add more checks consistent with _dir2_sf_check and make the verifier
usable from anywhere.
---
 fs/xfs/libxfs/xfs_dir2_priv.h  |  2 +
 fs/xfs/libxfs/xfs_dir2_sf.c    | 87 ++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/libxfs/xfs_inode_fork.c | 26 +++++++++++--
 fs/xfs/libxfs/xfs_inode_fork.h |  2 +-
 fs/xfs/xfs_dir2_readdir.c      | 11 ------
 fs/xfs/xfs_inode.c             | 12 ++++--
 6 files changed, 122 insertions(+), 18 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index d04547fcf274..eb00bc133bca 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -125,6 +125,8 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
 extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
 extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
 extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
+extern int xfs_dir2_sf_verify(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *sfp,
+		int size);
 
 /* xfs_dir2_readdir.c */
 extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx,
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index c6809ff41197..96b45cd6c63f 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -629,6 +629,93 @@ xfs_dir2_sf_check(
 }
 #endif	/* DEBUG */
 
+/* Verify the consistency of an inline directory. */
+int
+xfs_dir2_sf_verify(
+	struct xfs_mount		*mp,
+	struct xfs_dir2_sf_hdr		*sfp,
+	int				size)
+{
+	struct xfs_dir2_sf_entry	*sfep;
+	struct xfs_dir2_sf_entry	*next_sfep;
+	char				*endp;
+	const struct xfs_dir_ops	*dops;
+	xfs_ino_t			ino;
+	int				i;
+	int				i8count;
+	int				offset;
+	__uint8_t			filetype;
+
+	dops = xfs_dir_get_ops(mp, NULL);
+
+	/*
+	 * Give up if the directory is way too short.
+	 */
+	XFS_WANT_CORRUPTED_RETURN(mp, size >
+			offsetof(struct xfs_dir2_sf_hdr, parent));
+	XFS_WANT_CORRUPTED_RETURN(mp, size >=
+			xfs_dir2_sf_hdr_size(sfp->i8count));
+
+	endp = (char *)sfp + size;
+
+	/* Check .. entry */
+	ino = dops->sf_get_parent_ino(sfp);
+	i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
+	XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino));
+	offset = dops->data_first_offset;
+
+	/* Check all reported entries */
+	sfep = xfs_dir2_sf_firstentry(sfp);
+	for (i = 0; i < sfp->count; i++) {
+		/*
+		 * struct xfs_dir2_sf_entry has a variable length.
+		 * Check the fixed-offset parts of the structure are
+		 * within the data buffer.
+		 */
+		XFS_WANT_CORRUPTED_RETURN(mp,
+				((char *)sfep + sizeof(*sfep)) < endp);
+
+		/* Don't allow names with known bad length. */
+		XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen > 0);
+		XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen < MAXNAMELEN);
+
+		/*
+		 * Check that the variable-length part of the structure is
+		 * within the data buffer.  The next entry starts after the
+		 * name component, so nextentry is an acceptable test.
+		 */
+		next_sfep = dops->sf_nextentry(sfp, sfep);
+		XFS_WANT_CORRUPTED_RETURN(mp, endp >= (char *)next_sfep);
+
+		/* Check that the offsets always increase. */
+		XFS_WANT_CORRUPTED_RETURN(mp,
+				xfs_dir2_sf_get_offset(sfep) >= offset);
+
+		/* Check the inode number. */
+		ino = dops->sf_get_ino(sfp, sfep);
+		i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
+		XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino));
+
+		/* Check the file type. */
+		filetype = dops->sf_get_ftype(sfep);
+		XFS_WANT_CORRUPTED_RETURN(mp, filetype < XFS_DIR3_FT_MAX);
+
+		offset = xfs_dir2_sf_get_offset(sfep) +
+				dops->data_entsize(sfep->namelen);
+
+		sfep = next_sfep;
+	}
+	XFS_WANT_CORRUPTED_RETURN(mp, i8count == sfp->i8count);
+	XFS_WANT_CORRUPTED_RETURN(mp, (void *)sfep == (void *)endp);
+
+	/* Make sure this whole thing ought to be in local format. */
+	XFS_WANT_CORRUPTED_RETURN(mp, offset +
+	       (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+	       (uint)sizeof(xfs_dir2_block_tail_t) <= mp->m_dir_geo->blksize);
+
+	return 0;
+}
+
 /*
  * Create a new (shortform) directory.
  */
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 25c1e078aef6..9653e964eda4 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -33,6 +33,8 @@
 #include "xfs_trace.h"
 #include "xfs_attr_sf.h"
 #include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_priv.h"
 
 kmem_zone_t *xfs_ifork_zone;
 
@@ -320,6 +322,7 @@ xfs_iformat_local(
 	int		whichfork,
 	int		size)
 {
+	int		error;
 
 	/*
 	 * If the size is unreasonable, then something
@@ -336,6 +339,14 @@ xfs_iformat_local(
 		return -EFSCORRUPTED;
 	}
 
+	if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
+		error = xfs_dir2_sf_verify(ip->i_mount,
+				(struct xfs_dir2_sf_hdr *)XFS_DFORK_DPTR(dip),
+				size);
+		if (error)
+			return error;
+	}
+
 	xfs_init_local_fork(ip, whichfork, XFS_DFORK_PTR(dip, whichfork), size);
 	return 0;
 }
@@ -856,7 +867,7 @@ xfs_iextents_copy(
  * In these cases, the format always takes precedence, because the
  * format indicates the current state of the fork.
  */
-void
+int
 xfs_iflush_fork(
 	xfs_inode_t		*ip,
 	xfs_dinode_t		*dip,
@@ -866,6 +877,7 @@ xfs_iflush_fork(
 	char			*cp;
 	xfs_ifork_t		*ifp;
 	xfs_mount_t		*mp;
+	int			error;
 	static const short	brootflag[2] =
 		{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
 	static const short	dataflag[2] =
@@ -874,7 +886,7 @@ xfs_iflush_fork(
 		{ XFS_ILOG_DEXT, XFS_ILOG_AEXT };
 
 	if (!iip)
-		return;
+		return 0;
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	/*
 	 * This can happen if we gave up in iformat in an error path,
@@ -882,12 +894,19 @@ xfs_iflush_fork(
 	 */
 	if (!ifp) {
 		ASSERT(whichfork == XFS_ATTR_FORK);
-		return;
+		return 0;
 	}
 	cp = XFS_DFORK_PTR(dip, whichfork);
 	mp = ip->i_mount;
 	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
 	case XFS_DINODE_FMT_LOCAL:
+		if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
+			error = xfs_dir2_sf_verify(mp,
+					(struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data,
+					ifp->if_bytes);
+			if (error)
+				return error;
+		}
 		if ((iip->ili_fields & dataflag[whichfork]) &&
 		    (ifp->if_bytes > 0)) {
 			ASSERT(ifp->if_u1.if_data != NULL);
@@ -940,6 +959,7 @@ xfs_iflush_fork(
 		ASSERT(0);
 		break;
 	}
+	return 0;
 }
 
 /*
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index 7fb8365326d1..132dc59fdde6 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -140,7 +140,7 @@ typedef struct xfs_ifork {
 struct xfs_ifork *xfs_iext_state_to_fork(struct xfs_inode *ip, int state);
 
 int		xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *);
-void		xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
+int		xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
 				struct xfs_inode_log_item *, int);
 void		xfs_idestroy_fork(struct xfs_inode *, int);
 void		xfs_idata_realloc(struct xfs_inode *, int, int);
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 003a99b83bd8..ad9396e516f6 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -71,22 +71,11 @@ xfs_dir2_sf_getdents(
 	struct xfs_da_geometry	*geo = args->geo;
 
 	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
-	/*
-	 * Give up if the directory is way too short.
-	 */
-	if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
-		ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
-		return -EIO;
-	}
-
 	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
 	ASSERT(dp->i_df.if_u1.if_data != NULL);
 
 	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
 
-	if (dp->i_d.di_size < xfs_dir2_sf_hdr_size(sfp->i8count))
-		return -EFSCORRUPTED;
-
 	/*
 	 * If the block number in the offset is out of range, we're done.
 	 */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 7eaf1ef74e3c..c7fe2c2123ab 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3475,6 +3475,7 @@ xfs_iflush_int(
 	struct xfs_inode_log_item *iip = ip->i_itemp;
 	struct xfs_dinode	*dip;
 	struct xfs_mount	*mp = ip->i_mount;
+	int			error;
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
 	ASSERT(xfs_isiflocked(ip));
@@ -3557,9 +3558,14 @@ xfs_iflush_int(
 	if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
 		ip->i_d.di_flushiter = 0;
 
-	xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
-	if (XFS_IFORK_Q(ip))
-		xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
+	error = xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
+	if (error)
+		return error;
+	if (XFS_IFORK_Q(ip)) {
+		error = xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
+		if (error)
+			return error;
+	}
 	xfs_inobp_check(mp, bp);
 
 	/*
-- 
cgit v1.2.3


From e67351d56a709853046fbe652b981fb7ca4c3dcc Mon Sep 17 00:00:00 2001
From: Ander Conselvan de Oliveira <ander.conselvan.de.oliveira@intel.com>
Date: Wed, 15 Mar 2017 11:57:47 +0200
Subject: drm/i915/glk: Remove MODULE_FIRMWARE() tag from Geminilake's DMC

Geminilake's DMC is not yet available in the linux-firmware repository.
To prevent userspace tools such as mkinitramfs to complain about
missing firmware, remove the MODULE_FIRMWARE() tag for now.

Fixes: dbb28b5c3d3c ("drm/i915/DMC/GLK: Load DMC on GLK")
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Anusha Srivatsa <anusha.srivatsa@intel.com>
Cc: Daniel Vetter <daniel.vetter@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: intel-gfx@lists.freedesktop.org
Cc: <drm-intel-fixes@lists.freedesktop.org>
Signed-off-by: Ander Conselvan de Oliveira <ander.conselvan.de.oliveira@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170306085651.14008-1-ander.conselvan.de.oliveira@intel.com
Link: http://patchwork.freedesktop.org/patch/msgid/20170315095747.21845-1-ander.conselvan.de.oliveira@intel.com
(cherry picked from commit d9321a03efcda867b3a8c6327e01808516f0acd7)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_csr.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 0085bc745f6a..de219b71fb76 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -35,7 +35,6 @@
  */
 
 #define I915_CSR_GLK "i915/glk_dmc_ver1_01.bin"
-MODULE_FIRMWARE(I915_CSR_GLK);
 #define GLK_CSR_VERSION_REQUIRED	CSR_VERSION(1, 1)
 
 #define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin"
-- 
cgit v1.2.3


From e609ccef5222c73b46b322be7d3796d60bff353d Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Fri, 24 Feb 2017 16:04:15 +0200
Subject: intel_th: Don't leak module refcount on failure to activate

Output 'activation' may fail for the reasons of the output driver,
for example, if msc's buffer is not allocated. We forget, however,
to drop the module reference in this case. So each attempt at
activation in this case leaks a reference, preventing the module
from ever unloading.

This patch adds the missing module_put() in the activation error
path.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: stable@vger.kernel.org # v4.8+
---
 drivers/hwtracing/intel_th/core.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/hwtracing/intel_th/core.c b/drivers/hwtracing/intel_th/core.c
index cdd9b3b26195..7563eceeaaea 100644
--- a/drivers/hwtracing/intel_th/core.c
+++ b/drivers/hwtracing/intel_th/core.c
@@ -221,8 +221,10 @@ static int intel_th_output_activate(struct intel_th_device *thdev)
 	else
 		intel_th_trace_enable(thdev);
 
-	if (ret)
+	if (ret) {
 		pm_runtime_put(&thdev->dev);
+		module_put(thdrv->driver.owner);
+	}
 
 	return ret;
 }
-- 
cgit v1.2.3


From 5118ccd34780f4637a9360be580f41f4c1feab48 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Tue, 8 Sep 2015 14:03:55 +0300
Subject: intel_th: pci: Add Denverton SOC support

This adds Intel(R) Trace Hub PCI ID for Denverton SOC.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
---
 drivers/hwtracing/intel_th/pci.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c
index 0bba3842336e..04bd57b0100a 100644
--- a/drivers/hwtracing/intel_th/pci.c
+++ b/drivers/hwtracing/intel_th/pci.c
@@ -85,6 +85,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = {
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa2a6),
 		.driver_data = (kernel_ulong_t)0,
 	},
+	{
+		/* Denverton */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x19e1),
+		.driver_data = (kernel_ulong_t)0,
+	},
 	{ 0 },
 };
 
-- 
cgit v1.2.3


From 340837f985c2cb87ca0868d4aa9ce42b0fab3a21 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Thu, 30 Jun 2016 16:10:51 +0300
Subject: intel_th: pci: Add Gemini Lake support

This adds Intel(R) Trace Hub PCI ID for Gemini Lake SOC.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
---
 drivers/hwtracing/intel_th/pci.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c
index 04bd57b0100a..590cf90dd21a 100644
--- a/drivers/hwtracing/intel_th/pci.c
+++ b/drivers/hwtracing/intel_th/pci.c
@@ -90,6 +90,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = {
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x19e1),
 		.driver_data = (kernel_ulong_t)0,
 	},
+	{
+		/* Gemini Lake */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x318e),
+		.driver_data = (kernel_ulong_t)0,
+	},
 	{ 0 },
 };
 
-- 
cgit v1.2.3


From 28ea06c46fbcab63fd9a55531387b7928a18a590 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Mon, 6 Mar 2017 12:58:42 -0500
Subject: gfs2: Avoid alignment hole in struct lm_lockname

Commit 88ffbf3e03 switches to using rhashtables for glocks, hashing over
the entire struct lm_lockname instead of its individual fields.  On some
architectures, struct lm_lockname contains a hole of uninitialized
memory due to alignment rules, which now leads to incorrect hash values.
Get rid of that hole.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
CC: <stable@vger.kernel.org> #v4.3+
---
 fs/gfs2/incore.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index c45084ac642d..511e1ed7e2de 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -207,7 +207,7 @@ struct lm_lockname {
 	struct gfs2_sbd *ln_sbd;
 	u64 ln_number;
 	unsigned int ln_type;
-};
+} __packed __aligned(sizeof(int));
 
 #define lm_name_equal(name1, name2) \
         (((name1)->ln_number == (name2)->ln_number) &&	\
-- 
cgit v1.2.3


From 8af42949d1681379c1a97d230de9242c1f4f326a Mon Sep 17 00:00:00 2001
From: Stafford Horne <shorne@gmail.com>
Date: Mon, 13 Mar 2017 07:41:55 +0900
Subject: openrisc: xchg: fix `computed is not used` warning

When building allmodconfig this warning shows.

  fs/ocfs2/file.c: In function 'ocfs2_file_write_iter':
  ./arch/openrisc/include/asm/cmpxchg.h:81:3: warning: value computed is
  not used [-Wunused-value]
    ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), sizeof(*(ptr))))
     ^

Applying the same patch logic that was done to the cmpxchg macro.

Signed-off-by: Stafford Horne <shorne@gmail.com>
---
 arch/openrisc/include/asm/cmpxchg.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/openrisc/include/asm/cmpxchg.h b/arch/openrisc/include/asm/cmpxchg.h
index 5fcb9ac72693..f0a5d8b844d6 100644
--- a/arch/openrisc/include/asm/cmpxchg.h
+++ b/arch/openrisc/include/asm/cmpxchg.h
@@ -77,7 +77,11 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
 	return val;
 }
 
-#define xchg(ptr, with) \
-	((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), sizeof(*(ptr))))
+#define xchg(ptr, with) 						\
+	({								\
+		(__typeof__(*(ptr))) __xchg((unsigned long)(with),	\
+					    (ptr),			\
+					    sizeof(*(ptr)));		\
+	})
 
 #endif /* __ASM_OPENRISC_CMPXCHG_H */
-- 
cgit v1.2.3


From 154e67cd8e8f964809d0e75e44bb121b169c75b3 Mon Sep 17 00:00:00 2001
From: Stafford Horne <shorne@gmail.com>
Date: Mon, 13 Mar 2017 07:44:45 +0900
Subject: openrisc: fix issue handling 8 byte get_user calls

Was getting the following error with allmodconfig:

  ERROR: "__get_user_bad" [lib/test_user_copy.ko] undefined!

This was simply a missing break statement, causing an unwanted fall
through.

Signed-off-by: Stafford Horne <shorne@gmail.com>
---
 arch/openrisc/include/asm/uaccess.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h
index 140faa16685a..1311e6b13991 100644
--- a/arch/openrisc/include/asm/uaccess.h
+++ b/arch/openrisc/include/asm/uaccess.h
@@ -211,7 +211,7 @@ do {									\
 	case 1: __get_user_asm(x, ptr, retval, "l.lbz"); break;		\
 	case 2: __get_user_asm(x, ptr, retval, "l.lhz"); break;		\
 	case 4: __get_user_asm(x, ptr, retval, "l.lwz"); break;		\
-	case 8: __get_user_asm2(x, ptr, retval);			\
+	case 8: __get_user_asm2(x, ptr, retval); break;			\
 	default: (x) = __get_user_bad();				\
 	}								\
 } while (0)
-- 
cgit v1.2.3


From 363dad58e4a0f72dce0bf12d361d617239a80317 Mon Sep 17 00:00:00 2001
From: Stafford Horne <shorne@gmail.com>
Date: Tue, 14 Mar 2017 22:52:49 +0900
Subject: openrisc: Export symbols needed by modules

This was detected by allmodconfig, errors reported:

 ERROR: "empty_zero_page" [net/ceph/libceph.ko] undefined!
 ERROR: "__ucmpdi2" [lib/842/842_decompress.ko] undefined!
 ERROR: "empty_zero_page" [fs/nfs/objlayout/objlayoutdriver.ko] undefined!
 ERROR: "empty_zero_page" [fs/exofs/exofs.ko] undefined!
 ERROR: "empty_zero_page" [fs/crypto/fscrypto.ko] undefined!
 ERROR: "__ucmpdi2" [fs/btrfs/btrfs.ko] undefined!
 ERROR: "pm_power_off" [drivers/regulator/act8865-regulator.ko] undefined!
 ERROR: "__ucmpdi2" [drivers/media/i2c/adv7842.ko] undefined!
 ERROR: "__clear_user" [drivers/md/dm-mod.ko] undefined!
 ERROR: "__clear_user" [net/netfilter/x_tables.ko] undefined!

Signed-off-by: Stafford Horne <shorne@gmail.com>
---
 arch/openrisc/kernel/or32_ksyms.c | 4 ++++
 arch/openrisc/kernel/process.c    | 1 +
 2 files changed, 5 insertions(+)

diff --git a/arch/openrisc/kernel/or32_ksyms.c b/arch/openrisc/kernel/or32_ksyms.c
index 5c4695d13542..ee3e604959e1 100644
--- a/arch/openrisc/kernel/or32_ksyms.c
+++ b/arch/openrisc/kernel/or32_ksyms.c
@@ -30,6 +30,7 @@
 #include <asm/hardirq.h>
 #include <asm/delay.h>
 #include <asm/pgalloc.h>
+#include <asm/pgtable.h>
 
 #define DECLARE_EXPORT(name) extern void name(void); EXPORT_SYMBOL(name)
 
@@ -42,6 +43,9 @@ DECLARE_EXPORT(__muldi3);
 DECLARE_EXPORT(__ashrdi3);
 DECLARE_EXPORT(__ashldi3);
 DECLARE_EXPORT(__lshrdi3);
+DECLARE_EXPORT(__ucmpdi2);
 
+EXPORT_SYMBOL(empty_zero_page);
 EXPORT_SYMBOL(__copy_tofrom_user);
+EXPORT_SYMBOL(__clear_user);
 EXPORT_SYMBOL(memset);
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index 828a29110459..f8da545854f9 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -90,6 +90,7 @@ void arch_cpu_idle(void)
 }
 
 void (*pm_power_off) (void) = machine_power_off;
+EXPORT_SYMBOL(pm_power_off);
 
 /*
  * When a process does an "exec", machine state like FPU and debug
-- 
cgit v1.2.3


From e4c204ced0ac25e02e58679f07096c5bac0b0d96 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 14 Mar 2017 16:18:34 +0100
Subject: cpufreq: intel_pstate: Avoid percentages in limits-related
 computations

Currently, intel_pstate_update_perf_limits() first converts the
policy minimum and maximum limits into percentages of the maximum
turbo frequency (rounding up to an integer) and then converts these
percentages to fractions (by using fixed-point arithmetic to divide
them by 100).

That introduces a rounding error unnecessarily, because the fractions
can be obtained by carrying out fixed-point divisions directly on the
input numbers.

Rework the computations in intel_pstate_hwp_set() to use fractions
instead of percentages (and drop redundant local variables from
there) and modify intel_pstate_update_perf_limits() to compute the
fractions directly and percentages out of them.

While at it, introduce percent_ext_fp() for converting percentages
to fractions (with extended number of fraction bits) and use it in
the computations.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 56 +++++++++++++++++++++---------------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index ee12641ee010..08e134ffba68 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -84,6 +84,11 @@ static inline u64 div_ext_fp(u64 x, u64 y)
 	return div64_u64(x << EXT_FRAC_BITS, y);
 }
 
+static inline int32_t percent_ext_fp(int percent)
+{
+	return div_ext_fp(percent, 100);
+}
+
 /**
  * struct sample -	Store performance sample
  * @core_avg_perf:	Ratio of APERF/MPERF which is the actual average
@@ -850,7 +855,6 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
 	u64 value, cap;
 
 	for_each_cpu(cpu, policy->cpus) {
-		int max_perf_pct, min_perf_pct;
 		struct cpudata *cpu_data = all_cpu_data[cpu];
 		s16 epp;
 
@@ -864,16 +868,14 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
 		else
 			hw_max = HWP_HIGHEST_PERF(cap);
 
-		max_perf_pct = perf_limits->max_perf_pct;
-		min_perf_pct = perf_limits->min_perf_pct;
-		min = hw_max * min_perf_pct / 100;
+		min = fp_ext_toint(hw_max * perf_limits->min_perf);
 
 		rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
 
 		value &= ~HWP_MIN_PERF(~0L);
 		value |= HWP_MIN_PERF(min);
 
-		max = hw_max * max_perf_pct / 100;
+		max = fp_ext_toint(hw_max * perf_limits->max_perf);
 		value &= ~HWP_MAX_PERF(~0L);
 		value |= HWP_MAX_PERF(max);
 
@@ -1223,7 +1225,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
 				   limits->max_perf_pct);
 	limits->max_perf_pct = max(limits->min_perf_pct,
 				   limits->max_perf_pct);
-	limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
+	limits->max_perf = percent_ext_fp(limits->max_perf_pct);
 
 	intel_pstate_update_policies();
 
@@ -1260,7 +1262,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
 				   limits->min_perf_pct);
 	limits->min_perf_pct = min(limits->max_perf_pct,
 				   limits->min_perf_pct);
-	limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
+	limits->min_perf = percent_ext_fp(limits->min_perf_pct);
 
 	intel_pstate_update_policies();
 
@@ -2078,36 +2080,34 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu)
 static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
 					    struct perf_limits *limits)
 {
+	int32_t max_policy_perf, min_policy_perf;
 
-	limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
-					      policy->cpuinfo.max_freq);
-	limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0, 100);
+	max_policy_perf = div_ext_fp(policy->max, policy->cpuinfo.max_freq);
+	max_policy_perf = clamp_t(int32_t, max_policy_perf, 0, int_ext_tofp(1));
 	if (policy->max == policy->min) {
-		limits->min_policy_pct = limits->max_policy_pct;
+		min_policy_perf = max_policy_perf;
 	} else {
-		limits->min_policy_pct = DIV_ROUND_UP(policy->min * 100,
-						      policy->cpuinfo.max_freq);
-		limits->min_policy_pct = clamp_t(int, limits->min_policy_pct,
-						 0, 100);
+		min_policy_perf = div_ext_fp(policy->min,
+					     policy->cpuinfo.max_freq);
+		min_policy_perf = clamp_t(int32_t, min_policy_perf,
+					  0, max_policy_perf);
 	}
 
-	/* Normalize user input to [min_policy_pct, max_policy_pct] */
-	limits->min_perf_pct = max(limits->min_policy_pct,
-				   limits->min_sysfs_pct);
-	limits->min_perf_pct = min(limits->max_policy_pct,
-				   limits->min_perf_pct);
-	limits->max_perf_pct = min(limits->max_policy_pct,
-				   limits->max_sysfs_pct);
-	limits->max_perf_pct = max(limits->min_policy_pct,
-				   limits->max_perf_pct);
+	/* Normalize user input to [min_perf, max_perf] */
+	limits->min_perf = max(min_policy_perf,
+			       percent_ext_fp(limits->min_sysfs_pct));
+	limits->min_perf = min(limits->min_perf, max_policy_perf);
+	limits->max_perf = min(max_policy_perf,
+			       percent_ext_fp(limits->max_sysfs_pct));
+	limits->max_perf = max(min_policy_perf, limits->max_perf);
 
-	/* Make sure min_perf_pct <= max_perf_pct */
-	limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
+	/* Make sure min_perf <= max_perf */
+	limits->min_perf = min(limits->min_perf, limits->max_perf);
 
-	limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
-	limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
 	limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS);
 	limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS);
+	limits->max_perf_pct = fp_ext_toint(limits->max_perf * 100);
+	limits->min_perf_pct = fp_ext_toint(limits->min_perf * 100);
 
 	pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu,
 		 limits->max_perf_pct, limits->min_perf_pct);
-- 
cgit v1.2.3


From 4494dbc6dec37817f2cc2aa7604039a9e87ada18 Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Wed, 15 Mar 2017 22:22:08 +0800
Subject: netfilter: nft_ct: do cleanup work when NFTA_CT_DIRECTION is invalid

We should jump to invoke __nft_ct_set_destroy() instead of just
return error.

Fixes: edee4f1e9245 ("netfilter: nft_ct: add zone id set support")
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_ct.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 91585b5e5307..0264258c46fe 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -544,7 +544,8 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
 		case IP_CT_DIR_REPLY:
 			break;
 		default:
-			return -EINVAL;
+			err = -EINVAL;
+			goto err1;
 		}
 	}
 
-- 
cgit v1.2.3


From 1b53cf9815bb4744958d41f3795d5d5a1d365e2d Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 21 Feb 2017 15:07:11 -0800
Subject: fscrypt: remove broken support for detecting keyring key revocation

Filesystem encryption ostensibly supported revoking a keyring key that
had been used to "unlock" encrypted files, causing those files to become
"locked" again.  This was, however, buggy for several reasons, the most
severe of which was that when key revocation happened to be detected for
an inode, its fscrypt_info was immediately freed, even while other
threads could be using it for encryption or decryption concurrently.
This could be exploited to crash the kernel or worse.

This patch fixes the use-after-free by removing the code which detects
the keyring key having been revoked, invalidated, or expired.  Instead,
an encrypted inode that is "unlocked" now simply remains unlocked until
it is evicted from memory.  Note that this is no worse than the case for
block device-level encryption, e.g. dm-crypt, and it still remains
possible for a privileged user to evict unused pages, inodes, and
dentries by running 'sync; echo 3 > /proc/sys/vm/drop_caches', or by
simply unmounting the filesystem.  In fact, one of those actions was
already needed anyway for key revocation to work even somewhat sanely.
This change is not expected to break any applications.

In the future I'd like to implement a real API for fscrypt key
revocation that interacts sanely with ongoing filesystem operations ---
waiting for existing operations to complete and blocking new operations,
and invalidating and sanitizing key material and plaintext from the VFS
caches.  But this is a hard problem, and for now this bug must be fixed.

This bug affected almost all versions of ext4, f2fs, and ubifs
encryption, and it was potentially reachable in any kernel configured
with encryption support (CONFIG_EXT4_ENCRYPTION=y,
CONFIG_EXT4_FS_ENCRYPTION=y, CONFIG_F2FS_FS_ENCRYPTION=y, or
CONFIG_UBIFS_FS_ENCRYPTION=y).  Note that older kernels did not use the
shared fs/crypto/ code, but due to the potential security implications
of this bug, it may still be worthwhile to backport this fix to them.

Fixes: b7236e21d55f ("ext4 crypto: reorganize how we store keys in the inode")
Cc: stable@vger.kernel.org # v4.2+
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Acked-by: Michael Halcrow <mhalcrow@google.com>
---
 fs/crypto/crypto.c          | 10 +--------
 fs/crypto/fname.c           |  2 +-
 fs/crypto/fscrypt_private.h |  4 ----
 fs/crypto/keyinfo.c         | 52 ++++++++-------------------------------------
 4 files changed, 11 insertions(+), 57 deletions(-)

diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 02a7a9286449..6d6eca394d4d 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -327,7 +327,6 @@ EXPORT_SYMBOL(fscrypt_decrypt_page);
 static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
 {
 	struct dentry *dir;
-	struct fscrypt_info *ci;
 	int dir_has_key, cached_with_key;
 
 	if (flags & LOOKUP_RCU)
@@ -339,18 +338,11 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
 		return 0;
 	}
 
-	ci = d_inode(dir)->i_crypt_info;
-	if (ci && ci->ci_keyring_key &&
-	    (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
-					  (1 << KEY_FLAG_REVOKED) |
-					  (1 << KEY_FLAG_DEAD))))
-		ci = NULL;
-
 	/* this should eventually be an flag in d_flags */
 	spin_lock(&dentry->d_lock);
 	cached_with_key = dentry->d_flags & DCACHE_ENCRYPTED_WITH_KEY;
 	spin_unlock(&dentry->d_lock);
-	dir_has_key = (ci != NULL);
+	dir_has_key = (d_inode(dir)->i_crypt_info != NULL);
 	dput(dir);
 
 	/*
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index 13052b85c393..37b49894c762 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -350,7 +350,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
 		fname->disk_name.len = iname->len;
 		return 0;
 	}
-	ret = fscrypt_get_crypt_info(dir);
+	ret = fscrypt_get_encryption_info(dir);
 	if (ret && ret != -EOPNOTSUPP)
 		return ret;
 
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index fdbb8af32eaf..e39696e64494 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -67,7 +67,6 @@ struct fscrypt_info {
 	u8 ci_filename_mode;
 	u8 ci_flags;
 	struct crypto_skcipher *ci_ctfm;
-	struct key *ci_keyring_key;
 	u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE];
 };
 
@@ -101,7 +100,4 @@ extern int fscrypt_do_page_crypto(const struct inode *inode,
 extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx,
 					      gfp_t gfp_flags);
 
-/* keyinfo.c */
-extern int fscrypt_get_crypt_info(struct inode *);
-
 #endif /* _FSCRYPT_PRIVATE_H */
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 02eb6b9e4438..cb3e82abf034 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -95,6 +95,7 @@ static int validate_user_key(struct fscrypt_info *crypt_info,
 	kfree(description);
 	if (IS_ERR(keyring_key))
 		return PTR_ERR(keyring_key);
+	down_read(&keyring_key->sem);
 
 	if (keyring_key->type != &key_type_logon) {
 		printk_once(KERN_WARNING
@@ -102,11 +103,9 @@ static int validate_user_key(struct fscrypt_info *crypt_info,
 		res = -ENOKEY;
 		goto out;
 	}
-	down_read(&keyring_key->sem);
 	ukp = user_key_payload(keyring_key);
 	if (ukp->datalen != sizeof(struct fscrypt_key)) {
 		res = -EINVAL;
-		up_read(&keyring_key->sem);
 		goto out;
 	}
 	master_key = (struct fscrypt_key *)ukp->data;
@@ -117,17 +116,11 @@ static int validate_user_key(struct fscrypt_info *crypt_info,
 				"%s: key size incorrect: %d\n",
 				__func__, master_key->size);
 		res = -ENOKEY;
-		up_read(&keyring_key->sem);
 		goto out;
 	}
 	res = derive_key_aes(ctx->nonce, master_key->raw, raw_key);
-	up_read(&keyring_key->sem);
-	if (res)
-		goto out;
-
-	crypt_info->ci_keyring_key = keyring_key;
-	return 0;
 out:
+	up_read(&keyring_key->sem);
 	key_put(keyring_key);
 	return res;
 }
@@ -169,12 +162,11 @@ static void put_crypt_info(struct fscrypt_info *ci)
 	if (!ci)
 		return;
 
-	key_put(ci->ci_keyring_key);
 	crypto_free_skcipher(ci->ci_ctfm);
 	kmem_cache_free(fscrypt_info_cachep, ci);
 }
 
-int fscrypt_get_crypt_info(struct inode *inode)
+int fscrypt_get_encryption_info(struct inode *inode)
 {
 	struct fscrypt_info *crypt_info;
 	struct fscrypt_context ctx;
@@ -184,21 +176,15 @@ int fscrypt_get_crypt_info(struct inode *inode)
 	u8 *raw_key = NULL;
 	int res;
 
+	if (inode->i_crypt_info)
+		return 0;
+
 	res = fscrypt_initialize(inode->i_sb->s_cop->flags);
 	if (res)
 		return res;
 
 	if (!inode->i_sb->s_cop->get_context)
 		return -EOPNOTSUPP;
-retry:
-	crypt_info = ACCESS_ONCE(inode->i_crypt_info);
-	if (crypt_info) {
-		if (!crypt_info->ci_keyring_key ||
-				key_validate(crypt_info->ci_keyring_key) == 0)
-			return 0;
-		fscrypt_put_encryption_info(inode, crypt_info);
-		goto retry;
-	}
 
 	res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
 	if (res < 0) {
@@ -229,7 +215,6 @@ retry:
 	crypt_info->ci_data_mode = ctx.contents_encryption_mode;
 	crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
 	crypt_info->ci_ctfm = NULL;
-	crypt_info->ci_keyring_key = NULL;
 	memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
 				sizeof(crypt_info->ci_master_key));
 
@@ -273,14 +258,8 @@ retry:
 	if (res)
 		goto out;
 
-	kzfree(raw_key);
-	raw_key = NULL;
-	if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) != NULL) {
-		put_crypt_info(crypt_info);
-		goto retry;
-	}
-	return 0;
-
+	if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) == NULL)
+		crypt_info = NULL;
 out:
 	if (res == -ENOKEY)
 		res = 0;
@@ -288,6 +267,7 @@ out:
 	kzfree(raw_key);
 	return res;
 }
+EXPORT_SYMBOL(fscrypt_get_encryption_info);
 
 void fscrypt_put_encryption_info(struct inode *inode, struct fscrypt_info *ci)
 {
@@ -305,17 +285,3 @@ void fscrypt_put_encryption_info(struct inode *inode, struct fscrypt_info *ci)
 	put_crypt_info(ci);
 }
 EXPORT_SYMBOL(fscrypt_put_encryption_info);
-
-int fscrypt_get_encryption_info(struct inode *inode)
-{
-	struct fscrypt_info *ci = inode->i_crypt_info;
-
-	if (!ci ||
-		(ci->ci_keyring_key &&
-		 (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
-					       (1 << KEY_FLAG_REVOKED) |
-					       (1 << KEY_FLAG_DEAD)))))
-		return fscrypt_get_crypt_info(inode);
-	return 0;
-}
-EXPORT_SYMBOL(fscrypt_get_encryption_info);
-- 
cgit v1.2.3


From 85b29008d8af6d94a0723aaa8d93cfb6e041158b Mon Sep 17 00:00:00 2001
From: Don Brace <don.brace@microsemi.com>
Date: Fri, 10 Mar 2017 14:35:11 -0600
Subject: scsi: hpsa: update check for logical volume status

 - Add in a new case for volume offline. Resolves internal testing bug
   for multilun array management.
 - Return correct status for failed TURs.

Reviewed-by: Scott Benesh <scott.benesh@microsemi.com>
Reviewed-by: Scott Teel <scott.teel@microsemi.com>
Signed-off-by: Don Brace <don.brace@microsemi.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hpsa.c     | 35 ++++++++++++++++-------------------
 drivers/scsi/hpsa_cmd.h |  2 ++
 2 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 524a0c755ed7..90b76c4c6d36 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -3714,7 +3714,7 @@ exit_failed:
  *  # (integer code indicating one of several NOT READY states
  *     describing why a volume is to be kept offline)
  */
-static int hpsa_volume_offline(struct ctlr_info *h,
+static unsigned char hpsa_volume_offline(struct ctlr_info *h,
 					unsigned char scsi3addr[])
 {
 	struct CommandList *c;
@@ -3735,7 +3735,7 @@ static int hpsa_volume_offline(struct ctlr_info *h,
 					DEFAULT_TIMEOUT);
 	if (rc) {
 		cmd_free(h, c);
-		return 0;
+		return HPSA_VPD_LV_STATUS_UNSUPPORTED;
 	}
 	sense = c->err_info->SenseInfo;
 	if (c->err_info->SenseLen > sizeof(c->err_info->SenseInfo))
@@ -3746,19 +3746,13 @@ static int hpsa_volume_offline(struct ctlr_info *h,
 	cmd_status = c->err_info->CommandStatus;
 	scsi_status = c->err_info->ScsiStatus;
 	cmd_free(h, c);
-	/* Is the volume 'not ready'? */
-	if (cmd_status != CMD_TARGET_STATUS ||
-		scsi_status != SAM_STAT_CHECK_CONDITION ||
-		sense_key != NOT_READY ||
-		asc != ASC_LUN_NOT_READY)  {
-		return 0;
-	}
 
 	/* Determine the reason for not ready state */
 	ldstat = hpsa_get_volume_status(h, scsi3addr);
 
 	/* Keep volume offline in certain cases: */
 	switch (ldstat) {
+	case HPSA_LV_FAILED:
 	case HPSA_LV_UNDERGOING_ERASE:
 	case HPSA_LV_NOT_AVAILABLE:
 	case HPSA_LV_UNDERGOING_RPI:
@@ -3780,7 +3774,7 @@ static int hpsa_volume_offline(struct ctlr_info *h,
 	default:
 		break;
 	}
-	return 0;
+	return HPSA_LV_OK;
 }
 
 /*
@@ -3853,10 +3847,10 @@ static int hpsa_update_device_info(struct ctlr_info *h,
 	/* Do an inquiry to the device to see what it is. */
 	if (hpsa_scsi_do_inquiry(h, scsi3addr, 0, inq_buff,
 		(unsigned char) OBDR_TAPE_INQ_SIZE) != 0) {
-		/* Inquiry failed (msg printed already) */
 		dev_err(&h->pdev->dev,
-			"hpsa_update_device_info: inquiry failed\n");
-		rc = -EIO;
+			"%s: inquiry failed, device will be skipped.\n",
+			__func__);
+		rc = HPSA_INQUIRY_FAILED;
 		goto bail_out;
 	}
 
@@ -3885,15 +3879,19 @@ static int hpsa_update_device_info(struct ctlr_info *h,
 	if ((this_device->devtype == TYPE_DISK ||
 		this_device->devtype == TYPE_ZBC) &&
 		is_logical_dev_addr_mode(scsi3addr)) {
-		int volume_offline;
+		unsigned char volume_offline;
 
 		hpsa_get_raid_level(h, scsi3addr, &this_device->raid_level);
 		if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC)
 			hpsa_get_ioaccel_status(h, scsi3addr, this_device);
 		volume_offline = hpsa_volume_offline(h, scsi3addr);
-		if (volume_offline < 0 || volume_offline > 0xff)
-			volume_offline = HPSA_VPD_LV_STATUS_UNSUPPORTED;
-		this_device->volume_offline = volume_offline & 0xff;
+		if (volume_offline == HPSA_LV_FAILED) {
+			rc = HPSA_LV_FAILED;
+			dev_err(&h->pdev->dev,
+				"%s: LV failed, device will be skipped.\n",
+				__func__);
+			goto bail_out;
+		}
 	} else {
 		this_device->raid_level = RAID_UNKNOWN;
 		this_device->offload_config = 0;
@@ -4379,8 +4377,7 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h)
 			goto out;
 		}
 		if (rc) {
-			dev_warn(&h->pdev->dev,
-				"Inquiry failed, skipping device.\n");
+			h->drv_req_rescan = 1;
 			continue;
 		}
 
diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h
index a584cdf07058..5961705eef76 100644
--- a/drivers/scsi/hpsa_cmd.h
+++ b/drivers/scsi/hpsa_cmd.h
@@ -156,6 +156,7 @@
 #define CFGTBL_BusType_Fibre2G  0x00000200l
 
 /* VPD Inquiry types */
+#define HPSA_INQUIRY_FAILED		0x02
 #define HPSA_VPD_SUPPORTED_PAGES        0x00
 #define HPSA_VPD_LV_DEVICE_ID           0x83
 #define HPSA_VPD_LV_DEVICE_GEOMETRY     0xC1
@@ -166,6 +167,7 @@
 /* Logical volume states */
 #define HPSA_VPD_LV_STATUS_UNSUPPORTED			0xff
 #define HPSA_LV_OK                                      0x0
+#define HPSA_LV_FAILED					0x01
 #define HPSA_LV_NOT_AVAILABLE				0x0b
 #define HPSA_LV_UNDERGOING_ERASE			0x0F
 #define HPSA_LV_UNDERGOING_RPI				0x12
-- 
cgit v1.2.3


From 87b9e6aa87d9411f1059aa245c0c79976bc557ac Mon Sep 17 00:00:00 2001
From: Don Brace <don.brace@microsemi.com>
Date: Fri, 10 Mar 2017 14:35:17 -0600
Subject: scsi: hpsa: limit outstanding rescans

Avoid rescan storms. No need to queue another if one is pending.

Reviewed-by: Scott Benesh <scott.benesh@microsemi.com>
Reviewed-by: Scott Teel <scott.teel@microsemi.com>
Reviewed-by: Tomas Henzl <thenzl@redhat.com>
Signed-off-by: Don Brace <don.brace@microsemi.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hpsa.c | 16 +++++++++++++++-
 drivers/scsi/hpsa.h |  1 +
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 90b76c4c6d36..0a8ac68f4ca1 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -5555,7 +5555,7 @@ static void hpsa_scan_complete(struct ctlr_info *h)
 
 	spin_lock_irqsave(&h->scan_lock, flags);
 	h->scan_finished = 1;
-	wake_up_all(&h->scan_wait_queue);
+	wake_up(&h->scan_wait_queue);
 	spin_unlock_irqrestore(&h->scan_lock, flags);
 }
 
@@ -5573,11 +5573,23 @@ static void hpsa_scan_start(struct Scsi_Host *sh)
 	if (unlikely(lockup_detected(h)))
 		return hpsa_scan_complete(h);
 
+	/*
+	 * If a scan is already waiting to run, no need to add another
+	 */
+	spin_lock_irqsave(&h->scan_lock, flags);
+	if (h->scan_waiting) {
+		spin_unlock_irqrestore(&h->scan_lock, flags);
+		return;
+	}
+
+	spin_unlock_irqrestore(&h->scan_lock, flags);
+
 	/* wait until any scan already in progress is finished. */
 	while (1) {
 		spin_lock_irqsave(&h->scan_lock, flags);
 		if (h->scan_finished)
 			break;
+		h->scan_waiting = 1;
 		spin_unlock_irqrestore(&h->scan_lock, flags);
 		wait_event(h->scan_wait_queue, h->scan_finished);
 		/* Note: We don't need to worry about a race between this
@@ -5587,6 +5599,7 @@ static void hpsa_scan_start(struct Scsi_Host *sh)
 		 */
 	}
 	h->scan_finished = 0; /* mark scan as in progress */
+	h->scan_waiting = 0;
 	spin_unlock_irqrestore(&h->scan_lock, flags);
 
 	if (unlikely(lockup_detected(h)))
@@ -8789,6 +8802,7 @@ reinit_after_soft_reset:
 	init_waitqueue_head(&h->event_sync_wait_queue);
 	mutex_init(&h->reset_mutex);
 	h->scan_finished = 1; /* no scan currently in progress */
+	h->scan_waiting = 0;
 
 	pci_set_drvdata(pdev, h);
 	h->ndevices = 0;
diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h
index bf6cdc106654..6f04f2ad4125 100644
--- a/drivers/scsi/hpsa.h
+++ b/drivers/scsi/hpsa.h
@@ -201,6 +201,7 @@ struct ctlr_info {
 	dma_addr_t		errinfo_pool_dhandle;
 	unsigned long  		*cmd_pool_bits;
 	int			scan_finished;
+	u8			scan_waiting : 1;
 	spinlock_t		scan_lock;
 	wait_queue_head_t	scan_wait_queue;
 
-- 
cgit v1.2.3


From 2ef2884980873081a4edae92f9d88dd580c85f6e Mon Sep 17 00:00:00 2001
From: Don Brace <don.brace@microsemi.com>
Date: Fri, 10 Mar 2017 14:35:23 -0600
Subject: scsi: hpsa: do not timeout reset operations

Resets can take longer than DEFAULT_TIMEOUT.

Reviewed-by: Scott Benesh <scott.benesh@microsemi.com>
Reviewed-by: Scott Teel <scott.teel@microsemi.com>
Reviewed-by: Tomas Henzl <thenzl@redhat.com>
Signed-off-by: Don Brace <don.brace@microsemi.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hpsa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 0a8ac68f4ca1..0d0be7754a65 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -2956,7 +2956,7 @@ static int hpsa_send_reset(struct ctlr_info *h, unsigned char *scsi3addr,
 	/* fill_cmd can't fail here, no data buffer to map. */
 	(void) fill_cmd(c, reset_type, h, NULL, 0, 0,
 			scsi3addr, TYPE_MSG);
-	rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, DEFAULT_TIMEOUT);
+	rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, NO_TIMEOUT);
 	if (rc) {
 		dev_warn(&h->pdev->dev, "Failed to send reset command\n");
 		goto out;
-- 
cgit v1.2.3


From 949d7fa158b2b1af533bdb1af0dda8ab103ac58d Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Sun, 12 Mar 2017 12:22:02 +0200
Subject: scsi: ufs: don't check unsigned type for a negative value

Fix compilation warning:

drivers/scsi/ufs/ufshcd.c:7645:13: warning: comparison of unsigned
expression < 0 is always false [-Wtype-limits]
if ((value < UFS_PM_LVL_0) || (value >= UFS_PM_LVL_MAX))

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Reviewed-by: Subhash Jadavani <subhashj@codeaurora.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ufs/ufshcd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 1359913bf840..e8c26e6e6237 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -7642,7 +7642,7 @@ static inline ssize_t ufshcd_pm_lvl_store(struct device *dev,
 	if (kstrtoul(buf, 0, &value))
 		return -EINVAL;
 
-	if ((value < UFS_PM_LVL_0) || (value >= UFS_PM_LVL_MAX))
+	if (value >= UFS_PM_LVL_MAX)
 		return -EINVAL;
 
 	spin_lock_irqsave(hba->host->host_lock, flags);
-- 
cgit v1.2.3


From 7d7080335f8d93a51e8238b6e85be8af4ba452b6 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Wed, 8 Mar 2017 14:36:01 -0800
Subject: scsi: lpfc: Finalize Kconfig options for nvme

Reviewing the result of what was just added for Kconfig, we made a poor
choice. It worked well for full kernel builds, but not so much for how
it would be deployed on a distro.

Here's the final result:
- lpfc will compile in NVME initiator and/or NVME target support based
  on whether the kernel has the corresponding subsystem support.
  Kconfig is not used to drive this specifically for lpfc.
- There is a module parameter, lpfc_enable_fc4_type, that indicates
  whether the ports will do FCP-only or FCP & NVME (NVME-only not yet
  possible due to dependency on fc transport). As FCP & NVME divvys up
  exchange resources, and given NVME will not be often initially, the
  default is changed to FCP only.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/Kconfig           | 14 --------------
 drivers/scsi/lpfc/lpfc_attr.c  |  4 ++--
 drivers/scsi/lpfc/lpfc_init.c  |  7 +++++++
 drivers/scsi/lpfc/lpfc_nvme.c  |  8 ++++----
 drivers/scsi/lpfc/lpfc_nvmet.c |  8 ++++----
 5 files changed, 17 insertions(+), 24 deletions(-)

diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 4bf55b5d78be..3c52867dfe28 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1253,20 +1253,6 @@ config SCSI_LPFC_DEBUG_FS
 	  This makes debugging information from the lpfc driver
 	  available via the debugfs filesystem.
 
-config LPFC_NVME_INITIATOR
-	bool "Emulex LightPulse Fibre Channel NVME Initiator Support"
-	depends on SCSI_LPFC && NVME_FC
-	---help---
-	  This enables NVME Initiator support in the Emulex lpfc driver.
-
-config LPFC_NVME_TARGET
-	bool "Emulex LightPulse Fibre Channel NVME Initiator Support"
-	depends on SCSI_LPFC && NVME_TARGET_FC
-	---help---
-	  This enables NVME Target support in the Emulex lpfc driver.
-	  Target enablement must still be enabled on a per adapter
-	  basis by module parameters.
-
 config SCSI_SIM710
 	tristate "Simple 53c710 SCSI support (Compaq, NCR machines)"
 	depends on (EISA || MCA) && SCSI
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index fbd3a563be53..84aa62f1a4de 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -3315,9 +3315,9 @@ LPFC_ATTR_R(nvmet_mrq_post, LPFC_DEF_MRQ_POST,
  * lpfc_enable_fc4_type: Defines what FC4 types are supported.
  * Supported Values:  1 - register just FCP
  *                    3 - register both FCP and NVME
- * Supported values are [1,3]. Default value is 3
+ * Supported values are [1,3]. Default value is 1
  */
-LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_BOTH,
+LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_FCP,
 	    LPFC_ENABLE_FCP, LPFC_ENABLE_BOTH,
 	    "Define fc4 type to register with fabric.");
 
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 2697d49da4d7..6cc561b04211 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -5891,10 +5891,17 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 		/* Check to see if it matches any module parameter */
 		for (i = 0; i < lpfc_enable_nvmet_cnt; i++) {
 			if (wwn == lpfc_enable_nvmet[i]) {
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
 				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 						"6017 NVME Target %016llx\n",
 						wwn);
 				phba->nvmet_support = 1; /* a match */
+#else
+				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+						"6021 Can't enable NVME Target."
+						" NVME_TARGET_FC infrastructure"
+						" is not in kernel\n");
+#endif
 			}
 		}
 	}
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index 0a4c19081409..0024de1c6c1f 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -2149,7 +2149,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
 	/* localport is allocated from the stack, but the registration
 	 * call allocates heap memory as well as the private area.
 	 */
-#ifdef CONFIG_LPFC_NVME_INITIATOR
+#if (IS_ENABLED(CONFIG_NVME_FC))
 	ret = nvme_fc_register_localport(&nfcp_info, &lpfc_nvme_template,
 					 &vport->phba->pcidev->dev, &localport);
 #else
@@ -2190,7 +2190,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
 void
 lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
 {
-#ifdef CONFIG_LPFC_NVME_INITIATOR
+#if (IS_ENABLED(CONFIG_NVME_FC))
 	struct nvme_fc_local_port *localport;
 	struct lpfc_nvme_lport *lport;
 	struct lpfc_nvme_rport *rport = NULL, *rport_next = NULL;
@@ -2274,7 +2274,7 @@ lpfc_nvme_update_localport(struct lpfc_vport *vport)
 int
 lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 {
-#ifdef CONFIG_LPFC_NVME_INITIATOR
+#if (IS_ENABLED(CONFIG_NVME_FC))
 	int ret = 0;
 	struct nvme_fc_local_port *localport;
 	struct lpfc_nvme_lport *lport;
@@ -2403,7 +2403,7 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 void
 lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 {
-#ifdef CONFIG_LPFC_NVME_INITIATOR
+#if (IS_ENABLED(CONFIG_NVME_FC))
 	int ret;
 	struct nvme_fc_local_port *localport;
 	struct lpfc_nvme_lport *lport;
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index b7739a554fe0..7ca868f394da 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -671,7 +671,7 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
 	lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP |
 					   NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED;
 
-#ifdef CONFIG_LPFC_NVME_TARGET
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
 	error = nvmet_fc_register_targetport(&pinfo, &lpfc_tgttemplate,
 					     &phba->pcidev->dev,
 					     &phba->targetport);
@@ -756,7 +756,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
 void
 lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba)
 {
-#ifdef CONFIG_LPFC_NVME_TARGET
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
 	struct lpfc_nvmet_tgtport *tgtp;
 
 	if (phba->nvmet_support == 0)
@@ -788,7 +788,7 @@ static void
 lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 			   struct hbq_dmabuf *nvmebuf)
 {
-#ifdef CONFIG_LPFC_NVME_TARGET
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
 	struct lpfc_nvmet_tgtport *tgtp;
 	struct fc_frame_header *fc_hdr;
 	struct lpfc_nvmet_rcv_ctx *ctxp;
@@ -891,7 +891,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 			    struct rqb_dmabuf *nvmebuf,
 			    uint64_t isr_timestamp)
 {
-#ifdef CONFIG_LPFC_NVME_TARGET
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
 	struct lpfc_nvmet_rcv_ctx *ctxp;
 	struct lpfc_nvmet_tgtport *tgtp;
 	struct fc_frame_header *fc_hdr;
-- 
cgit v1.2.3


From 94840e3c802daa1a62985957f36ac48faf8ceedd Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Wed, 22 Feb 2017 13:25:14 -0800
Subject: fscrypt: eliminate ->prepare_context() operation

The only use of the ->prepare_context() fscrypt operation was to allow
ext4 to evict inline data from the inode before ->set_context().
However, there is no reason why this cannot be done as simply the first
step in ->set_context(), and in fact it makes more sense to do it that
way because then the policy modes and flags get validated before any
real work is done.  Therefore, merge ext4_prepare_context() into
ext4_set_context(), and remove ->prepare_context().

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/crypto/policy.c             |  7 -------
 fs/ext4/super.c                | 10 ++++------
 include/linux/fscrypt_common.h |  1 -
 3 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index 14b76da71269..4908906d54d5 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -33,17 +33,10 @@ static int create_encryption_context_from_policy(struct inode *inode,
 				const struct fscrypt_policy *policy)
 {
 	struct fscrypt_context ctx;
-	int res;
 
 	if (!inode->i_sb->s_cop->set_context)
 		return -EOPNOTSUPP;
 
-	if (inode->i_sb->s_cop->prepare_context) {
-		res = inode->i_sb->s_cop->prepare_context(inode);
-		if (res)
-			return res;
-	}
-
 	ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
 	memcpy(ctx.master_key_descriptor, policy->master_key_descriptor,
 					FS_KEY_DESCRIPTOR_SIZE);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2e03a0a88d92..a9448db1cf7e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1120,17 +1120,16 @@ static int ext4_get_context(struct inode *inode, void *ctx, size_t len)
 				 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len);
 }
 
-static int ext4_prepare_context(struct inode *inode)
-{
-	return ext4_convert_inline_data(inode);
-}
-
 static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
 							void *fs_data)
 {
 	handle_t *handle = fs_data;
 	int res, res2, retries = 0;
 
+	res = ext4_convert_inline_data(inode);
+	if (res)
+		return res;
+
 	/*
 	 * If a journal handle was specified, then the encryption context is
 	 * being set on a new inode via inheritance and is part of a larger
@@ -1196,7 +1195,6 @@ static unsigned ext4_max_namelen(struct inode *inode)
 static const struct fscrypt_operations ext4_cryptops = {
 	.key_prefix		= "ext4:",
 	.get_context		= ext4_get_context,
-	.prepare_context	= ext4_prepare_context,
 	.set_context		= ext4_set_context,
 	.dummy_context		= ext4_dummy_context,
 	.is_encrypted		= ext4_encrypted_inode,
diff --git a/include/linux/fscrypt_common.h b/include/linux/fscrypt_common.h
index 547f81592ba1..10c1abfbac6c 100644
--- a/include/linux/fscrypt_common.h
+++ b/include/linux/fscrypt_common.h
@@ -87,7 +87,6 @@ struct fscrypt_operations {
 	unsigned int flags;
 	const char *key_prefix;
 	int (*get_context)(struct inode *, void *, size_t);
-	int (*prepare_context)(struct inode *);
 	int (*set_context)(struct inode *, const void *, size_t, void *);
 	int (*dummy_context)(struct inode *);
 	bool (*is_encrypted)(struct inode *);
-- 
cgit v1.2.3


From b9cf625d6ecde0d372e23ae022feead72b4228a6 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Wed, 15 Mar 2017 14:52:02 -0400
Subject: ext4: mark inode dirty after converting inline directory

If ext4_convert_inline_data() was called on a directory with inline
data, the filesystem was left in an inconsistent state (as considered by
e2fsck) because the file size was not increased to cover the new block.
This happened because the inode was not marked dirty after i_disksize
was updated.  Fix this by marking the inode dirty at the end of
ext4_finish_convert_inline_dir().

This bug was probably not noticed before because most users mark the
inode dirty afterwards for other reasons.  But if userspace executed
FS_IOC_SET_ENCRYPTION_POLICY with invalid parameters, as exercised by
'kvm-xfstests -c adv generic/396', then the inode was never marked dirty
after updating i_disksize.

Cc: stable@vger.kernel.org  # 3.10+
Fixes: 3c47d54170b6a678875566b1b8d6dcf57904e49b
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/inline.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 30a9f210d1e3..375fb1c05d49 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1169,10 +1169,9 @@ static int ext4_finish_convert_inline_dir(handle_t *handle,
 	set_buffer_uptodate(dir_block);
 	err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
 	if (err)
-		goto out;
+		return err;
 	set_buffer_verified(dir_block);
-out:
-	return err;
+	return ext4_mark_inode_dirty(handle, inode);
 }
 
 static int ext4_convert_inline_data_nolock(handle_t *handle,
-- 
cgit v1.2.3


From cd9cb405e0b948363811dc74dbb2890f56f2cb87 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Wed, 15 Mar 2017 15:08:48 -0400
Subject: jbd2: don't leak memory if setting up journal fails

In journal_init_common(), if we failed to allocate the j_wbuf array, or
if we failed to create the buffer_head for the journal superblock, we
leaked the memory allocated for the revocation tables.  Fix this.

Cc: stable@vger.kernel.org # 4.9
Fixes: f0c9fd5458bacf7b12a9a579a727dc740cbe047e
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 fs/jbd2/journal.c | 22 +++++++++++-----------
 fs/jbd2/revoke.c  |  1 +
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index a1a359bfcc9c..5adc2fb62b0f 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1125,10 +1125,8 @@ static journal_t *journal_init_common(struct block_device *bdev,
 
 	/* Set up a default-sized revoke table for the new mount. */
 	err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH);
-	if (err) {
-		kfree(journal);
-		return NULL;
-	}
+	if (err)
+		goto err_cleanup;
 
 	spin_lock_init(&journal->j_history_lock);
 
@@ -1145,23 +1143,25 @@ static journal_t *journal_init_common(struct block_device *bdev,
 	journal->j_wbufsize = n;
 	journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *),
 					GFP_KERNEL);
-	if (!journal->j_wbuf) {
-		kfree(journal);
-		return NULL;
-	}
+	if (!journal->j_wbuf)
+		goto err_cleanup;
 
 	bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize);
 	if (!bh) {
 		pr_err("%s: Cannot get buffer for journal superblock\n",
 			__func__);
-		kfree(journal->j_wbuf);
-		kfree(journal);
-		return NULL;
+		goto err_cleanup;
 	}
 	journal->j_sb_buffer = bh;
 	journal->j_superblock = (journal_superblock_t *)bh->b_data;
 
 	return journal;
+
+err_cleanup:
+	kfree(journal->j_wbuf);
+	jbd2_journal_destroy_revoke(journal);
+	kfree(journal);
+	return NULL;
 }
 
 /* jbd2_journal_init_dev and jbd2_journal_init_inode:
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index cfc38b552118..f9aefcda5854 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -280,6 +280,7 @@ int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
 
 fail1:
 	jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
+	journal->j_revoke_table[0] = NULL;
 fail0:
 	return -ENOMEM;
 }
-- 
cgit v1.2.3


From fe8daf5fa715f7214952f06a387e4b7de818c5be Mon Sep 17 00:00:00 2001
From: Taku Izumi <izumi.taku@jp.fujitsu.com>
Date: Wed, 15 Mar 2017 13:47:50 +0900
Subject: fjes: Fix wrong netdevice feature flags

This patch fixes netdev->features for Extended Socket network device.

Currently Extended Socket network device's netdev->feature claims
NETIF_F_HW_CSUM, however this is completely wrong. There's no feature
of checksum offloading.
That causes invalid TCP/UDP checksum and packet rejection when IP
forwarding from Extended Socket network device to other network device.

NETIF_F_HW_CSUM should be omitted.

Signed-off-by: Taku Izumi <izumi.taku@jp.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/fjes/fjes_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c
index b75d9cdcfb0c..c4b3c4b77a9c 100644
--- a/drivers/net/fjes/fjes_main.c
+++ b/drivers/net/fjes/fjes_main.c
@@ -1316,7 +1316,7 @@ static void fjes_netdev_setup(struct net_device *netdev)
 	netdev->min_mtu = fjes_support_mtu[0];
 	netdev->max_mtu = fjes_support_mtu[3];
 	netdev->flags |= IFF_BROADCAST;
-	netdev->features |= NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_FILTER;
+	netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 }
 
 static void fjes_irq_watch_task(struct work_struct *work)
-- 
cgit v1.2.3


From e83bb3e6f3efa21f4a9d883a25d0ecd9dfb431e1 Mon Sep 17 00:00:00 2001
From: Michael Engl <michael.engl@wjw-solutions.com>
Date: Tue, 3 Oct 2017 13:57:00 +0100
Subject: iio: adc: ti_am335x_adc: fix fifo overrun recovery

The tiadc_irq_h(int irq, void *private) function is handling FIFO
overruns by clearing flags, disabling and enabling the ADC to
recover.

If the ADC is running in continuous mode a FIFO overrun happens
regularly. If the disabling of the ADC happens concurrently with
a new conversion. It might happen that the enabling of the ADC
is ignored by the hardware. This stops the ADC permanently. No
more interrupts are triggered.

According to the AM335x Reference Manual (SPRUH73H October 2011 -
Revised April 2013 - Chapter 12.4 and 12.5) it is necessary to
check the ADC FSM bits in REG_ADCFSM before enabling the ADC
again. Because the disabling of the ADC is done right after the
current conversion has been finished.

To trigger this bug it is necessary to run the ADC in continuous
mode. The ADC values of all channels need to be read in an endless
loop. The bug appears within the first 6 hours (~5.4 million
handled FIFO overruns). The user space application will hang on
reading new values from the character device.

Fixes: ca9a563805f7a ("iio: ti_am335x_adc: Add continuous sampling
support")
Signed-off-by: Michael Engl <michael.engl@wjw-solutions.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/adc/ti_am335x_adc.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c
index ad9dec30bb30..4282ceca3d8f 100644
--- a/drivers/iio/adc/ti_am335x_adc.c
+++ b/drivers/iio/adc/ti_am335x_adc.c
@@ -169,7 +169,9 @@ static irqreturn_t tiadc_irq_h(int irq, void *private)
 {
 	struct iio_dev *indio_dev = private;
 	struct tiadc_device *adc_dev = iio_priv(indio_dev);
-	unsigned int status, config;
+	unsigned int status, config, adc_fsm;
+	unsigned short count = 0;
+
 	status = tiadc_readl(adc_dev, REG_IRQSTATUS);
 
 	/*
@@ -183,6 +185,15 @@ static irqreturn_t tiadc_irq_h(int irq, void *private)
 		tiadc_writel(adc_dev, REG_CTRL, config);
 		tiadc_writel(adc_dev, REG_IRQSTATUS, IRQENB_FIFO1OVRRUN
 				| IRQENB_FIFO1UNDRFLW | IRQENB_FIFO1THRES);
+
+		/* wait for idle state.
+		 * ADC needs to finish the current conversion
+		 * before disabling the module
+		 */
+		do {
+			adc_fsm = tiadc_readl(adc_dev, REG_ADCFSM);
+		} while (adc_fsm != 0x10 && count++ < 100);
+
 		tiadc_writel(adc_dev, REG_CTRL, (config | CNTRLREG_TSCSSENB));
 		return IRQ_HANDLED;
 	} else if (status & IRQENB_FIFO1THRES) {
-- 
cgit v1.2.3


From 5f655322b1ba4bd46e26e307d04098f9c84df764 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Tue, 14 Mar 2017 11:47:29 -0400
Subject: parisc: support R_PARISC_SECREL32 relocation in modules

The parisc kernel doesn't work with CONFIG_MODVERSIONS since the commit
71810db27c1c853b335675bee335d893bc3d324b. It can't load modules with the
error: "module unix: Unknown relocation: 41".

The commit changes __kcrctab from 64-bit valus to 32-bit values. The
assembler generates R_PARISC_SECREL32 secrel relocation for them and the
module loader doesn't support this relocation.

This patch adds the R_PARISC_SECREL32 relocation to the module loader.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org	# v4.10+
Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/module.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index a0ecdb4abcc8..c66c943d9322 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -620,6 +620,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
 			 */
 			*loc = fsel(val, addend); 
 			break;
+		case R_PARISC_SECREL32:
+			/* 32-bit section relative address. */
+			*loc = fsel(val, addend);
+			break;
 		case R_PARISC_DPREL21L:
 			/* left 21 bit of relative address */
 			val = lrsel(val - dp, addend);
@@ -807,6 +811,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
 			 */
 			*loc = fsel(val, addend); 
 			break;
+		case R_PARISC_SECREL32:
+			/* 32-bit section relative address. */
+			*loc = fsel(val, addend);
+			break;
 		case R_PARISC_FPTR64:
 			/* 64-bit function address */
 			if(in_local(me, (void *)(val + addend))) {
-- 
cgit v1.2.3


From 316ec0624f951166daedbe446988ef92ae72b59f Mon Sep 17 00:00:00 2001
From: John David Anglin <dave.anglin@bell.net>
Date: Sat, 11 Mar 2017 18:03:34 -0500
Subject: parisc: Optimize flush_kernel_vmap_range and
 invalidate_kernel_vmap_range

The previously submitted patch did not resolve the random segmentation
faults observed on the phantom buildd system.  There are still
unresolved problems with the Debian 4.8 and 4.9 kernels on C8000.

The attached patch removes the flush of the offset map pages and does a
whole data cache flush for large ranges.  No other arch flushes the
offset map in these routines as far as I can tell.

I have not observed any random segmentation faults on rp3440 in two
weeks of testing with 4.10.0 and 4.10.1.

Signed-off-by: John David Anglin <dave.anglin@bell.net>
Cc: stable@vger.kernel.org      # v4.8+
Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/include/asm/cacheflush.h | 23 ++---------------------
 arch/parisc/kernel/cache.c           | 22 ++++++++++++++++++++++
 2 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index 19c9c3c5f267..c7e15cc5c668 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -43,28 +43,9 @@ static inline void flush_kernel_dcache_page(struct page *page)
 
 #define flush_kernel_dcache_range(start,size) \
 	flush_kernel_dcache_range_asm((start), (start)+(size));
-/* vmap range flushes and invalidates.  Architecturally, we don't need
- * the invalidate, because the CPU should refuse to speculate once an
- * area has been flushed, so invalidate is left empty */
-static inline void flush_kernel_vmap_range(void *vaddr, int size)
-{
-	unsigned long start = (unsigned long)vaddr;
-
-	flush_kernel_dcache_range_asm(start, start + size);
-}
-static inline void invalidate_kernel_vmap_range(void *vaddr, int size)
-{
-	unsigned long start = (unsigned long)vaddr;
-	void *cursor = vaddr;
 
-	for ( ; cursor < vaddr + size; cursor += PAGE_SIZE) {
-		struct page *page = vmalloc_to_page(cursor);
-
-		if (test_and_clear_bit(PG_dcache_dirty, &page->flags))
-			flush_kernel_dcache_page(page);
-	}
-	flush_kernel_dcache_range_asm(start, start + size);
-}
+void flush_kernel_vmap_range(void *vaddr, int size);
+void invalidate_kernel_vmap_range(void *vaddr, int size);
 
 #define flush_cache_vmap(start, end)		flush_cache_all()
 #define flush_cache_vunmap(start, end)		flush_cache_all()
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 0dc72d5de861..c32a09095216 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -616,3 +616,25 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long
 		__flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
 	}
 }
+
+void flush_kernel_vmap_range(void *vaddr, int size)
+{
+	unsigned long start = (unsigned long)vaddr;
+
+	if ((unsigned long)size > parisc_cache_flush_threshold)
+		flush_data_cache();
+	else
+		flush_kernel_dcache_range_asm(start, start + size);
+}
+EXPORT_SYMBOL(flush_kernel_vmap_range);
+
+void invalidate_kernel_vmap_range(void *vaddr, int size)
+{
+	unsigned long start = (unsigned long)vaddr;
+
+	if ((unsigned long)size > parisc_cache_flush_threshold)
+		flush_data_cache();
+	else
+		flush_kernel_dcache_range_asm(start, start + size);
+}
+EXPORT_SYMBOL(invalidate_kernel_vmap_range);
-- 
cgit v1.2.3


From 63d32d1e09cb2fc65b084b261976c06b40d19115 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Wed, 15 Mar 2017 21:10:17 +0100
Subject: parisc: Wire up statx system call

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/include/uapi/asm/unistd.h | 3 ++-
 arch/parisc/kernel/syscall_table.S    | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/parisc/include/uapi/asm/unistd.h b/arch/parisc/include/uapi/asm/unistd.h
index 6b0741e7a7ed..667c99421003 100644
--- a/arch/parisc/include/uapi/asm/unistd.h
+++ b/arch/parisc/include/uapi/asm/unistd.h
@@ -362,8 +362,9 @@
 #define __NR_copy_file_range	(__NR_Linux + 346)
 #define __NR_preadv2		(__NR_Linux + 347)
 #define __NR_pwritev2		(__NR_Linux + 348)
+#define __NR_statx		(__NR_Linux + 349)
 
-#define __NR_Linux_syscalls	(__NR_pwritev2 + 1)
+#define __NR_Linux_syscalls	(__NR_statx + 1)
 
 
 #define __IGNORE_select		/* newselect */
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 3cfef1de8061..44aeaa9c039f 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -444,6 +444,7 @@
 	ENTRY_SAME(copy_file_range)
 	ENTRY_COMP(preadv2)
 	ENTRY_COMP(pwritev2)
+	ENTRY_SAME(statx)
 
 
 .ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b))
-- 
cgit v1.2.3


From 0f424de1fd9bc4ab24bd1fe5430ab5618e803e31 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 14 Mar 2017 14:42:03 -0400
Subject: drm/radeon/si: add dpm quirk for Oland
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

OLAND 0x1002:0x6604 0x1028:0x066F 0x00 seems to have problems
with higher sclks.

Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/si_dpm.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index d12b8978142f..72e1588580a1 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -2984,6 +2984,12 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
 		    (rdev->pdev->device == 0x6667)) {
 			max_sclk = 75000;
 		}
+	} else if (rdev->family == CHIP_OLAND) {
+		if ((rdev->pdev->device == 0x6604) &&
+		    (rdev->pdev->subsystem_vendor == 0x1028) &&
+		    (rdev->pdev->subsystem_device == 0x066F)) {
+			max_sclk = 75000;
+		}
 	}
 
 	if (rps->vce_active) {
-- 
cgit v1.2.3


From 18a8de1bc37e97dff1c96ee6cf49adbd02a0f775 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 14 Mar 2017 19:24:19 -0400
Subject: drm/amdgpu/si: add dpm quirk for Oland
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

OLAND 0x1002:0x6604 0x1028:0x066F 0x00 seems to have problems
with higher sclks.

Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/si_dpm.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
index f55e45b52fbc..33b504bafb88 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
@@ -3464,6 +3464,12 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev,
 		    (adev->pdev->device == 0x6667)) {
 			max_sclk = 75000;
 		}
+	} else if (adev->asic_type == CHIP_OLAND) {
+		if ((adev->pdev->device == 0x6604) &&
+		    (adev->pdev->subsystem_vendor == 0x1028) &&
+		    (adev->pdev->subsystem_device == 0x066F)) {
+			max_sclk = 75000;
+		}
 	}
 
 	if (rps->vce_active) {
-- 
cgit v1.2.3


From 801a6aa9a63c90724e8899982ad8c7f16be1e2cd Mon Sep 17 00:00:00 2001
From: Tom St Denis <tom.stdenis@amd.com>
Date: Wed, 15 Mar 2017 05:34:25 -0400
Subject: drm/amd/amdgpu:  Fix debugfs reg read/write address width
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The MMIO space is wider now so we mask the lower 22 bits
instead of 18.

Signed-off-by: Tom St Denis <tom.stdenis@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 4120b351a8e5..a3a105ec99e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2590,7 +2590,7 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
 		use_bank = 0;
 	}
 
-	*pos &= 0x3FFFF;
+	*pos &= (1UL << 22) - 1;
 
 	if (use_bank) {
 		if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
@@ -2666,7 +2666,7 @@ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
 		use_bank = 0;
 	}
 
-	*pos &= 0x3FFFF;
+	*pos &= (1UL << 22) - 1;
 
 	if (use_bank) {
 		if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
-- 
cgit v1.2.3


From 186ecf14e58befba434f0774eea89e35f64d3c6a Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Wed, 15 Mar 2017 21:48:42 +0100
Subject: parisc: Avoid compiler warnings with access_ok()

Commit 09b871ffd4d8 (parisc: Define access_ok() as macro) missed to mark uaddr
as used, which then gives compiler warnings about unused variables.

Fix it by comparing uaddr to uaddr which then gets optimized away by the
compiler.

Signed-off-by: Helge Deller <deller@gmx.de>
Fixes: 09b871ffd4d8 ("parisc: Define access_ok() as macro")
---
 arch/parisc/include/asm/uaccess.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index fb4382c28259..edfbf9d6a6dd 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -32,7 +32,8 @@
  * that put_user is the same as __put_user, etc.
  */
 
-#define access_ok(type, uaddr, size) (1)
+#define access_ok(type, uaddr, size)	\
+	( (uaddr) == (uaddr) )
 
 #define put_user __put_user
 #define get_user __get_user
-- 
cgit v1.2.3


From 3d20f1f7bd575d147ffa75621fa560eea0aec690 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Wed, 15 Mar 2017 18:10:47 +0200
Subject: net/openvswitch: Set the ipv6 source tunnel key address attribute
 correctly

When dealing with ipv6 source tunnel key address attribute
(OVS_TUNNEL_KEY_ATTR_IPV6_SRC) we are wrongly setting the tunnel
dst ip, fix that.

Fixes: 6b26ba3a7d95 ('openvswitch: netlink attributes for IPv6 tunneling')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reported-by: Paul Blakey <paulb@mellanox.com>
Acked-by: Jiri Benc <jbenc@redhat.com>
Acked-by: Joe Stringer <joe@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/flow_netlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 6f5fa50f716d..a08ff834676b 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -604,7 +604,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
 			ipv4 = true;
 			break;
 		case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
-			SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
+			SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src,
 					nla_get_in6_addr(a), is_mask);
 			ipv6 = true;
 			break;
-- 
cgit v1.2.3


From 88d339e2d3be955848a034970970931a7ed33956 Mon Sep 17 00:00:00 2001
From: Pablo Neira <pablo@netfilter.org>
Date: Wed, 15 Mar 2017 18:39:46 +0100
Subject: MAINTAINERS: remove MACVLAN and VLAN entries

macvlan.c file seems to be both in VLAN and MACVLAN DRIVER, so remove
the MACVLAN DRIVER since this is redundant.

I propose with this patch to remove the VLAN (802.1Q) entry so this just
falls into the NETWORKING [GENERAL].

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index c776906f67a9..33875e07482b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7774,13 +7774,6 @@ F:	include/net/mac80211.h
 F:	net/mac80211/
 F:	drivers/net/wireless/mac80211_hwsim.[ch]
 
-MACVLAN DRIVER
-M:	Patrick McHardy <kaber@trash.net>
-L:	netdev@vger.kernel.org
-S:	Maintained
-F:	drivers/net/macvlan.c
-F:	include/linux/if_macvlan.h
-
 MAILBOX API
 M:	Jassi Brar <jassisinghbrar@gmail.com>
 L:	linux-kernel@vger.kernel.org
@@ -13383,14 +13376,6 @@ W:	https://linuxtv.org
 S:	Maintained
 F:	drivers/media/platform/vivid/*
 
-VLAN (802.1Q)
-M:	Patrick McHardy <kaber@trash.net>
-L:	netdev@vger.kernel.org
-S:	Maintained
-F:	drivers/net/macvlan.c
-F:	include/linux/if_*vlan.h
-F:	net/8021q/
-
 VLYNQ BUS
 M:	Florian Fainelli <f.fainelli@gmail.com>
 L:	openwrt-devel@lists.openwrt.org (subscribers-only)
-- 
cgit v1.2.3


From 5371bbf4b295eea334ed453efa286afa2c3ccff3 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 15 Mar 2017 12:57:21 -0700
Subject: net: bcmgenet: Do not suspend PHY if Wake-on-LAN is enabled

Suspending the PHY would be putting it in a low power state where it
may no longer allow us to do Wake-on-LAN.

Fixes: cc013fb48898 ("net: bcmgenet: correctly suspend and resume PHY device")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 69015fa50f20..365895ed3c3e 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -3481,7 +3481,8 @@ static int bcmgenet_suspend(struct device *d)
 
 	bcmgenet_netif_stop(dev);
 
-	phy_suspend(priv->phydev);
+	if (!device_may_wakeup(d))
+		phy_suspend(priv->phydev);
 
 	netif_device_detach(dev);
 
@@ -3578,7 +3579,8 @@ static int bcmgenet_resume(struct device *d)
 
 	netif_device_attach(dev);
 
-	phy_resume(priv->phydev);
+	if (!device_may_wakeup(d))
+		phy_resume(priv->phydev);
 
 	if (priv->eee.eee_enabled)
 		bcmgenet_eee_enable_set(dev, true);
-- 
cgit v1.2.3


From 622c36f143fc9566ba49d7cec994c2da1182d9e2 Mon Sep 17 00:00:00 2001
From: "Lendacky, Thomas" <Thomas.Lendacky@amd.com>
Date: Wed, 15 Mar 2017 15:11:23 -0500
Subject: amd-xgbe: Fix jumbo MTU processing on newer hardware

Newer hardware does not provide a cumulative payload length when multiple
descriptors are needed to handle the data. Once the MTU increases beyond
the size that can be handled by a single descriptor, the SKB does not get
built properly by the driver.

The driver will now calculate the size of the data buffers used by the
hardware.  The first buffer of the first descriptor is for packet headers
or packet headers and data when the headers can't be split. Subsequent
descriptors in a multi-descriptor chain will not use the first buffer. The
second buffer is used by all the descriptors in the chain for payload data.
Based on whether the driver is processing the first, intermediate, or last
descriptor it can calculate the buffer usage and build the SKB properly.

Tested and verified on both old and new hardware.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amd/xgbe/xgbe-common.h |   6 +-
 drivers/net/ethernet/amd/xgbe/xgbe-dev.c    |  20 +++---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c    | 102 +++++++++++++++++-----------
 3 files changed, 78 insertions(+), 50 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
index 8a280e7d66bd..86f1626816ff 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
@@ -1148,8 +1148,8 @@
 #define RX_PACKET_ATTRIBUTES_CSUM_DONE_WIDTH	1
 #define RX_PACKET_ATTRIBUTES_VLAN_CTAG_INDEX	1
 #define RX_PACKET_ATTRIBUTES_VLAN_CTAG_WIDTH	1
-#define RX_PACKET_ATTRIBUTES_INCOMPLETE_INDEX	2
-#define RX_PACKET_ATTRIBUTES_INCOMPLETE_WIDTH	1
+#define RX_PACKET_ATTRIBUTES_LAST_INDEX		2
+#define RX_PACKET_ATTRIBUTES_LAST_WIDTH		1
 #define RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_INDEX	3
 #define RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_WIDTH	1
 #define RX_PACKET_ATTRIBUTES_CONTEXT_INDEX	4
@@ -1158,6 +1158,8 @@
 #define RX_PACKET_ATTRIBUTES_RX_TSTAMP_WIDTH	1
 #define RX_PACKET_ATTRIBUTES_RSS_HASH_INDEX	6
 #define RX_PACKET_ATTRIBUTES_RSS_HASH_WIDTH	1
+#define RX_PACKET_ATTRIBUTES_FIRST_INDEX	7
+#define RX_PACKET_ATTRIBUTES_FIRST_WIDTH	1
 
 #define RX_NORMAL_DESC0_OVT_INDEX		0
 #define RX_NORMAL_DESC0_OVT_WIDTH		16
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
index 937f37a5dcb2..24a687ce4388 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
@@ -1896,10 +1896,15 @@ static int xgbe_dev_read(struct xgbe_channel *channel)
 
 	/* Get the header length */
 	if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, FD)) {
+		XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
+			       FIRST, 1);
 		rdata->rx.hdr_len = XGMAC_GET_BITS_LE(rdesc->desc2,
 						      RX_NORMAL_DESC2, HL);
 		if (rdata->rx.hdr_len)
 			pdata->ext_stats.rx_split_header_packets++;
+	} else {
+		XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
+			       FIRST, 0);
 	}
 
 	/* Get the RSS hash */
@@ -1922,19 +1927,16 @@ static int xgbe_dev_read(struct xgbe_channel *channel)
 		}
 	}
 
-	/* Get the packet length */
-	rdata->rx.len = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, PL);
-
-	if (!XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, LD)) {
-		/* Not all the data has been transferred for this packet */
-		XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
-			       INCOMPLETE, 1);
+	/* Not all the data has been transferred for this packet */
+	if (!XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, LD))
 		return 0;
-	}
 
 	/* This is the last of the data for this packet */
 	XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
-		       INCOMPLETE, 0);
+		       LAST, 1);
+
+	/* Get the packet length */
+	rdata->rx.len = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, PL);
 
 	/* Set checksum done indicator as appropriate */
 	if (netdev->features & NETIF_F_RXCSUM)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index ffea9859f5a7..a713abd9d03e 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1971,13 +1971,12 @@ static struct sk_buff *xgbe_create_skb(struct xgbe_prv_data *pdata,
 {
 	struct sk_buff *skb;
 	u8 *packet;
-	unsigned int copy_len;
 
 	skb = napi_alloc_skb(napi, rdata->rx.hdr.dma_len);
 	if (!skb)
 		return NULL;
 
-	/* Start with the header buffer which may contain just the header
+	/* Pull in the header buffer which may contain just the header
 	 * or the header plus data
 	 */
 	dma_sync_single_range_for_cpu(pdata->dev, rdata->rx.hdr.dma_base,
@@ -1986,30 +1985,49 @@ static struct sk_buff *xgbe_create_skb(struct xgbe_prv_data *pdata,
 
 	packet = page_address(rdata->rx.hdr.pa.pages) +
 		 rdata->rx.hdr.pa.pages_offset;
-	copy_len = (rdata->rx.hdr_len) ? rdata->rx.hdr_len : len;
-	copy_len = min(rdata->rx.hdr.dma_len, copy_len);
-	skb_copy_to_linear_data(skb, packet, copy_len);
-	skb_put(skb, copy_len);
-
-	len -= copy_len;
-	if (len) {
-		/* Add the remaining data as a frag */
-		dma_sync_single_range_for_cpu(pdata->dev,
-					      rdata->rx.buf.dma_base,
-					      rdata->rx.buf.dma_off,
-					      rdata->rx.buf.dma_len,
-					      DMA_FROM_DEVICE);
-
-		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
-				rdata->rx.buf.pa.pages,
-				rdata->rx.buf.pa.pages_offset,
-				len, rdata->rx.buf.dma_len);
-		rdata->rx.buf.pa.pages = NULL;
-	}
+	skb_copy_to_linear_data(skb, packet, len);
+	skb_put(skb, len);
 
 	return skb;
 }
 
+static unsigned int xgbe_rx_buf1_len(struct xgbe_ring_data *rdata,
+				     struct xgbe_packet_data *packet)
+{
+	/* Always zero if not the first descriptor */
+	if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, FIRST))
+		return 0;
+
+	/* First descriptor with split header, return header length */
+	if (rdata->rx.hdr_len)
+		return rdata->rx.hdr_len;
+
+	/* First descriptor but not the last descriptor and no split header,
+	 * so the full buffer was used
+	 */
+	if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, LAST))
+		return rdata->rx.hdr.dma_len;
+
+	/* First descriptor and last descriptor and no split header, so
+	 * calculate how much of the buffer was used
+	 */
+	return min_t(unsigned int, rdata->rx.hdr.dma_len, rdata->rx.len);
+}
+
+static unsigned int xgbe_rx_buf2_len(struct xgbe_ring_data *rdata,
+				     struct xgbe_packet_data *packet,
+				     unsigned int len)
+{
+	/* Always the full buffer if not the last descriptor */
+	if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, LAST))
+		return rdata->rx.buf.dma_len;
+
+	/* Last descriptor so calculate how much of the buffer was used
+	 * for the last bit of data
+	 */
+	return rdata->rx.len - len;
+}
+
 static int xgbe_tx_poll(struct xgbe_channel *channel)
 {
 	struct xgbe_prv_data *pdata = channel->pdata;
@@ -2092,8 +2110,8 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget)
 	struct napi_struct *napi;
 	struct sk_buff *skb;
 	struct skb_shared_hwtstamps *hwtstamps;
-	unsigned int incomplete, error, context_next, context;
-	unsigned int len, rdesc_len, max_len;
+	unsigned int last, error, context_next, context;
+	unsigned int len, buf1_len, buf2_len, max_len;
 	unsigned int received = 0;
 	int packet_count = 0;
 
@@ -2103,7 +2121,7 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget)
 	if (!ring)
 		return 0;
 
-	incomplete = 0;
+	last = 0;
 	context_next = 0;
 
 	napi = (pdata->per_channel_irq) ? &channel->napi : &pdata->napi;
@@ -2137,9 +2155,8 @@ read_again:
 		received++;
 		ring->cur++;
 
-		incomplete = XGMAC_GET_BITS(packet->attributes,
-					    RX_PACKET_ATTRIBUTES,
-					    INCOMPLETE);
+		last = XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
+				      LAST);
 		context_next = XGMAC_GET_BITS(packet->attributes,
 					      RX_PACKET_ATTRIBUTES,
 					      CONTEXT_NEXT);
@@ -2148,7 +2165,7 @@ read_again:
 					 CONTEXT);
 
 		/* Earlier error, just drain the remaining data */
-		if ((incomplete || context_next) && error)
+		if ((!last || context_next) && error)
 			goto read_again;
 
 		if (error || packet->errors) {
@@ -2160,16 +2177,22 @@ read_again:
 		}
 
 		if (!context) {
-			/* Length is cumulative, get this descriptor's length */
-			rdesc_len = rdata->rx.len - len;
-			len += rdesc_len;
+			/* Get the data length in the descriptor buffers */
+			buf1_len = xgbe_rx_buf1_len(rdata, packet);
+			len += buf1_len;
+			buf2_len = xgbe_rx_buf2_len(rdata, packet, len);
+			len += buf2_len;
 
-			if (rdesc_len && !skb) {
+			if (!skb) {
 				skb = xgbe_create_skb(pdata, napi, rdata,
-						      rdesc_len);
-				if (!skb)
+						      buf1_len);
+				if (!skb) {
 					error = 1;
-			} else if (rdesc_len) {
+					goto skip_data;
+				}
+			}
+
+			if (buf2_len) {
 				dma_sync_single_range_for_cpu(pdata->dev,
 							rdata->rx.buf.dma_base,
 							rdata->rx.buf.dma_off,
@@ -2179,13 +2202,14 @@ read_again:
 				skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
 						rdata->rx.buf.pa.pages,
 						rdata->rx.buf.pa.pages_offset,
-						rdesc_len,
+						buf2_len,
 						rdata->rx.buf.dma_len);
 				rdata->rx.buf.pa.pages = NULL;
 			}
 		}
 
-		if (incomplete || context_next)
+skip_data:
+		if (!last || context_next)
 			goto read_again;
 
 		if (!skb)
@@ -2243,7 +2267,7 @@ next_packet:
 	}
 
 	/* Check if we need to save state before leaving */
-	if (received && (incomplete || context_next)) {
+	if (received && (!last || context_next)) {
 		rdata = XGBE_GET_DESC_DATA(ring, ring->cur);
 		rdata->state_saved = 1;
 		rdata->state.skb = skb;
-- 
cgit v1.2.3


From 22a0e18eac7a9e986fec76c60fa4a2926d1291e2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 15 Mar 2017 13:21:28 -0700
Subject: net: properly release sk_frag.page

I mistakenly added the code to release sk->sk_frag in
sk_common_release() instead of sk_destruct()

TCP sockets using sk->sk_allocation == GFP_ATOMIC do no call
sk_common_release() at close time, thus leaking one (order-3) page.

iSCSI is using such sockets.

Fixes: 5640f7685831 ("net: use a per task frag allocator")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index a96d5f7a5734..acb0d4137499 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1442,6 +1442,11 @@ static void __sk_destruct(struct rcu_head *head)
 		pr_debug("%s: optmem leakage (%d bytes) detected\n",
 			 __func__, atomic_read(&sk->sk_omem_alloc));
 
+	if (sk->sk_frag.page) {
+		put_page(sk->sk_frag.page);
+		sk->sk_frag.page = NULL;
+	}
+
 	if (sk->sk_peer_cred)
 		put_cred(sk->sk_peer_cred);
 	put_pid(sk->sk_peer_pid);
@@ -2787,11 +2792,6 @@ void sk_common_release(struct sock *sk)
 
 	sk_refcnt_debug_release(sk);
 
-	if (sk->sk_frag.page) {
-		put_page(sk->sk_frag.page);
-		sk->sk_frag.page = NULL;
-	}
-
 	sock_put(sk);
 }
 EXPORT_SYMBOL(sk_common_release);
-- 
cgit v1.2.3


From 9b4f603e7a9f4282aec451063ffbbb8bb410dcd9 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 15 Mar 2017 00:12:16 +0100
Subject: cpufreq: Fix and clean up show_cpuinfo_cur_freq()

There is a missing newline in show_cpuinfo_cur_freq(), so add it,
but while at it clean that function up somewhat too.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Cc: All applicable <stable@vger.kernel.org>
---
 drivers/cpufreq/cpufreq.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 38b9fdf854a4..b8ff617d449d 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -680,9 +680,11 @@ static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
 					char *buf)
 {
 	unsigned int cur_freq = __cpufreq_get(policy);
-	if (!cur_freq)
-		return sprintf(buf, "<unknown>");
-	return sprintf(buf, "%u\n", cur_freq);
+
+	if (cur_freq)
+		return sprintf(buf, "%u\n", cur_freq);
+
+	return sprintf(buf, "<unknown>\n");
 }
 
 /**
-- 
cgit v1.2.3


From a733ded50b6ea846200073e7381a302df71e13b3 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Sun, 5 Mar 2017 21:40:41 +0200
Subject: mei: fix deadlock on mei reset

This patch fixes 'mei: synchronize irq before initiating a reset'
The patch had introduced a deadlock between irq thread and mei_reset()
as they are both holding the same device lock.

---> device_lock:
	mei_reset()
                        <---- interrupt thread
	                        device_lock
---> synchornize_irq()
       wait on interrupt thread == (dead lock)

The fix is to call synchronize_irq
prior to call locked mei_reset function.

Cc: <stable@vger.kernel.org> #4.10+
Fixes: f302bb0de6ac (mei: synchronize irq before initiating a reset)
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mei/init.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c
index cfb1cdf176fa..13c55b8f9261 100644
--- a/drivers/misc/mei/init.c
+++ b/drivers/misc/mei/init.c
@@ -124,8 +124,6 @@ int mei_reset(struct mei_device *dev)
 
 	mei_clear_interrupts(dev);
 
-	mei_synchronize_irq(dev);
-
 	/* we're already in reset, cancel the init timer
 	 * if the reset was called due the hbm protocol error
 	 * we need to call it before hw start
@@ -304,6 +302,9 @@ static void mei_reset_work(struct work_struct *work)
 		container_of(work, struct mei_device,  reset_work);
 	int ret;
 
+	mei_clear_interrupts(dev);
+	mei_synchronize_irq(dev);
+
 	mutex_lock(&dev->device_lock);
 
 	ret = mei_reset(dev);
@@ -328,6 +329,9 @@ void mei_stop(struct mei_device *dev)
 
 	mei_cancel_work(dev);
 
+	mei_clear_interrupts(dev);
+	mei_synchronize_irq(dev);
+
 	mutex_lock(&dev->device_lock);
 
 	dev->dev_state = MEI_DEV_POWER_DOWN;
-- 
cgit v1.2.3


From c6240cacdb2c3cb56a21fb3ea0c105154ab87a2a Mon Sep 17 00:00:00 2001
From: Alexander Usyskin <alexander.usyskin@intel.com>
Date: Sun, 5 Mar 2017 21:40:42 +0200
Subject: mei: don't wait for os version message reply

The driver still struggles with firmwares that do not replay to the OS
version request. It is safe not waiting for the replay. First, the driver
doesn't do anything with the replay second the connection is closed
immediately, hence the packet will be just safely discarded in case it
is received and last the driver won't get stuck if the firmware won't
reply.

Cc: <stable@vger.kernel.org> #4.10+
Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mei/bus-fixup.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c
index 3600c9993a98..29f2daed37e0 100644
--- a/drivers/misc/mei/bus-fixup.c
+++ b/drivers/misc/mei/bus-fixup.c
@@ -112,11 +112,9 @@ struct mkhi_msg {
 
 static int mei_osver(struct mei_cl_device *cldev)
 {
-	int ret;
 	const size_t size = sizeof(struct mkhi_msg_hdr) +
 			    sizeof(struct mkhi_fwcaps) +
 			    sizeof(struct mei_os_ver);
-	size_t length = 8;
 	char buf[size];
 	struct mkhi_msg *req;
 	struct mkhi_fwcaps *fwcaps;
@@ -137,15 +135,7 @@ static int mei_osver(struct mei_cl_device *cldev)
 	os_ver = (struct mei_os_ver *)fwcaps->data;
 	os_ver->os_type = OSTYPE_LINUX;
 
-	ret = __mei_cl_send(cldev->cl, buf, size, mode);
-	if (ret < 0)
-		return ret;
-
-	ret = __mei_cl_recv(cldev->cl, buf, length, 0);
-	if (ret < 0)
-		return ret;
-
-	return 0;
+	return __mei_cl_send(cldev->cl, buf, size, mode);
 }
 
 static void mei_mkhi_fix(struct mei_cl_device *cldev)
@@ -160,7 +150,7 @@ static void mei_mkhi_fix(struct mei_cl_device *cldev)
 		return;
 
 	ret = mei_osver(cldev);
-	if (ret)
+	if (ret < 0)
 		dev_err(&cldev->dev, "OS version command failed %d\n", ret);
 
 	mei_cldev_disable(cldev);
-- 
cgit v1.2.3


From 8200f2085abe7f29a016381f3122000cc7b2a760 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Sat, 4 Mar 2017 18:13:57 -0700
Subject: vmbus: use rcu for per-cpu channel list

The per-cpu channel list is now referred to in the interrupt
routine. This is mostly safe since the host will not normally generate
an interrupt when channel is being deleted but if it did then there
would be a use after free problem.

To solve, this use RCU protection on ther per-cpu list.

Fixes: 631e63a9f346 ("vmbus: change to per channel tasklet")

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel_mgmt.c | 7 ++++---
 drivers/hv/vmbus_drv.c    | 6 +++++-
 include/linux/hyperv.h    | 7 +++++++
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index f33465d78a02..d2cfa3eb71a2 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -350,7 +350,8 @@ static struct vmbus_channel *alloc_channel(void)
 static void free_channel(struct vmbus_channel *channel)
 {
 	tasklet_kill(&channel->callback_event);
-	kfree(channel);
+
+	kfree_rcu(channel, rcu);
 }
 
 static void percpu_channel_enq(void *arg)
@@ -359,14 +360,14 @@ static void percpu_channel_enq(void *arg)
 	struct hv_per_cpu_context *hv_cpu
 		= this_cpu_ptr(hv_context.cpu_context);
 
-	list_add_tail(&channel->percpu_list, &hv_cpu->chan_list);
+	list_add_tail_rcu(&channel->percpu_list, &hv_cpu->chan_list);
 }
 
 static void percpu_channel_deq(void *arg)
 {
 	struct vmbus_channel *channel = arg;
 
-	list_del(&channel->percpu_list);
+	list_del_rcu(&channel->percpu_list);
 }
 
 
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index da6b59ba5940..8370b9dc6037 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -939,8 +939,10 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
 		if (relid == 0)
 			continue;
 
+		rcu_read_lock();
+
 		/* Find channel based on relid */
-		list_for_each_entry(channel, &hv_cpu->chan_list, percpu_list) {
+		list_for_each_entry_rcu(channel, &hv_cpu->chan_list, percpu_list) {
 			if (channel->offermsg.child_relid != relid)
 				continue;
 
@@ -956,6 +958,8 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
 				tasklet_schedule(&channel->callback_event);
 			}
 		}
+
+		rcu_read_unlock();
 	}
 }
 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 62bbf3c1aa4a..c4c7ae91f9d1 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -845,6 +845,13 @@ struct vmbus_channel {
 	 * link up channels based on their CPU affinity.
 	 */
 	struct list_head percpu_list;
+
+	/*
+	 * Defer freeing channel until after all cpu's have
+	 * gone through grace period.
+	 */
+	struct rcu_head rcu;
+
 	/*
 	 * For performance critical channels (storage, networking
 	 * etc,), Hyper-V has a mechanism to enhance the throughput
-- 
cgit v1.2.3


From dad72a1d28442b03aac86836a42de2d00a1014ab Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Sat, 4 Mar 2017 18:13:58 -0700
Subject: vmbus: remove hv_event_tasklet_disable/enable

With the recent introduction of per-channel tasklet, we need to update
the way we handle the 3 concurrency issues:

1. hv_process_channel_removal -> percpu_channel_deq vs.
   vmbus_chan_sched -> list_for_each_entry(..., percpu_list);

2. vmbus_process_offer -> percpu_channel_enq/deq vs. vmbus_chan_sched.

3. vmbus_close_internal vs. the per-channel tasklet vmbus_on_event;

The first 2 issues can be handled by Stephen's recent patch
"vmbus: use rcu for per-cpu channel list", and the third issue
can be handled by calling tasklet_disable in vmbus_close_internal here.

We don't need the original hv_event_tasklet_disable/enable since we
now use per-channel tasklet instead of the previous per-CPU tasklet,
and actually we must remove them due to the side effect now:
vmbus_process_offer -> hv_event_tasklet_enable -> tasklet_schedule will
start the per-channel callback prematurely, cauing NULL dereferencing
(the channel may haven't been properly configured to run the callback yet).

Fixes: 631e63a9f346 ("vmbus: change to per channel tasklet")

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Tested-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c      | 12 ++++--------
 drivers/hv/channel_mgmt.c | 19 -------------------
 include/linux/hyperv.h    |  3 ---
 3 files changed, 4 insertions(+), 30 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index bd0d1988feb2..57b2958205c7 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -530,15 +530,13 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
 	int ret;
 
 	/*
-	 * vmbus_on_event(), running in the tasklet, can race
+	 * vmbus_on_event(), running in the per-channel tasklet, can race
 	 * with vmbus_close_internal() in the case of SMP guest, e.g., when
 	 * the former is accessing channel->inbound.ring_buffer, the latter
-	 * could be freeing the ring_buffer pages.
-	 *
-	 * To resolve the race, we can serialize them by disabling the
-	 * tasklet when the latter is running here.
+	 * could be freeing the ring_buffer pages, so here we must stop it
+	 * first.
 	 */
-	hv_event_tasklet_disable(channel);
+	tasklet_disable(&channel->callback_event);
 
 	/*
 	 * In case a device driver's probe() fails (e.g.,
@@ -605,8 +603,6 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
 		get_order(channel->ringbuffer_pagecount * PAGE_SIZE));
 
 out:
-	hv_event_tasklet_enable(channel);
-
 	return ret;
 }
 
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index d2cfa3eb71a2..bf846d078d85 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -382,19 +382,6 @@ static void vmbus_release_relid(u32 relid)
 		       true);
 }
 
-void hv_event_tasklet_disable(struct vmbus_channel *channel)
-{
-	tasklet_disable(&channel->callback_event);
-}
-
-void hv_event_tasklet_enable(struct vmbus_channel *channel)
-{
-	tasklet_enable(&channel->callback_event);
-
-	/* In case there is any pending event */
-	tasklet_schedule(&channel->callback_event);
-}
-
 void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
 {
 	unsigned long flags;
@@ -403,7 +390,6 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
 	BUG_ON(!channel->rescind);
 	BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
 
-	hv_event_tasklet_disable(channel);
 	if (channel->target_cpu != get_cpu()) {
 		put_cpu();
 		smp_call_function_single(channel->target_cpu,
@@ -412,7 +398,6 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
 		percpu_channel_deq(channel);
 		put_cpu();
 	}
-	hv_event_tasklet_enable(channel);
 
 	if (channel->primary_channel == NULL) {
 		list_del(&channel->listentry);
@@ -506,7 +491,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
 
 	init_vp_index(newchannel, dev_type);
 
-	hv_event_tasklet_disable(newchannel);
 	if (newchannel->target_cpu != get_cpu()) {
 		put_cpu();
 		smp_call_function_single(newchannel->target_cpu,
@@ -516,7 +500,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
 		percpu_channel_enq(newchannel);
 		put_cpu();
 	}
-	hv_event_tasklet_enable(newchannel);
 
 	/*
 	 * This state is used to indicate a successful open
@@ -566,7 +549,6 @@ err_deq_chan:
 	list_del(&newchannel->listentry);
 	mutex_unlock(&vmbus_connection.channel_mutex);
 
-	hv_event_tasklet_disable(newchannel);
 	if (newchannel->target_cpu != get_cpu()) {
 		put_cpu();
 		smp_call_function_single(newchannel->target_cpu,
@@ -575,7 +557,6 @@ err_deq_chan:
 		percpu_channel_deq(newchannel);
 		put_cpu();
 	}
-	hv_event_tasklet_enable(newchannel);
 
 	vmbus_release_relid(newchannel->offermsg.child_relid);
 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index c4c7ae91f9d1..970771a5f739 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1437,9 +1437,6 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf,
 				const int *srv_version, int srv_vercnt,
 				int *nego_fw_version, int *nego_srv_version);
 
-void hv_event_tasklet_disable(struct vmbus_channel *channel);
-void hv_event_tasklet_enable(struct vmbus_channel *channel);
-
 void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid);
 
 void vmbus_setevent(struct vmbus_channel *channel);
-- 
cgit v1.2.3


From e9c18ae6eb2b312f16c63e34b43ea23926daa398 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Sat, 4 Mar 2017 18:13:59 -0700
Subject: Drivers: hv: util: move waiting for release to hv_utils_transport
 itself

Waiting for release_event in all three drivers introduced issues on release
as on_reset() hook is not always called. E.g. if the device was never
opened we will never get the completion.

Move the waiting code to hvutil_transport_destroy() and make sure it is
only called when the device is open. hvt->lock serialization should
guarantee the absence of races.

Fixes: 5a66fecbf6aa ("Drivers: hv: util: kvp: Fix a rescind processing issue")
Fixes: 20951c7535b5 ("Drivers: hv: util: Fcopy: Fix a rescind processing issue")
Fixes: d77044d142e9 ("Drivers: hv: util: Backup: Fix a rescind processing issue")

Reported-by: Dexuan Cui <decui@microsoft.com>
Tested-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/hv_fcopy.c           |  4 ----
 drivers/hv/hv_kvp.c             |  4 ----
 drivers/hv/hv_snapshot.c        |  4 ----
 drivers/hv/hv_utils_transport.c | 12 ++++++++----
 drivers/hv/hv_utils_transport.h |  1 +
 5 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
index 9aee6014339d..a5596a642ed0 100644
--- a/drivers/hv/hv_fcopy.c
+++ b/drivers/hv/hv_fcopy.c
@@ -71,7 +71,6 @@ static DECLARE_WORK(fcopy_send_work, fcopy_send_data);
 static const char fcopy_devname[] = "vmbus/hv_fcopy";
 static u8 *recv_buffer;
 static struct hvutil_transport *hvt;
-static struct completion release_event;
 /*
  * This state maintains the version number registered by the daemon.
  */
@@ -331,7 +330,6 @@ static void fcopy_on_reset(void)
 
 	if (cancel_delayed_work_sync(&fcopy_timeout_work))
 		fcopy_respond_to_host(HV_E_FAIL);
-	complete(&release_event);
 }
 
 int hv_fcopy_init(struct hv_util_service *srv)
@@ -339,7 +337,6 @@ int hv_fcopy_init(struct hv_util_service *srv)
 	recv_buffer = srv->recv_buffer;
 	fcopy_transaction.recv_channel = srv->channel;
 
-	init_completion(&release_event);
 	/*
 	 * When this driver loads, the user level daemon that
 	 * processes the host requests may not yet be running.
@@ -361,5 +358,4 @@ void hv_fcopy_deinit(void)
 	fcopy_transaction.state = HVUTIL_DEVICE_DYING;
 	cancel_delayed_work_sync(&fcopy_timeout_work);
 	hvutil_transport_destroy(hvt);
-	wait_for_completion(&release_event);
 }
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index de263712e247..a1adfe2cfb34 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -101,7 +101,6 @@ static DECLARE_WORK(kvp_sendkey_work, kvp_send_key);
 static const char kvp_devname[] = "vmbus/hv_kvp";
 static u8 *recv_buffer;
 static struct hvutil_transport *hvt;
-static struct completion release_event;
 /*
  * Register the kernel component with the user-level daemon.
  * As part of this registration, pass the LIC version number.
@@ -714,7 +713,6 @@ static void kvp_on_reset(void)
 	if (cancel_delayed_work_sync(&kvp_timeout_work))
 		kvp_respond_to_host(NULL, HV_E_FAIL);
 	kvp_transaction.state = HVUTIL_DEVICE_INIT;
-	complete(&release_event);
 }
 
 int
@@ -723,7 +721,6 @@ hv_kvp_init(struct hv_util_service *srv)
 	recv_buffer = srv->recv_buffer;
 	kvp_transaction.recv_channel = srv->channel;
 
-	init_completion(&release_event);
 	/*
 	 * When this driver loads, the user level daemon that
 	 * processes the host requests may not yet be running.
@@ -747,5 +744,4 @@ void hv_kvp_deinit(void)
 	cancel_delayed_work_sync(&kvp_timeout_work);
 	cancel_work_sync(&kvp_sendkey_work);
 	hvutil_transport_destroy(hvt);
-	wait_for_completion(&release_event);
 }
diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
index bcc03f0748d6..e659d1b94a57 100644
--- a/drivers/hv/hv_snapshot.c
+++ b/drivers/hv/hv_snapshot.c
@@ -79,7 +79,6 @@ static int dm_reg_value;
 static const char vss_devname[] = "vmbus/hv_vss";
 static __u8 *recv_buffer;
 static struct hvutil_transport *hvt;
-static struct completion release_event;
 
 static void vss_timeout_func(struct work_struct *dummy);
 static void vss_handle_request(struct work_struct *dummy);
@@ -361,13 +360,11 @@ static void vss_on_reset(void)
 	if (cancel_delayed_work_sync(&vss_timeout_work))
 		vss_respond_to_host(HV_E_FAIL);
 	vss_transaction.state = HVUTIL_DEVICE_INIT;
-	complete(&release_event);
 }
 
 int
 hv_vss_init(struct hv_util_service *srv)
 {
-	init_completion(&release_event);
 	if (vmbus_proto_version < VERSION_WIN8_1) {
 		pr_warn("Integration service 'Backup (volume snapshot)'"
 			" not supported on this host version.\n");
@@ -400,5 +397,4 @@ void hv_vss_deinit(void)
 	cancel_delayed_work_sync(&vss_timeout_work);
 	cancel_work_sync(&vss_handle_request_work);
 	hvutil_transport_destroy(hvt);
-	wait_for_completion(&release_event);
 }
diff --git a/drivers/hv/hv_utils_transport.c b/drivers/hv/hv_utils_transport.c
index c235a9515267..4402a71e23f7 100644
--- a/drivers/hv/hv_utils_transport.c
+++ b/drivers/hv/hv_utils_transport.c
@@ -182,10 +182,11 @@ static int hvt_op_release(struct inode *inode, struct file *file)
 	 * connects back.
 	 */
 	hvt_reset(hvt);
-	mutex_unlock(&hvt->lock);
 
 	if (mode_old == HVUTIL_TRANSPORT_DESTROY)
-		hvt_transport_free(hvt);
+		complete(&hvt->release);
+
+	mutex_unlock(&hvt->lock);
 
 	return 0;
 }
@@ -304,6 +305,7 @@ struct hvutil_transport *hvutil_transport_init(const char *name,
 
 	init_waitqueue_head(&hvt->outmsg_q);
 	mutex_init(&hvt->lock);
+	init_completion(&hvt->release);
 
 	spin_lock(&hvt_list_lock);
 	list_add(&hvt->list, &hvt_list);
@@ -351,6 +353,8 @@ void hvutil_transport_destroy(struct hvutil_transport *hvt)
 	if (hvt->cn_id.idx > 0 && hvt->cn_id.val > 0)
 		cn_del_callback(&hvt->cn_id);
 
-	if (mode_old != HVUTIL_TRANSPORT_CHARDEV)
-		hvt_transport_free(hvt);
+	if (mode_old == HVUTIL_TRANSPORT_CHARDEV)
+		wait_for_completion(&hvt->release);
+
+	hvt_transport_free(hvt);
 }
diff --git a/drivers/hv/hv_utils_transport.h b/drivers/hv/hv_utils_transport.h
index d98f5225c3e6..79afb626e166 100644
--- a/drivers/hv/hv_utils_transport.h
+++ b/drivers/hv/hv_utils_transport.h
@@ -41,6 +41,7 @@ struct hvutil_transport {
 	int outmsg_len;                     /* its length */
 	wait_queue_head_t outmsg_q;         /* poll/read wait queue */
 	struct mutex lock;                  /* protects struct members */
+	struct completion release;          /* synchronize with fd release */
 };
 
 struct hvutil_transport *hvutil_transport_init(const char *name,
-- 
cgit v1.2.3


From 5a16dfc855127906fcd2935fb039bc8989313915 Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Sat, 4 Mar 2017 18:14:00 -0700
Subject: Drivers: hv: util: don't forget to init host_ts.lock

Without the patch, I always get a "BUG: spinlock bad magic" warning.

Fixes: 3716a49a81ba ("hv_utils: implement Hyper-V PTP source")

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/hv_util.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index 3042eaa13062..186b10083c55 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -590,6 +590,8 @@ static int hv_timesync_init(struct hv_util_service *srv)
 	if (!hyperv_cs)
 		return -ENODEV;
 
+	spin_lock_init(&host_ts.lock);
+
 	INIT_WORK(&wrk.work, hv_set_host_time);
 
 	/*
-- 
cgit v1.2.3


From 9a5476020a5f06a0fc6f17097efc80275d2f03cd Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Mon, 13 Mar 2017 15:57:09 -0700
Subject: Drivers: hv: vmbus: Don't leak channel ids

If we cannot allocate memory for the channel, free the relid
associated with the channel.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel_mgmt.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index bf846d078d85..fbcb06352308 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -796,6 +796,7 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
 	/* Allocate the channel object and save this offer. */
 	newchannel = alloc_channel();
 	if (!newchannel) {
+		vmbus_release_relid(offer->child_relid);
 		pr_err("Unable to allocate channel object\n");
 		return;
 	}
-- 
cgit v1.2.3


From 5e030d5ce9d99a899b648413139ff65bab12b038 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Sun, 12 Mar 2017 20:00:30 -0700
Subject: Drivers: hv: vmbus: Don't leak memory when a channel is rescinded

When we close a channel that has been rescinded, we will leak memory since
vmbus_teardown_gpadl() returns an error. Fix this so that we can properly
cleanup the memory allocated to the ring buffers.

Fixes: ccb61f8a99e6 ("Drivers: hv: vmbus: Fix a rescind handling bug")

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 57b2958205c7..321b8833fa6f 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -502,12 +502,15 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle)
 
 	wait_for_completion(&info->waitevent);
 
-	if (channel->rescind) {
-		ret = -ENODEV;
-		goto post_msg_err;
-	}
-
 post_msg_err:
+	/*
+	 * If the channel has been rescinded;
+	 * we will be awakened by the rescind
+	 * handler; set the error code to zero so we don't leak memory.
+	 */
+	if (channel->rescind)
+		ret = 0;
+
 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 	list_del(&info->msglistentry);
 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
-- 
cgit v1.2.3


From 9a3fcf912ef7f5c6e18f9af6875dd13f7311f7aa Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Tue, 14 Mar 2017 09:50:35 +0200
Subject: iwlwifi: mvm: cleanup pending frames in DQA mode

When a station is asleep, the fw will set it as "asleep".
All queues that are used only by one station will be stopped by
the fw.

In pre-DQA mode this was relevant for aggregation queues. However,
in DQA mode a queue is owned by one station only, so all queues
will be stopped.
As a result, we don't expect to get filtered frames back to
mac80211 and don't have to maintain the entire pending_frames
state logic, the same way as we do in aggregations.

The correct behavior is to align DQA behavior with the aggregation
queue behaviour pre-DQA:
- Don't count pending frames.
- Let mac80211 know we have frames in these queues so that it can
properly handle trigger frames.

When a trigger frame is received, mac80211 tells the driver to send
frames from the queues using release_buffered_frames.
The driver will tell the fw to let frames out even if the station
is asleep. This is done by iwl_mvm_sta_modify_sleep_tx_count.

Reported-and-tested-by: Jens Axboe <axboe@kernel.dk>
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c |  5 +--
 drivers/net/wireless/intel/iwlwifi/mvm/sta.c      | 11 +++---
 drivers/net/wireless/intel/iwlwifi/mvm/sta.h      |  2 +-
 drivers/net/wireless/intel/iwlwifi/mvm/tx.c       | 41 ++++++++++-------------
 4 files changed, 28 insertions(+), 31 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index d37b1695c64e..6927caecd48e 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -2319,7 +2319,7 @@ iwl_mvm_mac_release_buffered_frames(struct ieee80211_hw *hw,
 {
 	struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
 
-	/* Called when we need to transmit (a) frame(s) from agg queue */
+	/* Called when we need to transmit (a) frame(s) from agg or dqa queue */
 
 	iwl_mvm_sta_modify_sleep_tx_count(mvm, sta, reason, num_frames,
 					  tids, more_data, true);
@@ -2338,7 +2338,8 @@ static void __iwl_mvm_mac_sta_notify(struct ieee80211_hw *hw,
 	for (tid = 0; tid < IWL_MAX_TID_COUNT; tid++) {
 		struct iwl_mvm_tid_data *tid_data = &mvmsta->tid_data[tid];
 
-		if (tid_data->state != IWL_AGG_ON &&
+		if (!iwl_mvm_is_dqa_supported(mvm) &&
+		    tid_data->state != IWL_AGG_ON &&
 		    tid_data->state != IWL_EMPTYING_HW_QUEUE_DELBA)
 			continue;
 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index bd1dcc863d8f..b51a2853cc80 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -3135,7 +3135,7 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm,
 				       struct ieee80211_sta *sta,
 				       enum ieee80211_frame_release_type reason,
 				       u16 cnt, u16 tids, bool more_data,
-				       bool agg)
+				       bool single_sta_queue)
 {
 	struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
 	struct iwl_mvm_add_sta_cmd cmd = {
@@ -3155,14 +3155,14 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm,
 	for_each_set_bit(tid, &_tids, IWL_MAX_TID_COUNT)
 		cmd.awake_acs |= BIT(tid_to_ucode_ac[tid]);
 
-	/* If we're releasing frames from aggregation queues then check if the
-	 * all queues combined that we're releasing frames from have
+	/* If we're releasing frames from aggregation or dqa queues then check
+	 * if all the queues that we're releasing frames from, combined, have:
 	 *  - more frames than the service period, in which case more_data
 	 *    needs to be set
 	 *  - fewer than 'cnt' frames, in which case we need to adjust the
 	 *    firmware command (but do that unconditionally)
 	 */
-	if (agg) {
+	if (single_sta_queue) {
 		int remaining = cnt;
 		int sleep_tx_count;
 
@@ -3172,7 +3172,8 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm,
 			u16 n_queued;
 
 			tid_data = &mvmsta->tid_data[tid];
-			if (WARN(tid_data->state != IWL_AGG_ON &&
+			if (WARN(!iwl_mvm_is_dqa_supported(mvm) &&
+				 tid_data->state != IWL_AGG_ON &&
 				 tid_data->state != IWL_EMPTYING_HW_QUEUE_DELBA,
 				 "TID %d state is %d\n",
 				 tid, tid_data->state)) {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
index 4be34f902278..1927ce607798 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
@@ -547,7 +547,7 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm,
 				       struct ieee80211_sta *sta,
 				       enum ieee80211_frame_release_type reason,
 				       u16 cnt, u16 tids, bool more_data,
-				       bool agg);
+				       bool single_sta_queue);
 int iwl_mvm_drain_sta(struct iwl_mvm *mvm, struct iwl_mvm_sta *mvmsta,
 		      bool drain);
 void iwl_mvm_sta_modify_disable_tx(struct iwl_mvm *mvm,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
index dd2b4a300819..3f37075f4cde 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@ -7,7 +7,7 @@
  *
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
- * Copyright(c) 2016        Intel Deutschland GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -34,6 +34,7 @@
  *
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -628,8 +629,10 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb)
 	 * values.
 	 * Note that we don't need to make sure it isn't agg'd, since we're
 	 * TXing non-sta
+	 * For DQA mode - we shouldn't increase it though
 	 */
-	atomic_inc(&mvm->pending_frames[sta_id]);
+	if (!iwl_mvm_is_dqa_supported(mvm))
+		atomic_inc(&mvm->pending_frames[sta_id]);
 
 	return 0;
 }
@@ -1005,11 +1008,8 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb,
 
 	spin_unlock(&mvmsta->lock);
 
-	/* Increase pending frames count if this isn't AMPDU */
-	if ((iwl_mvm_is_dqa_supported(mvm) &&
-	     mvmsta->tid_data[tx_cmd->tid_tspec].state != IWL_AGG_ON &&
-	     mvmsta->tid_data[tx_cmd->tid_tspec].state != IWL_AGG_STARTING) ||
-	    (!iwl_mvm_is_dqa_supported(mvm) && !is_ampdu))
+	/* Increase pending frames count if this isn't AMPDU or DQA queue */
+	if (!iwl_mvm_is_dqa_supported(mvm) && !is_ampdu)
 		atomic_inc(&mvm->pending_frames[mvmsta->sta_id]);
 
 	return 0;
@@ -1079,12 +1079,13 @@ static void iwl_mvm_check_ratid_empty(struct iwl_mvm *mvm,
 	lockdep_assert_held(&mvmsta->lock);
 
 	if ((tid_data->state == IWL_AGG_ON ||
-	     tid_data->state == IWL_EMPTYING_HW_QUEUE_DELBA) &&
+	     tid_data->state == IWL_EMPTYING_HW_QUEUE_DELBA ||
+	     iwl_mvm_is_dqa_supported(mvm)) &&
 	    iwl_mvm_tid_queued(tid_data) == 0) {
 		/*
-		 * Now that this aggregation queue is empty tell mac80211 so it
-		 * knows we no longer have frames buffered for the station on
-		 * this TID (for the TIM bitmap calculation.)
+		 * Now that this aggregation or DQA queue is empty tell
+		 * mac80211 so it knows we no longer have frames buffered for
+		 * the station on this TID (for the TIM bitmap calculation.)
 		 */
 		ieee80211_sta_set_buffered(sta, tid, false);
 	}
@@ -1257,7 +1258,6 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm,
 	u8 skb_freed = 0;
 	u16 next_reclaimed, seq_ctl;
 	bool is_ndp = false;
-	bool txq_agg = false; /* Is this TXQ aggregated */
 
 	__skb_queue_head_init(&skbs);
 
@@ -1283,6 +1283,10 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm,
 			info->flags |= IEEE80211_TX_STAT_ACK;
 			break;
 		case TX_STATUS_FAIL_DEST_PS:
+			/* In DQA, the FW should have stopped the queue and not
+			 * return this status
+			 */
+			WARN_ON(iwl_mvm_is_dqa_supported(mvm));
 			info->flags |= IEEE80211_TX_STAT_TX_FILTERED;
 			break;
 		default:
@@ -1387,15 +1391,6 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm,
 			bool send_eosp_ndp = false;
 
 			spin_lock_bh(&mvmsta->lock);
-			if (iwl_mvm_is_dqa_supported(mvm)) {
-				enum iwl_mvm_agg_state state;
-
-				state = mvmsta->tid_data[tid].state;
-				txq_agg = (state == IWL_AGG_ON ||
-					state == IWL_EMPTYING_HW_QUEUE_DELBA);
-			} else {
-				txq_agg = txq_id >= mvm->first_agg_queue;
-			}
 
 			if (!is_ndp) {
 				tid_data->next_reclaimed = next_reclaimed;
@@ -1452,11 +1447,11 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm,
 	 * If the txq is not an AMPDU queue, there is no chance we freed
 	 * several skbs. Check that out...
 	 */
-	if (txq_agg)
+	if (iwl_mvm_is_dqa_supported(mvm) || txq_id >= mvm->first_agg_queue)
 		goto out;
 
 	/* We can't free more than one frame at once on a shared queue */
-	WARN_ON(!iwl_mvm_is_dqa_supported(mvm) && (skb_freed > 1));
+	WARN_ON(skb_freed > 1);
 
 	/* If we have still frames for this STA nothing to do here */
 	if (!atomic_sub_and_test(skb_freed, &mvm->pending_frames[sta_id]))
-- 
cgit v1.2.3


From b7048ea12fbb2724ee0cd30752d4fac43cab0651 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Wed, 15 Mar 2017 16:31:58 +0200
Subject: drm/i915: Do .init_clock_gating() earlier to avoid it clobbering
 watermarks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently ILK-BDW explicitly disable LP1+ watermarks from their
.init_clock_gating() hooks. Unfortunately that hook gets called way too
late since by that time we've already initialized all the watermark
state tracking which then gets out of sync with the hardware state.

We may eventually want to consider killing off the explicit LP1+
disable from .init_clock_gating(). In the meantime however, we can
avoid the problem by reordering the init sequence such that
intel_modeset_init_hw()->intel_init_clock_gating() gets called
prior to the hardware state takeover.

I suppose prior to the two stage watermark programming we were
magically saved by something that forced the watermarks to be
reprogrammed fully after .init_clock_gating() got called. But
now that no longer happens.

Note that the diff might look a bit odd as it kills off one
call of intel_update_cdclk(), but that's fine because
intel_modeset_init_hw() does the exact same thing. Previously
we just did it twice.

Actually even this new init sequence is pretty bogus as
.init_clock_gating() really should be called before any gem
hardware init since it can  configure various clock gating
workarounds and whatnot that affect the GT side as well. Also
intel_modeset_init() really should get split up into better
defined init stages. Another "fun" detail is that
intel_modeset_gem_init() is where RPS/RC6 gets configured.
Why that is done from the display code is beyond me. I've
decided to leave all this be for now, and just try to fix
the init sequence enough for watermarks to work.

Cc: stable@vger.kernel.org
Cc: Gabriele Mazzotta <gabriele.mzt@gmail.com>
Cc: David Purton <dcpurton@marshwiggle.net>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reported-by: Gabriele Mazzotta <gabriele.mzt@gmail.com>
Reported-by: David Purton <dcpurton@marshwiggle.net>
Tested-by: Gabriele Mazzotta <gabriele.mzt@gmail.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96645
Fixes: ed4a6a7ca853 ("drm/i915: Add two-stage ILK-style watermark programming (v11)")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170220140443.30891-1-ville.syrjala@linux.intel.com
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20170315143158.31780-1-ville.syrjala@linux.intel.com
(cherry picked from commit 5be6e33400992d3450e1c8234a5af353e1560580)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_display.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3282b0f4b134..ed1f4f272b4f 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -16696,12 +16696,11 @@ int intel_modeset_init(struct drm_device *dev)
 		}
 	}
 
-	intel_update_czclk(dev_priv);
-	intel_update_cdclk(dev_priv);
-	dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq;
-
 	intel_shared_dpll_init(dev);
 
+	intel_update_czclk(dev_priv);
+	intel_modeset_init_hw(dev);
+
 	if (dev_priv->max_cdclk_freq == 0)
 		intel_update_max_cdclk(dev_priv);
 
@@ -17258,8 +17257,6 @@ void intel_modeset_gem_init(struct drm_device *dev)
 
 	intel_init_gt_powersave(dev_priv);
 
-	intel_modeset_init_hw(dev);
-
 	intel_setup_overlay(dev_priv);
 }
 
-- 
cgit v1.2.3


From abda288bb207e5c681306299126af8c022709c18 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Sun, 19 Feb 2017 16:33:35 -0800
Subject: auxdisplay: img-ascii-lcd: add missing sentinel entry in
 img_ascii_lcd_matches

The OF device table must be terminated, otherwise we'll be walking past it
and into areas unknown.

Fixes: 0cad855fbd08 ("auxdisplay: img-ascii-lcd: driver for simple ASCII...")
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Tested-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/auxdisplay/img-ascii-lcd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c
index bf43b5d2aafc..83f1439e57fd 100644
--- a/drivers/auxdisplay/img-ascii-lcd.c
+++ b/drivers/auxdisplay/img-ascii-lcd.c
@@ -218,6 +218,7 @@ static const struct of_device_id img_ascii_lcd_matches[] = {
 	{ .compatible = "img,boston-lcd", .data = &boston_config },
 	{ .compatible = "mti,malta-lcd", .data = &malta_config },
 	{ .compatible = "mti,sead3-lcd", .data = &sead3_config },
+	{ /* sentinel */ }
 };
 
 /**
-- 
cgit v1.2.3


From 4e841d3eb9294ce4137fdb5d0a88f1bceab9c212 Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris@chromium.org>
Date: Fri, 10 Mar 2017 17:39:21 -0800
Subject: mwifiex: pcie: don't leak DMA buffers when removing

When PCIe FLR support was added, much of the remove/release code for
PCIe was migrated to ->down_dev(), but ->down_dev() is never called for
device removal. Let's refactor the cleanup to be done in both cases.

Also, drop the comments above mwifiex_cleanup_pcie(), because they were
clearly wrong, and it's better to have clear and obvious code than to
detail the code steps in comments anyway.

Fixes: 4c5dae59d2e9 ("mwifiex: add PCIe function level reset support")
Cc: <stable@vger.kernel.org>
Signed-off-by: Brian Norris <briannorris@chromium.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/marvell/mwifiex/pcie.c | 38 ++++++++++++++---------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c
index a0d918094889..b8c990d10d6e 100644
--- a/drivers/net/wireless/marvell/mwifiex/pcie.c
+++ b/drivers/net/wireless/marvell/mwifiex/pcie.c
@@ -2739,6 +2739,21 @@ static void mwifiex_pcie_device_dump(struct mwifiex_adapter *adapter)
 	schedule_work(&card->work);
 }
 
+static void mwifiex_pcie_free_buffers(struct mwifiex_adapter *adapter)
+{
+	struct pcie_service_card *card = adapter->card;
+	const struct mwifiex_pcie_card_reg *reg = card->pcie.reg;
+
+	if (reg->sleep_cookie)
+		mwifiex_pcie_delete_sleep_cookie_buf(adapter);
+
+	mwifiex_pcie_delete_cmdrsp_buf(adapter);
+	mwifiex_pcie_delete_evtbd_ring(adapter);
+	mwifiex_pcie_delete_rxbd_ring(adapter);
+	mwifiex_pcie_delete_txbd_ring(adapter);
+	card->cmdrsp_buf = NULL;
+}
+
 /*
  * This function initializes the PCI-E host memory space, WCB rings, etc.
  *
@@ -2850,13 +2865,6 @@ err_enable_dev:
 
 /*
  * This function cleans up the allocated card buffers.
- *
- * The following are freed by this function -
- *      - TXBD ring buffers
- *      - RXBD ring buffers
- *      - Event BD ring buffers
- *      - Command response ring buffer
- *      - Sleep cookie buffer
  */
 static void mwifiex_cleanup_pcie(struct mwifiex_adapter *adapter)
 {
@@ -2875,6 +2883,8 @@ static void mwifiex_cleanup_pcie(struct mwifiex_adapter *adapter)
 				    "Failed to write driver not-ready signature\n");
 	}
 
+	mwifiex_pcie_free_buffers(adapter);
+
 	if (pdev) {
 		pci_iounmap(pdev, card->pci_mmap);
 		pci_iounmap(pdev, card->pci_mmap1);
@@ -3126,10 +3136,7 @@ err_cre_txbd:
 	pci_iounmap(pdev, card->pci_mmap1);
 }
 
-/* This function cleans up the PCI-E host memory space.
- * Some code is extracted from mwifiex_unregister_dev()
- *
- */
+/* This function cleans up the PCI-E host memory space. */
 static void mwifiex_pcie_down_dev(struct mwifiex_adapter *adapter)
 {
 	struct pcie_service_card *card = adapter->card;
@@ -3140,14 +3147,7 @@ static void mwifiex_pcie_down_dev(struct mwifiex_adapter *adapter)
 
 	adapter->seq_num = 0;
 
-	if (reg->sleep_cookie)
-		mwifiex_pcie_delete_sleep_cookie_buf(adapter);
-
-	mwifiex_pcie_delete_cmdrsp_buf(adapter);
-	mwifiex_pcie_delete_evtbd_ring(adapter);
-	mwifiex_pcie_delete_rxbd_ring(adapter);
-	mwifiex_pcie_delete_txbd_ring(adapter);
-	card->cmdrsp_buf = NULL;
+	mwifiex_pcie_free_buffers(adapter);
 }
 
 static struct mwifiex_if_ops pcie_ops = {
-- 
cgit v1.2.3


From ba1c7e45ec224cc8d2df33ecaee1946d48e79231 Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris@chromium.org>
Date: Fri, 10 Mar 2017 17:39:22 -0800
Subject: mwifiex: set adapter->dev before starting to use mwifiex_dbg()

The mwifiex_dbg() log handler utilizes the struct device in
adapter->dev. Without it, it decides not to print anything.

As of commit 2e02b5814217 ("mwifiex: Allow mwifiex early access to device
structure"), we started assigning that pointer only after we finished
mwifiex_register() -- this effectively neuters any mwifiex_dbg() logging
done before this point.

Let's move the device assignment into mwifiex_register().

Fixes: 2e02b5814217 ("mwifiex: Allow mwifiex early access to device structure")
Cc: Rajat Jain <rajatja@google.com>
Signed-off-by: Brian Norris <briannorris@chromium.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/marvell/mwifiex/main.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c
index 5ebca1d0cfc7..43d040e02e4d 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.c
+++ b/drivers/net/wireless/marvell/mwifiex/main.c
@@ -57,8 +57,8 @@ MODULE_PARM_DESC(mfg_mode, "manufacturing mode enable:1, disable:0");
  * In case of any errors during inittialization, this function also ensures
  * proper cleanup before exiting.
  */
-static int mwifiex_register(void *card, struct mwifiex_if_ops *if_ops,
-			    void **padapter)
+static int mwifiex_register(void *card, struct device *dev,
+			    struct mwifiex_if_ops *if_ops, void **padapter)
 {
 	struct mwifiex_adapter *adapter;
 	int i;
@@ -68,6 +68,7 @@ static int mwifiex_register(void *card, struct mwifiex_if_ops *if_ops,
 		return -ENOMEM;
 
 	*padapter = adapter;
+	adapter->dev = dev;
 	adapter->card = card;
 
 	/* Save interface specific operations in adapter */
@@ -1568,12 +1569,11 @@ mwifiex_add_card(void *card, struct completion *fw_done,
 {
 	struct mwifiex_adapter *adapter;
 
-	if (mwifiex_register(card, if_ops, (void **)&adapter)) {
+	if (mwifiex_register(card, dev, if_ops, (void **)&adapter)) {
 		pr_err("%s: software init failed\n", __func__);
 		goto err_init_sw;
 	}
 
-	adapter->dev = dev;
 	mwifiex_probe_of(adapter);
 
 	adapter->iface_type = iface_type;
-- 
cgit v1.2.3


From 36908c4e5b1063eff3e11336fab544a76c625b69 Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris@chromium.org>
Date: Fri, 10 Mar 2017 17:39:23 -0800
Subject: mwifiex: uninit wakeup info when removing device

We manually init wakeup info, but we don't detach it on device removal.
This means that if we (for example) rmmod + modprobe the driver, the
device framework might return -EEXIST the second time, and we'll
complain in the logs:

[  839.311881] mwifiex_pcie 0000:01:00.0: fail to init wakeup for mwifiex

AFAICT, there's no other negative effect.

But we can fix this by disabling wakeup on remove, similar to what a few
other drivers do (e.g., the power supply framework).

This code (and bug) has existed on SDIO for a while, but it got moved
around and enabled for PCIe with commit 853402a00823 ("mwifiex: Enable
WoWLAN for both sdio and pcie").

Signed-off-by: Brian Norris <briannorris@chromium.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/marvell/mwifiex/main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c
index 43d040e02e4d..b62e03d11c2e 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.c
+++ b/drivers/net/wireless/marvell/mwifiex/main.c
@@ -1718,6 +1718,9 @@ int mwifiex_remove_card(struct mwifiex_adapter *adapter)
 	wiphy_unregister(adapter->wiphy);
 	wiphy_free(adapter->wiphy);
 
+	if (adapter->irq_wakeup >= 0)
+		device_init_wakeup(adapter->dev, false);
+
 	/* Unregister device */
 	mwifiex_dbg(adapter, INFO,
 		    "info: unregister device\n");
-- 
cgit v1.2.3


From cf8c44d42c4f4f38468a53e9ce2a0314e7ebeaa1 Mon Sep 17 00:00:00 2001
From: Amitkumar Karwar <akarwar@marvell.com>
Date: Tue, 28 Feb 2017 18:54:31 +0530
Subject: MAINTAINERS: update for mwifiex driver maintainers

Ganapathi & Xinming are starting to take a more active role in the
mwifiex driver maintainership here onwards on account of organizational
changes.

CC: Xinming Hu <huxm@marvell.com>
CC: Ganapathi Bhat <gbhat@marvell.com>
Signed-off-by: Amitkumar Karwar <akarwar@marvell.com>
Signed-off-by: Nishant Sarmukadam <nishants@marvell.com>
Signed-off-by: Cathy Luo <cluo@marvell.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 33875e07482b..078c38217daa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7846,6 +7846,8 @@ F:	drivers/net/ethernet/marvell/mvneta.*
 MARVELL MWIFIEX WIRELESS DRIVER
 M:	Amitkumar Karwar <akarwar@marvell.com>
 M:	Nishant Sarmukadam <nishants@marvell.com>
+M:	Ganapathi Bhat <gbhat@marvell.com>
+M:	Xinming Hu <huxm@marvell.com>
 L:	linux-wireless@vger.kernel.org
 S:	Maintained
 F:	drivers/net/wireless/marvell/mwifiex/
-- 
cgit v1.2.3


From 6bce725a78de1b171928ce66dec2bae4b569e5d1 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Wed, 8 Mar 2017 14:30:34 +0100
Subject: x86/mpx: Make unnecessarily global function static

Make the function get_user_bd_entry() static as it is not used outside of
arch/x86/mm/mpx.c

This fixes a sparse warning.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/mm/mpx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index 5126dfd52b18..cd44ae727df7 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -590,7 +590,7 @@ static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm,
  * we might run off the end of the bounds table if we are on
  * a 64-bit kernel and try to get 8 bytes.
  */
-int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret,
+static int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret,
 		long __user *bd_entry_ptr)
 {
 	u32 bd_entry_32;
-- 
cgit v1.2.3


From dcc3b5ffe1b32771c9a22e2c916fb94c4fcf5b79 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Mon, 6 Mar 2017 21:51:28 -0800
Subject: sched/deadline: Add missing update_rq_clock() in dl_task_timer()

The following warning can be triggered by hot-unplugging the CPU
on which an active SCHED_DEADLINE task is running on:

 ------------[ cut here ]------------
 WARNING: CPU: 7 PID: 0 at kernel/sched/sched.h:833 replenish_dl_entity+0x71e/0xc40
 rq->clock_update_flags < RQCF_ACT_SKIP
 CPU: 7 PID: 0 Comm: swapper/7 Tainted: G    B           4.11.0-rc1+ #24
 Hardware name: LENOVO ThinkCentre M8500t-N000/SHARKBAY, BIOS FBKTC1AUS 02/16/2016
 Call Trace:
  <IRQ>
  dump_stack+0x85/0xc4
  __warn+0x172/0x1b0
  warn_slowpath_fmt+0xb4/0xf0
  ? __warn+0x1b0/0x1b0
  ? debug_check_no_locks_freed+0x2c0/0x2c0
  ? cpudl_set+0x3d/0x2b0
  replenish_dl_entity+0x71e/0xc40
  enqueue_task_dl+0x2ea/0x12e0
  ? dl_task_timer+0x777/0x990
  ? __hrtimer_run_queues+0x270/0xa50
  dl_task_timer+0x316/0x990
  ? enqueue_task_dl+0x12e0/0x12e0
  ? enqueue_task_dl+0x12e0/0x12e0
  __hrtimer_run_queues+0x270/0xa50
  ? hrtimer_cancel+0x20/0x20
  ? hrtimer_interrupt+0x119/0x600
  hrtimer_interrupt+0x19c/0x600
  ? trace_hardirqs_off+0xd/0x10
  local_apic_timer_interrupt+0x74/0xe0
  smp_apic_timer_interrupt+0x76/0xa0
  apic_timer_interrupt+0x93/0xa0

The DL task will be migrated to a suitable later deadline rq once the DL
timer fires and currnet rq is offline. The rq clock of the new rq should
be updated. This patch fixes it by updating the rq clock after holding
the new rq's rq lock.

Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Juri Lelli <juri.lelli@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1488865888-15894-1-git-send-email-wanpeng.li@hotmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/deadline.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 99b2c33a9fbc..c6db3fd727fe 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -638,6 +638,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
 		lockdep_unpin_lock(&rq->lock, rf.cookie);
 		rq = dl_task_offline_migration(rq, p);
 		rf.cookie = lockdep_pin_lock(&rq->lock);
+		update_rq_clock(rq);
 
 		/*
 		 * Now that the task has been migrated to the new RQ and we
-- 
cgit v1.2.3


From 6e5f32f7a43f45ee55c401c0b9585eb01f9629a8 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt@codeblueprint.co.uk>
Date: Fri, 17 Feb 2017 12:07:30 +0000
Subject: sched/loadavg: Avoid loadavg spikes caused by delayed NO_HZ
 accounting

If we crossed a sample window while in NO_HZ we will add LOAD_FREQ to
the pending sample window time on exit, setting the next update not
one window into the future, but two.

This situation on exiting NO_HZ is described by:

  this_rq->calc_load_update < jiffies < calc_load_update

In this scenario, what we should be doing is:

  this_rq->calc_load_update = calc_load_update		     [ next window ]

But what we actually do is:

  this_rq->calc_load_update = calc_load_update + LOAD_FREQ   [ next+1 window ]

This has the effect of delaying load average updates for potentially
up to ~9seconds.

This can result in huge spikes in the load average values due to
per-cpu uninterruptible task counts being out of sync when accumulated
across all CPUs.

It's safe to update the per-cpu active count if we wake between sample
windows because any load that we left in 'calc_load_idle' will have
been zero'd when the idle load was folded in calc_global_load().

This issue is easy to reproduce before,

  commit 9d89c257dfb9 ("sched/fair: Rewrite runnable load and utilization average tracking")

just by forking short-lived process pipelines built from ps(1) and
grep(1) in a loop. I'm unable to reproduce the spikes after that
commit, but the bug still seems to be present from code review.

Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Morten Rasmussen <morten.rasmussen@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Fixes: commit 5167e8d ("sched/nohz: Rewrite and fix load-avg computation -- again")
Link: http://lkml.kernel.org/r/20170217120731.11868-2-matt@codeblueprint.co.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/loadavg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index 7296b7308eca..3a55f3f9ffe4 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -202,8 +202,9 @@ void calc_load_exit_idle(void)
 	struct rq *this_rq = this_rq();
 
 	/*
-	 * If we're still before the sample window, we're done.
+	 * If we're still before the pending sample window, we're done.
 	 */
+	this_rq->calc_load_update = calc_load_update;
 	if (time_before(jiffies, this_rq->calc_load_update))
 		return;
 
@@ -212,7 +213,6 @@ void calc_load_exit_idle(void)
 	 * accounted through the nohz accounting, so skip the entire deal and
 	 * sync up for the next window.
 	 */
-	this_rq->calc_load_update = calc_load_update;
 	if (time_before(jiffies, this_rq->calc_load_update + 10))
 		this_rq->calc_load_update += LOAD_FREQ;
 }
-- 
cgit v1.2.3


From caeb5882979bc6f3c8766fcf59c6269b38f521bc Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt@codeblueprint.co.uk>
Date: Fri, 17 Feb 2017 12:07:31 +0000
Subject: sched/loadavg: Use {READ,WRITE}_ONCE() for sample window

'calc_load_update' is accessed without any kind of locking and there's
a clear assumption in the code that only a single value is read or
written.

Make this explicit by using READ_ONCE() and WRITE_ONCE(), and avoid
unintentionally seeing multiple values, or having the load/stores
split.

Technically the loads in calc_global_*() don't require this since
those are the only functions that update 'calc_load_update', but I've
added the READ_ONCE() for consistency.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Morten Rasmussen <morten.rasmussen@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Link: http://lkml.kernel.org/r/20170217120731.11868-3-matt@codeblueprint.co.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/loadavg.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index 3a55f3f9ffe4..f15fb2bdbc0d 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -169,7 +169,7 @@ static inline int calc_load_write_idx(void)
 	 * If the folding window started, make sure we start writing in the
 	 * next idle-delta.
 	 */
-	if (!time_before(jiffies, calc_load_update))
+	if (!time_before(jiffies, READ_ONCE(calc_load_update)))
 		idx++;
 
 	return idx & 1;
@@ -204,7 +204,7 @@ void calc_load_exit_idle(void)
 	/*
 	 * If we're still before the pending sample window, we're done.
 	 */
-	this_rq->calc_load_update = calc_load_update;
+	this_rq->calc_load_update = READ_ONCE(calc_load_update);
 	if (time_before(jiffies, this_rq->calc_load_update))
 		return;
 
@@ -308,13 +308,15 @@ calc_load_n(unsigned long load, unsigned long exp,
  */
 static void calc_global_nohz(void)
 {
+	unsigned long sample_window;
 	long delta, active, n;
 
-	if (!time_before(jiffies, calc_load_update + 10)) {
+	sample_window = READ_ONCE(calc_load_update);
+	if (!time_before(jiffies, sample_window + 10)) {
 		/*
 		 * Catch-up, fold however many we are behind still
 		 */
-		delta = jiffies - calc_load_update - 10;
+		delta = jiffies - sample_window - 10;
 		n = 1 + (delta / LOAD_FREQ);
 
 		active = atomic_long_read(&calc_load_tasks);
@@ -324,7 +326,7 @@ static void calc_global_nohz(void)
 		avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
 		avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
 
-		calc_load_update += n * LOAD_FREQ;
+		WRITE_ONCE(calc_load_update, sample_window + n * LOAD_FREQ);
 	}
 
 	/*
@@ -352,9 +354,11 @@ static inline void calc_global_nohz(void) { }
  */
 void calc_global_load(unsigned long ticks)
 {
+	unsigned long sample_window;
 	long active, delta;
 
-	if (time_before(jiffies, calc_load_update + 10))
+	sample_window = READ_ONCE(calc_load_update);
+	if (time_before(jiffies, sample_window + 10))
 		return;
 
 	/*
@@ -371,7 +375,7 @@ void calc_global_load(unsigned long ticks)
 	avenrun[1] = calc_load(avenrun[1], EXP_5, active);
 	avenrun[2] = calc_load(avenrun[2], EXP_15, active);
 
-	calc_load_update += LOAD_FREQ;
+	WRITE_ONCE(calc_load_update, sample_window + LOAD_FREQ);
 
 	/*
 	 * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk.
-- 
cgit v1.2.3


From 17fcbd590d0c3e35bd9646e2215f86586378bc42 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@axis.com>
Date: Sat, 25 Feb 2017 01:17:53 +0100
Subject: locking/rwsem: Fix down_write_killable() for
 CONFIG_RWSEM_GENERIC_SPINLOCK=y

We hang if SIGKILL has been sent, but the task is stuck in down_read()
(after do_exit()), even though no task is doing down_write() on the
rwsem in question:

  INFO: task libupnp:21868 blocked for more than 120 seconds.
  libupnp         D    0 21868      1 0x08100008
  ...
  Call Trace:
  __schedule()
  schedule()
  __down_read()
  do_exit()
  do_group_exit()
  __wake_up_parent()

This bug has already been fixed for CONFIG_RWSEM_XCHGADD_ALGORITHM=y in
the following commit:

 04cafed7fc19 ("locking/rwsem: Fix down_write_killable()")

... however, this bug also exists for CONFIG_RWSEM_GENERIC_SPINLOCK=y.

Signed-off-by: Niklas Cassel <niklas.cassel@axis.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <mhocko@suse.com>
Cc: <stable@vger.kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Niklas Cassel <niklass@axis.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: d47996082f52 ("locking/rwsem: Introduce basis for down_write_killable()")
Link: http://lkml.kernel.org/r/1487981873-12649-1-git-send-email-niklass@axis.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/locking/rwsem-spinlock.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c
index 7bc24d477805..c65f7989f850 100644
--- a/kernel/locking/rwsem-spinlock.c
+++ b/kernel/locking/rwsem-spinlock.c
@@ -213,10 +213,9 @@ int __sched __down_write_common(struct rw_semaphore *sem, int state)
 		 */
 		if (sem->count == 0)
 			break;
-		if (signal_pending_state(state, current)) {
-			ret = -EINTR;
-			goto out;
-		}
+		if (signal_pending_state(state, current))
+			goto out_nolock;
+
 		set_current_state(state);
 		raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
 		schedule();
@@ -224,12 +223,19 @@ int __sched __down_write_common(struct rw_semaphore *sem, int state)
 	}
 	/* got the lock */
 	sem->count = -1;
-out:
 	list_del(&waiter.list);
 
 	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
 
 	return ret;
+
+out_nolock:
+	list_del(&waiter.list);
+	if (!list_empty(&sem->wait_list))
+		__rwsem_do_wake(sem, 1);
+	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+	return -EINTR;
 }
 
 void __sched __down_write(struct rw_semaphore *sem)
-- 
cgit v1.2.3


From 03270c6ac6207fc55bbf9d20d195029dca210c79 Mon Sep 17 00:00:00 2001
From: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Date: Mon, 6 Mar 2017 23:23:42 +0000
Subject: parport: fix attempt to write duplicate procfiles

Usually every parallel port will have a single pardev registered with
it. But ppdev driver is an exception. This userspace parallel port
driver allows to create multiple parrallel port devices for a single
parallel port. And as a result we were having a nice warning like:
"sysctl table check failed:
/dev/parport/parport0/devices/ppdev0/timeslice Sysctl already exists"

Use the same logic as used in parport_register_device() and register
the proc files only once for each parallel port.

Fixes: 6fa45a226897 ("parport: add device-model to parport subsystem")
Cc: stable <stable@vger.kernel.org> # v4.4+
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1414656
Bugzilla: https://bugs.archlinux.org/task/52322
Tested-by: James Feeney <james@nurealm.net>
Signed-off-by: Sudip Mukherjee <sudip.mukherjee@codethink.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/parport/share.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/parport/share.c b/drivers/parport/share.c
index bc090daa850a..5dc53d420ca8 100644
--- a/drivers/parport/share.c
+++ b/drivers/parport/share.c
@@ -939,8 +939,10 @@ parport_register_dev_model(struct parport *port, const char *name,
 	 * pardevice fields. -arca
 	 */
 	port->ops->init_state(par_dev, par_dev->state);
-	port->proc_device = par_dev;
-	parport_device_proc_register(par_dev);
+	if (!test_and_set_bit(PARPORT_DEVPROC_REGISTERED, &port->devflags)) {
+		port->proc_device = par_dev;
+		parport_device_proc_register(par_dev);
+	}
 
 	return par_dev;
 
-- 
cgit v1.2.3


From 9a69645dde1188723d80745c1bc6ee9af2cbe2a7 Mon Sep 17 00:00:00 2001
From: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Date: Mon, 6 Mar 2017 23:23:43 +0000
Subject: ppdev: fix registering same device name

Usually every parallel port will have a single pardev registered with
it. But ppdev driver is an exception. This userspace parallel port
driver allows to create multiple parrallel port devices for a single
parallel port. And as a result we were having a big warning like:
"sysfs: cannot create duplicate filename '/devices/parport0/ppdev0.0'".
And with that many parallel port printers stopped working.

We have been using the minor number as the id field while registering
a parralel port device with a parralel port. But when there are
multiple parrallel port device for one single parallel port, they all
tried to register with the same name like 'pardev0.0' and everything
started failing.
Use an incremented index as the id instead of the minor number.

Fixes: 8b7d3a9d903e ("ppdev: use new parport device model")
Cc: stable <stable@vger.kernel.org> # v4.9+
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1414656
Bugzilla: https://bugs.archlinux.org/task/52322
Tested-by: James Feeney <james@nurealm.net>
Signed-off-by: Sudip Mukherjee <sudip.mukherjee@codethink.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/ppdev.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c
index 2a558c706581..3e73bcdf9e65 100644
--- a/drivers/char/ppdev.c
+++ b/drivers/char/ppdev.c
@@ -84,11 +84,14 @@ struct pp_struct {
 	struct ieee1284_info state;
 	struct ieee1284_info saved_state;
 	long default_inactivity;
+	int index;
 };
 
 /* should we use PARDEVICE_MAX here? */
 static struct device *devices[PARPORT_MAX];
 
+static DEFINE_IDA(ida_index);
+
 /* pp_struct.flags bitfields */
 #define PP_CLAIMED    (1<<0)
 #define PP_EXCL       (1<<1)
@@ -290,7 +293,7 @@ static int register_device(int minor, struct pp_struct *pp)
 	struct pardevice *pdev = NULL;
 	char *name;
 	struct pardev_cb ppdev_cb;
-	int rc = 0;
+	int rc = 0, index;
 
 	name = kasprintf(GFP_KERNEL, CHRDEV "%x", minor);
 	if (name == NULL)
@@ -303,20 +306,23 @@ static int register_device(int minor, struct pp_struct *pp)
 		goto err;
 	}
 
+	index = ida_simple_get(&ida_index, 0, 0, GFP_KERNEL);
 	memset(&ppdev_cb, 0, sizeof(ppdev_cb));
 	ppdev_cb.irq_func = pp_irq;
 	ppdev_cb.flags = (pp->flags & PP_EXCL) ? PARPORT_FLAG_EXCL : 0;
 	ppdev_cb.private = pp;
-	pdev = parport_register_dev_model(port, name, &ppdev_cb, minor);
+	pdev = parport_register_dev_model(port, name, &ppdev_cb, index);
 	parport_put_port(port);
 
 	if (!pdev) {
 		pr_warn("%s: failed to register device!\n", name);
 		rc = -ENXIO;
+		ida_simple_remove(&ida_index, index);
 		goto err;
 	}
 
 	pp->pdev = pdev;
+	pp->index = index;
 	dev_dbg(&pdev->dev, "registered pardevice\n");
 err:
 	kfree(name);
@@ -755,6 +761,7 @@ static int pp_release(struct inode *inode, struct file *file)
 
 	if (pp->pdev) {
 		parport_unregister_device(pp->pdev);
+		ida_simple_remove(&ida_index, pp->index);
 		pp->pdev = NULL;
 		pr_debug(CHRDEV "%x: unregistered pardevice\n", minor);
 	}
-- 
cgit v1.2.3


From c3423563c68fc454b805b46cb69fd4816db933e2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 3 Mar 2017 07:58:06 -0700
Subject: vmw_vmci: handle the return value from pci_alloc_irq_vectors
 correctly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It returns the number of vectors allocated when successful, so check for
a negative error only.

Fixes: 3bb434cd ("vmw_vmci: switch to pci_irq_alloc_vectors")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Loïc Yhuel <loic.yhuel@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/vmw_vmci/vmci_guest.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c
index 9d659542a335..dad5abee656e 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -566,10 +566,10 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
 	 */
 	error = pci_alloc_irq_vectors(pdev, VMCI_MAX_INTRS, VMCI_MAX_INTRS,
 			PCI_IRQ_MSIX);
-	if (error) {
+	if (error < 0) {
 		error = pci_alloc_irq_vectors(pdev, 1, 1,
 				PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
-		if (error)
+		if (error < 0)
 			goto err_remove_bitmap;
 	} else {
 		vmci_dev->exclusive_vectors = true;
-- 
cgit v1.2.3


From 5ac69d37784b237707a7b15d199cdb6c6fdb6780 Mon Sep 17 00:00:00 2001
From: Daniel Bristot de Oliveira <bristot@redhat.com>
Date: Thu, 2 Mar 2017 15:10:57 +0100
Subject: sched/deadline: Make sure the replenishment timer fires in the next
 period

Currently, the replenishment timer is set to fire at the deadline
of a task. Although that works for implicit deadline tasks because the
deadline is equals to the begin of the next period, that is not correct
for constrained deadline tasks (deadline < period).

For instance:

f.c:
 --------------- %< ---------------
int main (void)
{
	for(;;);
}
 --------------- >% ---------------

  # gcc -o f f.c

  # trace-cmd record -e sched:sched_switch                              \
				   -e syscalls:sys_exit_sched_setattr   \
   chrt -d --sched-runtime  490000000					\
           --sched-deadline 500000000					\
	   --sched-period  1000000000 0 ./f

  # trace-cmd report | grep "{pid of ./f}"

After setting parameters, the task is replenished and continue running
until being throttled:

         f-11295 [003] 13322.113776: sys_exit_sched_setattr: 0x0

The task is throttled after running 492318 ms, as expected:

         f-11295 [003] 13322.606094: sched_switch:   f:11295 [-1] R ==> watchdog/3:32 [0]

But then, the task is replenished 500719 ms after the first
replenishment:

    <idle>-0     [003] 13322.614495: sched_switch:   swapper/3:0 [120] R ==> f:11295 [-1]

Running for 490277 ms:

         f-11295 [003] 13323.104772: sched_switch:   f:11295 [-1] R ==>  swapper/3:0 [120]

Hence, in the first period, the task runs 2 * runtime, and that is a bug.

During the first replenishment, the next deadline is set one period away.
So the runtime / period starts to be respected. However, as the second
replenishment took place in the wrong instant, the next replenishment
will also be held in a wrong instant of time. Rather than occurring in
the nth period away from the first activation, it is taking place
in the (nth period - relative deadline).

Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Luca Abeni <luca.abeni@santannapisa.it>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Reviewed-by: Juri Lelli <juri.lelli@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Romulo Silva de Oliveira <romulo.deoliveira@ufsc.br>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tommaso Cucinotta <tommaso.cucinotta@sssup.it>
Link: http://lkml.kernel.org/r/ac50d89887c25285b47465638354b63362f8adff.1488392936.git.bristot@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/deadline.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index c6db3fd727fe..445e2787bf80 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -505,10 +505,15 @@ static void update_dl_entity(struct sched_dl_entity *dl_se,
 	}
 }
 
+static inline u64 dl_next_period(struct sched_dl_entity *dl_se)
+{
+	return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period;
+}
+
 /*
  * If the entity depleted all its runtime, and if we want it to sleep
  * while waiting for some new execution time to become available, we
- * set the bandwidth enforcement timer to the replenishment instant
+ * set the bandwidth replenishment timer to the replenishment instant
  * and try to activate it.
  *
  * Notice that it is important for the caller to know if the timer
@@ -530,7 +535,7 @@ static int start_dl_timer(struct task_struct *p)
 	 * that it is actually coming from rq->clock and not from
 	 * hrtimer's time base reading.
 	 */
-	act = ns_to_ktime(dl_se->deadline);
+	act = ns_to_ktime(dl_next_period(dl_se));
 	now = hrtimer_cb_get_time(timer);
 	delta = ktime_to_ns(now) - rq_clock(rq);
 	act = ktime_add_ns(act, delta);
-- 
cgit v1.2.3


From df8eac8cafce7d086be3bd5cf5a838fa37594dfb Mon Sep 17 00:00:00 2001
From: Daniel Bristot de Oliveira <bristot@redhat.com>
Date: Thu, 2 Mar 2017 15:10:58 +0100
Subject: sched/deadline: Throttle a constrained deadline task activated after
 the deadline

During the activation, CBS checks if it can reuse the current task's
runtime and period. If the deadline of the task is in the past, CBS
cannot use the runtime, and so it replenishes the task. This rule
works fine for implicit deadline tasks (deadline == period), and the
CBS was designed for implicit deadline tasks. However, a task with
constrained deadline (deadine < period) might be awakened after the
deadline, but before the next period. In this case, replenishing the
task would allow it to run for runtime / deadline. As in this case
deadline < period, CBS enables a task to run for more than the
runtime / period. In a very loaded system, this can cause a domino
effect, making other tasks miss their deadlines.

To avoid this problem, in the activation of a constrained deadline
task after the deadline but before the next period, throttle the
task and set the replenishing timer to the begin of the next period,
unless it is boosted.

Reproducer:

 --------------- %< ---------------
  int main (int argc, char **argv)
  {
	int ret;
	int flags = 0;
	unsigned long l = 0;
	struct timespec ts;
	struct sched_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);

	attr.sched_policy   = SCHED_DEADLINE;
	attr.sched_runtime  = 2 * 1000 * 1000;		/* 2 ms */
	attr.sched_deadline = 2 * 1000 * 1000;		/* 2 ms */
	attr.sched_period   = 2 * 1000 * 1000 * 1000;	/* 2 s */

	ts.tv_sec = 0;
	ts.tv_nsec = 2000 * 1000;			/* 2 ms */

	ret = sched_setattr(0, &attr, flags);

	if (ret < 0) {
		perror("sched_setattr");
		exit(-1);
	}

	for(;;) {
		/* XXX: you may need to adjust the loop */
		for (l = 0; l < 150000; l++);
		/*
		 * The ideia is to go to sleep right before the deadline
		 * and then wake up before the next period to receive
		 * a new replenishment.
		 */
		nanosleep(&ts, NULL);
	}

	exit(0);
  }
  --------------- >% ---------------

On my box, this reproducer uses almost 50% of the CPU time, which is
obviously wrong for a task with 2/2000 reservation.

Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luca Abeni <luca.abeni@santannapisa.it>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Romulo Silva de Oliveira <romulo.deoliveira@ufsc.br>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tommaso Cucinotta <tommaso.cucinotta@sssup.it>
Link: http://lkml.kernel.org/r/edf58354e01db46bf42df8d2dd32418833f68c89.1488392936.git.bristot@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/deadline.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 445e2787bf80..736d8b9d9bab 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -695,6 +695,37 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
 	timer->function = dl_task_timer;
 }
 
+/*
+ * During the activation, CBS checks if it can reuse the current task's
+ * runtime and period. If the deadline of the task is in the past, CBS
+ * cannot use the runtime, and so it replenishes the task. This rule
+ * works fine for implicit deadline tasks (deadline == period), and the
+ * CBS was designed for implicit deadline tasks. However, a task with
+ * constrained deadline (deadine < period) might be awakened after the
+ * deadline, but before the next period. In this case, replenishing the
+ * task would allow it to run for runtime / deadline. As in this case
+ * deadline < period, CBS enables a task to run for more than the
+ * runtime / period. In a very loaded system, this can cause a domino
+ * effect, making other tasks miss their deadlines.
+ *
+ * To avoid this problem, in the activation of a constrained deadline
+ * task after the deadline but before the next period, throttle the
+ * task and set the replenishing timer to the begin of the next period,
+ * unless it is boosted.
+ */
+static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
+{
+	struct task_struct *p = dl_task_of(dl_se);
+	struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));
+
+	if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
+	    dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
+		if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
+			return;
+		dl_se->dl_throttled = 1;
+	}
+}
+
 static
 int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
 {
@@ -928,6 +959,11 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
 	__dequeue_dl_entity(dl_se);
 }
 
+static inline bool dl_is_constrained(struct sched_dl_entity *dl_se)
+{
+	return dl_se->dl_deadline < dl_se->dl_period;
+}
+
 static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 {
 	struct task_struct *pi_task = rt_mutex_get_top_task(p);
@@ -953,6 +989,15 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 		return;
 	}
 
+	/*
+	 * Check if a constrained deadline task was activated
+	 * after the deadline but before the next period.
+	 * If that is the case, the task will be throttled and
+	 * the replenishment timer will be set to the next period.
+	 */
+	if (!p->dl.dl_throttled && dl_is_constrained(&p->dl))
+		dl_check_constrained_dl(&p->dl);
+
 	/*
 	 * If p is throttled, we do nothing. In fact, if it exhausted
 	 * its budget it needs a replenishment and, since it now is on
-- 
cgit v1.2.3


From 2317d5f1c34913bac5971d93d69fb6c31bb74670 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 2 Mar 2017 15:10:59 +0100
Subject: sched/deadline: Use deadline instead of period when calculating
 overflow

I was testing Daniel's changes with his test case, and tweaked it a
little. Instead of having the runtime equal to the deadline, I
increased the deadline ten fold.

Daniel's test case had:

	attr.sched_runtime  = 2 * 1000 * 1000;		/* 2 ms */
	attr.sched_deadline = 2 * 1000 * 1000;		/* 2 ms */
	attr.sched_period   = 2 * 1000 * 1000 * 1000;	/* 2 s */

To make it more interesting, I changed it to:

	attr.sched_runtime  =  2 * 1000 * 1000;		/* 2 ms */
	attr.sched_deadline = 20 * 1000 * 1000;		/* 20 ms */
	attr.sched_period   =  2 * 1000 * 1000 * 1000;	/* 2 s */

The results were rather surprising. The behavior that Daniel's patch
was fixing came back. The task started using much more than .1% of the
CPU. More like 20%.

Looking into this I found that it was due to the dl_entity_overflow()
constantly returning true. That's because it uses the relative period
against relative runtime vs the absolute deadline against absolute
runtime.

  runtime / (deadline - t) > dl_runtime / dl_period

There's even a comment mentioning this, and saying that when relative
deadline equals relative period, that the equation is the same as using
deadline instead of period. That comment is backwards! What we really
want is:

  runtime / (deadline - t) > dl_runtime / dl_deadline

We care about if the runtime can make its deadline, not its period. And
then we can say "when the deadline equals the period, the equation is
the same as using dl_period instead of dl_deadline".

After correcting this, now when the task gets enqueued, it can throttle
correctly, and Daniel's fix to the throttling of sleeping deadline
tasks works even when the runtime and deadline are not the same.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Juri Lelli <juri.lelli@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luca Abeni <luca.abeni@santannapisa.it>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Romulo Silva de Oliveira <romulo.deoliveira@ufsc.br>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tommaso Cucinotta <tommaso.cucinotta@sssup.it>
Link: http://lkml.kernel.org/r/02135a27f1ae3fe5fd032568a5a2f370e190e8d7.1488392936.git.bristot@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/deadline.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 736d8b9d9bab..a2ce59015642 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -445,13 +445,13 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
  *
  * This function returns true if:
  *
- *   runtime / (deadline - t) > dl_runtime / dl_period ,
+ *   runtime / (deadline - t) > dl_runtime / dl_deadline ,
  *
  * IOW we can't recycle current parameters.
  *
- * Notice that the bandwidth check is done against the period. For
+ * Notice that the bandwidth check is done against the deadline. For
  * task with deadline equal to period this is the same of using
- * dl_deadline instead of dl_period in the equation above.
+ * dl_period instead of dl_deadline in the equation above.
  */
 static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
 			       struct sched_dl_entity *pi_se, u64 t)
@@ -476,7 +476,7 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
 	 * of anything below microseconds resolution is actually fiction
 	 * (but still we want to give the user that illusion >;).
 	 */
-	left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
+	left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
 	right = ((dl_se->deadline - t) >> DL_SCALE) *
 		(pi_se->dl_runtime >> DL_SCALE);
 
-- 
cgit v1.2.3


From ea90e0dc8cecba6359b481e24d9c37160f6f524f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 15 Mar 2017 14:26:04 +0100
Subject: nl80211: fix dumpit error path RTNL deadlocks

Sowmini pointed out Dmitry's RTNL deadlock report to me, and it turns out
to be perfectly accurate - there are various error paths that miss unlock
of the RTNL.

To fix those, change the locking a bit to not be conditional in all those
nl80211_prepare_*_dump() functions, but make those require the RTNL to
start with, and fix the buggy error paths. This also let me use sparse
(by appropriately overriding the rtnl_lock/rtnl_unlock functions) to
validate the changes.

Cc: stable@vger.kernel.org
Reported-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 127 ++++++++++++++++++++++---------------------------
 1 file changed, 56 insertions(+), 71 deletions(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d7f8be4e321a..2312dc2ffdb9 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -545,22 +545,18 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
 {
 	int err;
 
-	rtnl_lock();
-
 	if (!cb->args[0]) {
 		err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
 				  genl_family_attrbuf(&nl80211_fam),
 				  nl80211_fam.maxattr, nl80211_policy);
 		if (err)
-			goto out_unlock;
+			return err;
 
 		*wdev = __cfg80211_wdev_from_attrs(
 					sock_net(skb->sk),
 					genl_family_attrbuf(&nl80211_fam));
-		if (IS_ERR(*wdev)) {
-			err = PTR_ERR(*wdev);
-			goto out_unlock;
-		}
+		if (IS_ERR(*wdev))
+			return PTR_ERR(*wdev);
 		*rdev = wiphy_to_rdev((*wdev)->wiphy);
 		/* 0 is the first index - add 1 to parse only once */
 		cb->args[0] = (*rdev)->wiphy_idx + 1;
@@ -570,10 +566,8 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
 		struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1);
 		struct wireless_dev *tmp;
 
-		if (!wiphy) {
-			err = -ENODEV;
-			goto out_unlock;
-		}
+		if (!wiphy)
+			return -ENODEV;
 		*rdev = wiphy_to_rdev(wiphy);
 		*wdev = NULL;
 
@@ -584,21 +578,11 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
 			}
 		}
 
-		if (!*wdev) {
-			err = -ENODEV;
-			goto out_unlock;
-		}
+		if (!*wdev)
+			return -ENODEV;
 	}
 
 	return 0;
- out_unlock:
-	rtnl_unlock();
-	return err;
-}
-
-static void nl80211_finish_wdev_dump(struct cfg80211_registered_device *rdev)
-{
-	rtnl_unlock();
 }
 
 /* IE validation */
@@ -2608,17 +2592,17 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
 	int filter_wiphy = -1;
 	struct cfg80211_registered_device *rdev;
 	struct wireless_dev *wdev;
+	int ret;
 
 	rtnl_lock();
 	if (!cb->args[2]) {
 		struct nl80211_dump_wiphy_state state = {
 			.filter_wiphy = -1,
 		};
-		int ret;
 
 		ret = nl80211_dump_wiphy_parse(skb, cb, &state);
 		if (ret)
-			return ret;
+			goto out_unlock;
 
 		filter_wiphy = state.filter_wiphy;
 
@@ -2663,12 +2647,14 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
 		wp_idx++;
 	}
  out:
-	rtnl_unlock();
-
 	cb->args[0] = wp_idx;
 	cb->args[1] = if_idx;
 
-	return skb->len;
+	ret = skb->len;
+ out_unlock:
+	rtnl_unlock();
+
+	return ret;
 }
 
 static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info)
@@ -4452,9 +4438,10 @@ static int nl80211_dump_station(struct sk_buff *skb,
 	int sta_idx = cb->args[2];
 	int err;
 
+	rtnl_lock();
 	err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
 	if (err)
-		return err;
+		goto out_err;
 
 	if (!wdev->netdev) {
 		err = -EINVAL;
@@ -4489,7 +4476,7 @@ static int nl80211_dump_station(struct sk_buff *skb,
 	cb->args[2] = sta_idx;
 	err = skb->len;
  out_err:
-	nl80211_finish_wdev_dump(rdev);
+	rtnl_unlock();
 
 	return err;
 }
@@ -5275,9 +5262,10 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
 	int path_idx = cb->args[2];
 	int err;
 
+	rtnl_lock();
 	err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
 	if (err)
-		return err;
+		goto out_err;
 
 	if (!rdev->ops->dump_mpath) {
 		err = -EOPNOTSUPP;
@@ -5310,7 +5298,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
 	cb->args[2] = path_idx;
 	err = skb->len;
  out_err:
-	nl80211_finish_wdev_dump(rdev);
+	rtnl_unlock();
 	return err;
 }
 
@@ -5470,9 +5458,10 @@ static int nl80211_dump_mpp(struct sk_buff *skb,
 	int path_idx = cb->args[2];
 	int err;
 
+	rtnl_lock();
 	err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
 	if (err)
-		return err;
+		goto out_err;
 
 	if (!rdev->ops->dump_mpp) {
 		err = -EOPNOTSUPP;
@@ -5505,7 +5494,7 @@ static int nl80211_dump_mpp(struct sk_buff *skb,
 	cb->args[2] = path_idx;
 	err = skb->len;
  out_err:
-	nl80211_finish_wdev_dump(rdev);
+	rtnl_unlock();
 	return err;
 }
 
@@ -7674,9 +7663,12 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb)
 	int start = cb->args[2], idx = 0;
 	int err;
 
+	rtnl_lock();
 	err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
-	if (err)
+	if (err) {
+		rtnl_unlock();
 		return err;
+	}
 
 	wdev_lock(wdev);
 	spin_lock_bh(&rdev->bss_lock);
@@ -7699,7 +7691,7 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb)
 	wdev_unlock(wdev);
 
 	cb->args[2] = idx;
-	nl80211_finish_wdev_dump(rdev);
+	rtnl_unlock();
 
 	return skb->len;
 }
@@ -7784,9 +7776,10 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
 	int res;
 	bool radio_stats;
 
+	rtnl_lock();
 	res = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
 	if (res)
-		return res;
+		goto out_err;
 
 	/* prepare_wdev_dump parsed the attributes */
 	radio_stats = attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS];
@@ -7827,7 +7820,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
 	cb->args[2] = survey_idx;
 	res = skb->len;
  out_err:
-	nl80211_finish_wdev_dump(rdev);
+	rtnl_unlock();
 	return res;
 }
 
@@ -11508,17 +11501,13 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
 	void *data = NULL;
 	unsigned int data_len = 0;
 
-	rtnl_lock();
-
 	if (cb->args[0]) {
 		/* subtract the 1 again here */
 		struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1);
 		struct wireless_dev *tmp;
 
-		if (!wiphy) {
-			err = -ENODEV;
-			goto out_unlock;
-		}
+		if (!wiphy)
+			return -ENODEV;
 		*rdev = wiphy_to_rdev(wiphy);
 		*wdev = NULL;
 
@@ -11538,23 +11527,19 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
 	err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
 			  attrbuf, nl80211_fam.maxattr, nl80211_policy);
 	if (err)
-		goto out_unlock;
+		return err;
 
 	if (!attrbuf[NL80211_ATTR_VENDOR_ID] ||
-	    !attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) {
-		err = -EINVAL;
-		goto out_unlock;
-	}
+	    !attrbuf[NL80211_ATTR_VENDOR_SUBCMD])
+		return -EINVAL;
 
 	*wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), attrbuf);
 	if (IS_ERR(*wdev))
 		*wdev = NULL;
 
 	*rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf);
-	if (IS_ERR(*rdev)) {
-		err = PTR_ERR(*rdev);
-		goto out_unlock;
-	}
+	if (IS_ERR(*rdev))
+		return PTR_ERR(*rdev);
 
 	vid = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_ID]);
 	subcmd = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_SUBCMD]);
@@ -11567,19 +11552,15 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
 		if (vcmd->info.vendor_id != vid || vcmd->info.subcmd != subcmd)
 			continue;
 
-		if (!vcmd->dumpit) {
-			err = -EOPNOTSUPP;
-			goto out_unlock;
-		}
+		if (!vcmd->dumpit)
+			return -EOPNOTSUPP;
 
 		vcmd_idx = i;
 		break;
 	}
 
-	if (vcmd_idx < 0) {
-		err = -EOPNOTSUPP;
-		goto out_unlock;
-	}
+	if (vcmd_idx < 0)
+		return -EOPNOTSUPP;
 
 	if (attrbuf[NL80211_ATTR_VENDOR_DATA]) {
 		data = nla_data(attrbuf[NL80211_ATTR_VENDOR_DATA]);
@@ -11596,9 +11577,6 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
 
 	/* keep rtnl locked in successful case */
 	return 0;
- out_unlock:
-	rtnl_unlock();
-	return err;
 }
 
 static int nl80211_vendor_cmd_dump(struct sk_buff *skb,
@@ -11613,9 +11591,10 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb,
 	int err;
 	struct nlattr *vendor_data;
 
+	rtnl_lock();
 	err = nl80211_prepare_vendor_dump(skb, cb, &rdev, &wdev);
 	if (err)
-		return err;
+		goto out;
 
 	vcmd_idx = cb->args[2];
 	data = (void *)cb->args[3];
@@ -11624,15 +11603,21 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb,
 
 	if (vcmd->flags & (WIPHY_VENDOR_CMD_NEED_WDEV |
 			   WIPHY_VENDOR_CMD_NEED_NETDEV)) {
-		if (!wdev)
-			return -EINVAL;
+		if (!wdev) {
+			err = -EINVAL;
+			goto out;
+		}
 		if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_NETDEV &&
-		    !wdev->netdev)
-			return -EINVAL;
+		    !wdev->netdev) {
+			err = -EINVAL;
+			goto out;
+		}
 
 		if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) {
-			if (!wdev_running(wdev))
-				return -ENETDOWN;
+			if (!wdev_running(wdev)) {
+				err = -ENETDOWN;
+				goto out;
+			}
 		}
 	}
 
-- 
cgit v1.2.3


From 7c468447f40645fbf2a033dfdaa92b1957130d50 Mon Sep 17 00:00:00 2001
From: Gary R Hook <ghook@amd.com>
Date: Fri, 10 Mar 2017 12:28:18 -0600
Subject: crypto: ccp - Assign DMA commands to the channel's CCP

The CCP driver generally uses a round-robin approach when
assigning operations to available CCPs. For the DMA engine,
however, the DMA mappings of the SGs are associated with a
specific CCP. When an IOMMU is enabled, the IOMMU is
programmed based on this specific device.

If the DMA operations are not performed by that specific
CCP then addressing errors and I/O page faults will occur.

Update the CCP driver to allow a specific CCP device to be
requested for an operation and use this in the DMA engine
support.

Cc: <stable@vger.kernel.org> # 4.9.x-
Signed-off-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-dev.c       | 5 ++++-
 drivers/crypto/ccp/ccp-dmaengine.c | 1 +
 include/linux/ccp.h                | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
index 511ab042b5e7..92d1c6959f08 100644
--- a/drivers/crypto/ccp/ccp-dev.c
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -283,11 +283,14 @@ EXPORT_SYMBOL_GPL(ccp_version);
  */
 int ccp_enqueue_cmd(struct ccp_cmd *cmd)
 {
-	struct ccp_device *ccp = ccp_get_device();
+	struct ccp_device *ccp;
 	unsigned long flags;
 	unsigned int i;
 	int ret;
 
+	/* Some commands might need to be sent to a specific device */
+	ccp = cmd->ccp ? cmd->ccp : ccp_get_device();
+
 	if (!ccp)
 		return -ENODEV;
 
diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c
index e5d9278f4019..8d0eeb46d4a2 100644
--- a/drivers/crypto/ccp/ccp-dmaengine.c
+++ b/drivers/crypto/ccp/ccp-dmaengine.c
@@ -390,6 +390,7 @@ static struct ccp_dma_desc *ccp_create_desc(struct dma_chan *dma_chan,
 			goto err;
 
 		ccp_cmd = &cmd->ccp_cmd;
+		ccp_cmd->ccp = chan->ccp;
 		ccp_pt = &ccp_cmd->u.passthru_nomap;
 		ccp_cmd->flags = CCP_CMD_MAY_BACKLOG;
 		ccp_cmd->flags |= CCP_CMD_PASSTHRU_NO_DMA_MAP;
diff --git a/include/linux/ccp.h b/include/linux/ccp.h
index c71dd8fa5764..c41b8d99dd0e 100644
--- a/include/linux/ccp.h
+++ b/include/linux/ccp.h
@@ -556,7 +556,7 @@ enum ccp_engine {
  * struct ccp_cmd - CCP operation request
  * @entry: list element (ccp driver use only)
  * @work: work element used for callbacks (ccp driver use only)
- * @ccp: CCP device to be run on (ccp driver use only)
+ * @ccp: CCP device to be run on
  * @ret: operation return code (ccp driver use only)
  * @flags: cmd processing flags
  * @engine: CCP operation to perform
-- 
cgit v1.2.3


From 69db7009318758769d625b023402161c750f7876 Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Tue, 14 Mar 2017 07:36:01 -0400
Subject: hwrng: amd - Revert managed API changes

After commit 31b2a73c9c5f ("hwrng: amd - Migrate to managed API"), the
amd-rng driver uses devres with pci_dev->dev to keep track of resources,
but does not actually register a PCI driver.  This results in the
following issues:

1. The message

WARNING: CPU: 2 PID: 621 at drivers/base/dd.c:349 driver_probe_device+0x38c

is output when the i2c_amd756 driver loads and attempts to register a PCI
driver.  The PCI & device subsystems assume that no resources have been
registered for the device, and the WARN_ON() triggers since amd-rng has
already do so.

2.  The driver leaks memory because the driver does not attach to a
device.  The driver only uses the PCI device as a reference.   devm_*()
functions will release resources on driver detach, which the amd-rng
driver will never do.  As a result,

3.  The driver cannot be reloaded because there is always a use of the
ioport and region after the first load of the driver.

Revert the changes made by 31b2a73c9c5f ("hwrng: amd - Migrate to managed
API").

Cc: <stable@vger.kernel.org>
Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Fixes: 31b2a73c9c5f ("hwrng: amd - Migrate to managed API").
Cc: Matt Mackall <mpm@selenic.com>
Cc: Corentin LABBE <clabbe.montjoie@gmail.com>
Cc: PrasannaKumar Muralidharan <prasannatsmkumar@gmail.com>
Cc: Wei Yongjun <weiyongjun1@huawei.com>
Cc: linux-crypto@vger.kernel.org
Cc: linux-geode@lists.infradead.org
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/amd-rng.c | 42 ++++++++++++++++++++++++++++++++--------
 1 file changed, 34 insertions(+), 8 deletions(-)

diff --git a/drivers/char/hw_random/amd-rng.c b/drivers/char/hw_random/amd-rng.c
index 4a99ac756f08..9959c762da2f 100644
--- a/drivers/char/hw_random/amd-rng.c
+++ b/drivers/char/hw_random/amd-rng.c
@@ -55,6 +55,7 @@ MODULE_DEVICE_TABLE(pci, pci_tbl);
 struct amd768_priv {
 	void __iomem *iobase;
 	struct pci_dev *pcidev;
+	u32 pmbase;
 };
 
 static int amd_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
@@ -148,33 +149,58 @@ found:
 	if (pmbase == 0)
 		return -EIO;
 
-	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
 
-	if (!devm_request_region(&pdev->dev, pmbase + PMBASE_OFFSET,
-				PMBASE_SIZE, DRV_NAME)) {
+	if (!request_region(pmbase + PMBASE_OFFSET, PMBASE_SIZE, DRV_NAME)) {
 		dev_err(&pdev->dev, DRV_NAME " region 0x%x already in use!\n",
 			pmbase + 0xF0);
-		return -EBUSY;
+		err = -EBUSY;
+		goto out;
 	}
 
-	priv->iobase = devm_ioport_map(&pdev->dev, pmbase + PMBASE_OFFSET,
-			PMBASE_SIZE);
+	priv->iobase = ioport_map(pmbase + PMBASE_OFFSET, PMBASE_SIZE);
 	if (!priv->iobase) {
 		pr_err(DRV_NAME "Cannot map ioport\n");
-		return -ENOMEM;
+		err = -EINVAL;
+		goto err_iomap;
 	}
 
 	amd_rng.priv = (unsigned long)priv;
+	priv->pmbase = pmbase;
 	priv->pcidev = pdev;
 
 	pr_info(DRV_NAME " detected\n");
-	return devm_hwrng_register(&pdev->dev, &amd_rng);
+	err = hwrng_register(&amd_rng);
+	if (err) {
+		pr_err(DRV_NAME " registering failed (%d)\n", err);
+		goto err_hwrng;
+	}
+	return 0;
+
+err_hwrng:
+	ioport_unmap(priv->iobase);
+err_iomap:
+	release_region(pmbase + PMBASE_OFFSET, PMBASE_SIZE);
+out:
+	kfree(priv);
+	return err;
 }
 
 static void __exit mod_exit(void)
 {
+	struct amd768_priv *priv;
+
+	priv = (struct amd768_priv *)amd_rng.priv;
+
+	hwrng_unregister(&amd_rng);
+
+	ioport_unmap(priv->iobase);
+
+	release_region(priv->pmbase + PMBASE_OFFSET, PMBASE_SIZE);
+
+	kfree(priv);
 }
 
 module_init(mod_init);
-- 
cgit v1.2.3


From 8c75704ebcac2ffa31ee7bcc359baf701b52bf00 Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Tue, 14 Mar 2017 07:36:02 -0400
Subject: hwrng: geode - Revert managed API changes

After commit e9afc746299d ("hwrng: geode - Use linux/io.h instead of
asm/io.h") the geode-rng driver uses devres with pci_dev->dev to keep
track of resources, but does not actually register a PCI driver.  This
results in the following issues:

1.  The driver leaks memory because the driver does not attach to a
device.  The driver only uses the PCI device as a reference.   devm_*()
functions will release resources on driver detach, which the geode-rng
driver will never do.  As a result,

2.  The driver cannot be reloaded because there is always a use of the
ioport and region after the first load of the driver.

Revert the changes made by  e9afc746299d ("hwrng: geode - Use linux/io.h
instead of asm/io.h").

Cc: <stable@vger.kernel.org>
Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Fixes: 6e9b5e76882c ("hwrng: geode - Migrate to managed API")
Cc: Matt Mackall <mpm@selenic.com>
Cc: Corentin LABBE <clabbe.montjoie@gmail.com>
Cc: PrasannaKumar Muralidharan <prasannatsmkumar@gmail.com>
Cc: Wei Yongjun <weiyongjun1@huawei.com>
Cc: linux-crypto@vger.kernel.org
Cc: linux-geode@lists.infradead.org
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/geode-rng.c | 50 ++++++++++++++++++++++++++------------
 1 file changed, 35 insertions(+), 15 deletions(-)

diff --git a/drivers/char/hw_random/geode-rng.c b/drivers/char/hw_random/geode-rng.c
index e7a245942029..e1d421a36a13 100644
--- a/drivers/char/hw_random/geode-rng.c
+++ b/drivers/char/hw_random/geode-rng.c
@@ -31,6 +31,9 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 
+
+#define PFX	KBUILD_MODNAME ": "
+
 #define GEODE_RNG_DATA_REG   0x50
 #define GEODE_RNG_STATUS_REG 0x54
 
@@ -82,6 +85,7 @@ static struct hwrng geode_rng = {
 
 static int __init mod_init(void)
 {
+	int err = -ENODEV;
 	struct pci_dev *pdev = NULL;
 	const struct pci_device_id *ent;
 	void __iomem *mem;
@@ -89,27 +93,43 @@ static int __init mod_init(void)
 
 	for_each_pci_dev(pdev) {
 		ent = pci_match_id(pci_tbl, pdev);
-		if (ent) {
-			rng_base = pci_resource_start(pdev, 0);
-			if (rng_base == 0)
-				return -ENODEV;
-
-			mem = devm_ioremap(&pdev->dev, rng_base, 0x58);
-			if (!mem)
-				return -ENOMEM;
-			geode_rng.priv = (unsigned long)mem;
-
-			pr_info("AMD Geode RNG detected\n");
-			return devm_hwrng_register(&pdev->dev, &geode_rng);
-		}
+		if (ent)
+			goto found;
 	}
-
 	/* Device not found. */
-	return -ENODEV;
+	goto out;
+
+found:
+	rng_base = pci_resource_start(pdev, 0);
+	if (rng_base == 0)
+		goto out;
+	err = -ENOMEM;
+	mem = ioremap(rng_base, 0x58);
+	if (!mem)
+		goto out;
+	geode_rng.priv = (unsigned long)mem;
+
+	pr_info("AMD Geode RNG detected\n");
+	err = hwrng_register(&geode_rng);
+	if (err) {
+		pr_err(PFX "RNG registering failed (%d)\n",
+		       err);
+		goto err_unmap;
+	}
+out:
+	return err;
+
+err_unmap:
+	iounmap(mem);
+	goto out;
 }
 
 static void __exit mod_exit(void)
 {
+	void __iomem *mem = (void __iomem *)geode_rng.priv;
+
+	hwrng_unregister(&geode_rng);
+	iounmap(mem);
 }
 
 module_init(mod_init);
-- 
cgit v1.2.3


From f717629c7f834ab2efa05c7dbf0826f1d7c32ade Mon Sep 17 00:00:00 2001
From: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Date: Thu, 16 Mar 2017 14:37:11 +0530
Subject: powerpc: Wire up statx() syscall

Test runs on a ppc64 BE guest succeeded. linux/samples/statx/test-statx
program was executed on the following file types,

1. Regular file
2. Directory
3. device file
4. symlink
5. Named pipe

The test run also included invoking test-statx with the runtime options
provided in the main() function of test-statx.c

Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/systbl.h      | 1 +
 arch/powerpc/include/asm/unistd.h      | 2 +-
 arch/powerpc/include/uapi/asm/unistd.h | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 4b369d83fe9c..1c9470881c4a 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -387,3 +387,4 @@ SYSCALL(copy_file_range)
 COMPAT_SYS_SPU(preadv2)
 COMPAT_SYS_SPU(pwritev2)
 SYSCALL(kexec_file_load)
+SYSCALL(statx)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index eb1acee91a20..9ba11dbcaca9 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -12,7 +12,7 @@
 #include <uapi/asm/unistd.h>
 
 
-#define NR_syscalls		383
+#define NR_syscalls		384
 
 #define __NR__exit __NR_exit
 
diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h
index 2f26335a3c42..b85f14228857 100644
--- a/arch/powerpc/include/uapi/asm/unistd.h
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -393,5 +393,6 @@
 #define __NR_preadv2		380
 #define __NR_pwritev2		381
 #define __NR_kexec_file_load	382
+#define __NR_statx		383
 
 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
-- 
cgit v1.2.3


From 40ceda09c8c84694c2ca6b00bcc6dc71e8e62d96 Mon Sep 17 00:00:00 2001
From: yong mao <yong.mao@mediatek.com>
Date: Sat, 4 Mar 2017 15:10:03 +0800
Subject: mmc: mediatek: Fixed bug where clock frequency could be set wrong

This patch can fix two issues:

Issue 1:
In previous code, div may be overflow when setting clock frequency
as f_min. We can use DIV_ROUND_UP to fix this boundary related
issue.

Issue 2:
In previous code, we can not set the correct clock frequency when
div equals 0xff.

Signed-off-by: Yong Mao <yong.mao@mediatek.com>
Signed-off-by: Chaotian Jing <chaotian.jing@mediatek.com>
Reviewed-by: Daniel Kurtz <djkurtz@chromium.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/mtk-sd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index 8e32580c12b5..b235d8da0602 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -580,7 +580,7 @@ static void msdc_set_mclk(struct msdc_host *host, unsigned char timing, u32 hz)
 		}
 	}
 	sdr_set_field(host->base + MSDC_CFG, MSDC_CFG_CKMOD | MSDC_CFG_CKDIV,
-			(mode << 8) | (div % 0xff));
+		      (mode << 8) | div);
 	sdr_set_bits(host->base + MSDC_CFG, MSDC_CFG_CKPDN);
 	while (!(readl(host->base + MSDC_CFG) & MSDC_CFG_CKSTB))
 		cpu_relax();
@@ -1559,7 +1559,7 @@ static int msdc_drv_probe(struct platform_device *pdev)
 	host->src_clk_freq = clk_get_rate(host->src_clk);
 	/* Set host parameters to mmc */
 	mmc->ops = &mt_msdc_ops;
-	mmc->f_min = host->src_clk_freq / (4 * 255);
+	mmc->f_min = DIV_ROUND_UP(host->src_clk_freq, 4 * 255);
 
 	mmc->caps |= MMC_CAP_ERASE | MMC_CAP_CMD23;
 	/* MMC core transfer sizes tunable parameters */
-- 
cgit v1.2.3


From e552a8389aa409e257b7dcba74f67f128f979ccc Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 16 Mar 2017 13:47:48 +0100
Subject: perf/core: Fix use-after-free in perf_release()

Dmitry reported syzcaller tripped a use-after-free in perf_release().

After much puzzlement Oleg spotted the below scenario:

  Task1                           Task2

  fork()
    perf_event_init_task()
    /* ... */
    goto bad_fork_$foo;
    /* ... */
    perf_event_free_task()
      mutex_lock(ctx->lock)
      perf_free_event(B)

                                  perf_event_release_kernel(A)
                                    mutex_lock(A->child_mutex)
                                    list_for_each_entry(child, ...) {
                                      /* child == B */
                                      ctx = B->ctx;
                                      get_ctx(ctx);
                                      mutex_unlock(A->child_mutex);

        mutex_lock(A->child_mutex)
        list_del_init(B->child_list)
        mutex_unlock(A->child_mutex)

        /* ... */

      mutex_unlock(ctx->lock);
      put_ctx() /* >0 */
    free_task();
                                      mutex_lock(ctx->lock);
                                      mutex_lock(A->child_mutex);
                                      /* ... */
                                      mutex_unlock(A->child_mutex);
                                      mutex_unlock(ctx->lock)
                                      put_ctx() /* 0 */
                                        ctx->task && !TOMBSTONE
                                          put_task_struct() /* UAF */

This patch closes the hole by making perf_event_free_task() destroy the
task <-> ctx relation such that perf_event_release_kernel() will no longer
observe the now dead task.

Spotted-by: Oleg Nesterov <oleg@redhat.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: fweisbec@gmail.com
Cc: oleg@redhat.com
Cc: stable@vger.kernel.org
Fixes: c6e5b73242d2 ("perf: Synchronously clean up child events")
Link: http://lkml.kernel.org/r/20170314155949.GE32474@worktop
Link: http://lkml.kernel.org/r/20170316125823.140295131@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1031bdf9f012..4742909c56e6 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10415,6 +10415,17 @@ void perf_event_free_task(struct task_struct *task)
 			continue;
 
 		mutex_lock(&ctx->mutex);
+		raw_spin_lock_irq(&ctx->lock);
+		/*
+		 * Destroy the task <-> ctx relation and mark the context dead.
+		 *
+		 * This is important because even though the task hasn't been
+		 * exposed yet the context has been (through child_list).
+		 */
+		RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], NULL);
+		WRITE_ONCE(ctx->task, TASK_TOMBSTONE);
+		put_task_struct(task); /* cannot be last */
+		raw_spin_unlock_irq(&ctx->lock);
 again:
 		list_for_each_entry_safe(event, tmp, &ctx->pinned_groups,
 				group_entry)
-- 
cgit v1.2.3


From e7cc4865f0f31698ef2f7aac01a50e78968985b7 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 16 Mar 2017 13:47:49 +0100
Subject: perf/core: Fix event inheritance on fork()

While hunting for clues to a use-after-free, Oleg spotted that
perf_event_init_context() can loose an error value with the result
that fork() can succeed even though we did not fully inherit the perf
event context.

Spotted-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: oleg@redhat.com
Cc: stable@vger.kernel.org
Fixes: 889ff0150661 ("perf/core: Split context's event group list into pinned and non-pinned lists")
Link: http://lkml.kernel.org/r/20170316125823.190342547@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4742909c56e6..fc7c9a85944d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10679,7 +10679,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn)
 		ret = inherit_task_group(event, parent, parent_ctx,
 					 child, ctxn, &inherited_all);
 		if (ret)
-			break;
+			goto out_unlock;
 	}
 
 	/*
@@ -10695,7 +10695,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn)
 		ret = inherit_task_group(event, parent, parent_ctx,
 					 child, ctxn, &inherited_all);
 		if (ret)
-			break;
+			goto out_unlock;
 	}
 
 	raw_spin_lock_irqsave(&parent_ctx->lock, flags);
@@ -10723,6 +10723,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn)
 	}
 
 	raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
+out_unlock:
 	mutex_unlock(&parent_ctx->mutex);
 
 	perf_unpin_context(parent_ctx);
-- 
cgit v1.2.3


From 15121c789e001168decac6483d192bdb7ea29e74 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 16 Mar 2017 13:47:50 +0100
Subject: perf/core: Simplify perf_event_free_task()

We have ctx->event_list that contains all events; no need to
repeatedly iterate the group lists to find them all.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: fweisbec@gmail.com
Link: http://lkml.kernel.org/r/20170316125823.239678244@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index fc7c9a85944d..5f21e5e09ba4 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10426,21 +10426,11 @@ void perf_event_free_task(struct task_struct *task)
 		WRITE_ONCE(ctx->task, TASK_TOMBSTONE);
 		put_task_struct(task); /* cannot be last */
 		raw_spin_unlock_irq(&ctx->lock);
-again:
-		list_for_each_entry_safe(event, tmp, &ctx->pinned_groups,
-				group_entry)
-			perf_free_event(event, ctx);
 
-		list_for_each_entry_safe(event, tmp, &ctx->flexible_groups,
-				group_entry)
+		list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry)
 			perf_free_event(event, ctx);
 
-		if (!list_empty(&ctx->pinned_groups) ||
-				!list_empty(&ctx->flexible_groups))
-			goto again;
-
 		mutex_unlock(&ctx->mutex);
-
 		put_ctx(ctx);
 	}
 }
-- 
cgit v1.2.3


From d8a8cfc76919b6c830305266b23ba671623f37ff Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 16 Mar 2017 13:47:51 +0100
Subject: perf/core: Better explain the inherit magic

While going through the event inheritance code Oleg got confused.

Add some comments to better explain the silent dissapearance of
orphaned events.

So what happens is that at perf_event_release_kernel() time; when an
event looses its connection to userspace (and ceases to exist from the
user's perspective) we can still have an arbitrary amount of inherited
copies of the event. We want to synchronously find and remove all
these child events.

Since that requires a bit of lock juggling, there is the possibility
that concurrent clone()s will create new child events. Therefore we
first mark the parent event as DEAD, which marks all the extant child
events as orphaned.

We then avoid copying orphaned events; in order to avoid getting more
of them.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: fweisbec@gmail.com
Link: http://lkml.kernel.org/r/20170316125823.289567442@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 36 +++++++++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5f21e5e09ba4..7298e149b732 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4254,7 +4254,7 @@ int perf_event_release_kernel(struct perf_event *event)
 
 	raw_spin_lock_irq(&ctx->lock);
 	/*
-	 * Mark this even as STATE_DEAD, there is no external reference to it
+	 * Mark this event as STATE_DEAD, there is no external reference to it
 	 * anymore.
 	 *
 	 * Anybody acquiring event->child_mutex after the below loop _must_
@@ -10468,7 +10468,12 @@ const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
 }
 
 /*
- * inherit a event from parent task to child task:
+ * Inherit a event from parent task to child task.
+ *
+ * Returns:
+ *  - valid pointer on success
+ *  - NULL for orphaned events
+ *  - IS_ERR() on error
  */
 static struct perf_event *
 inherit_event(struct perf_event *parent_event,
@@ -10562,6 +10567,16 @@ inherit_event(struct perf_event *parent_event,
 	return child_event;
 }
 
+/*
+ * Inherits an event group.
+ *
+ * This will quietly suppress orphaned events; !inherit_event() is not an error.
+ * This matches with perf_event_release_kernel() removing all child events.
+ *
+ * Returns:
+ *  - 0 on success
+ *  - <0 on error
+ */
 static int inherit_group(struct perf_event *parent_event,
 	      struct task_struct *parent,
 	      struct perf_event_context *parent_ctx,
@@ -10576,6 +10591,11 @@ static int inherit_group(struct perf_event *parent_event,
 				 child, NULL, child_ctx);
 	if (IS_ERR(leader))
 		return PTR_ERR(leader);
+	/*
+	 * @leader can be NULL here because of is_orphaned_event(). In this
+	 * case inherit_event() will create individual events, similar to what
+	 * perf_group_detach() would do anyway.
+	 */
 	list_for_each_entry(sub, &parent_event->sibling_list, group_entry) {
 		child_ctr = inherit_event(sub, parent, parent_ctx,
 					    child, leader, child_ctx);
@@ -10585,6 +10605,17 @@ static int inherit_group(struct perf_event *parent_event,
 	return 0;
 }
 
+/*
+ * Creates the child task context and tries to inherit the event-group.
+ *
+ * Clears @inherited_all on !attr.inherited or error. Note that we'll leave
+ * inherited_all set when we 'fail' to inherit an orphaned event; this is
+ * consistent with perf_event_release_kernel() removing all child events.
+ *
+ * Returns:
+ *  - 0 on success
+ *  - <0 on error
+ */
 static int
 inherit_task_group(struct perf_event *event, struct task_struct *parent,
 		   struct perf_event_context *parent_ctx,
@@ -10607,7 +10638,6 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent,
 		 * First allocate and initialize a context for the
 		 * child.
 		 */
-
 		child_ctx = alloc_perf_context(parent_ctx->pmu, child);
 		if (!child_ctx)
 			return -ENOMEM;
-- 
cgit v1.2.3


From 66822d815ae61ecb2d9dba9031517e8a8476969d Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 15 Mar 2017 21:11:46 -0400
Subject: drm/radeon: reinstate oland workaround for sclk

Higher sclks seem to be unstable on some boards.

bug: https://bugs.freedesktop.org/show_bug.cgi?id=100222

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/si_dpm.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index 72e1588580a1..c7af9fdd20c7 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -2985,9 +2985,13 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
 			max_sclk = 75000;
 		}
 	} else if (rdev->family == CHIP_OLAND) {
-		if ((rdev->pdev->device == 0x6604) &&
-		    (rdev->pdev->subsystem_vendor == 0x1028) &&
-		    (rdev->pdev->subsystem_device == 0x066F)) {
+		if ((rdev->pdev->revision == 0xC7) ||
+		    (rdev->pdev->revision == 0x80) ||
+		    (rdev->pdev->revision == 0x81) ||
+		    (rdev->pdev->revision == 0x83) ||
+		    (rdev->pdev->revision == 0x87) ||
+		    (rdev->pdev->device == 0x6604) ||
+		    (rdev->pdev->device == 0x6605)) {
 			max_sclk = 75000;
 		}
 	}
-- 
cgit v1.2.3


From e11ddff68a7c455e63c4b46154a3e75c699a7b55 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 15 Mar 2017 21:13:25 -0400
Subject: drm/amdgpu: reinstate oland workaround for sclk

Higher sclks seem to be unstable on some boards.

bug: https://bugs.freedesktop.org/show_bug.cgi?id=100222

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/si_dpm.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
index 33b504bafb88..c5dec210d529 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
@@ -3465,9 +3465,13 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev,
 			max_sclk = 75000;
 		}
 	} else if (adev->asic_type == CHIP_OLAND) {
-		if ((adev->pdev->device == 0x6604) &&
-		    (adev->pdev->subsystem_vendor == 0x1028) &&
-		    (adev->pdev->subsystem_device == 0x066F)) {
+		if ((adev->pdev->revision == 0xC7) ||
+		    (adev->pdev->revision == 0x80) ||
+		    (adev->pdev->revision == 0x81) ||
+		    (adev->pdev->revision == 0x83) ||
+		    (adev->pdev->revision == 0x87) ||
+		    (adev->pdev->device == 0x6604) ||
+		    (adev->pdev->device == 0x6605)) {
 			max_sclk = 75000;
 		}
 	}
-- 
cgit v1.2.3


From 60a970a6c58dedb4c6b2d90b75f8d6ad7c7b34dc Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Wed, 15 Mar 2017 10:13:32 +0800
Subject: drm/amdgpu: fix the clearing wb size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The clearing wb size should be the one that it is assigned.

Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a3a105ec99e2..de0cf3315484 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -475,7 +475,7 @@ static int amdgpu_wb_init(struct amdgpu_device *adev)
 	int r;
 
 	if (adev->wb.wb_obj == NULL) {
-		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * 4,
+		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t),
 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
 					    (void **)&adev->wb.wb);
@@ -488,7 +488,7 @@ static int amdgpu_wb_init(struct amdgpu_device *adev)
 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
 
 		/* clear wb memory */
-		memset((char *)adev->wb.wb, 0, AMDGPU_GPU_PAGE_SIZE);
+		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t));
 	}
 
 	return 0;
-- 
cgit v1.2.3


From 8bcd37d8b21de13d068414c018c9599281294a01 Mon Sep 17 00:00:00 2001
From: "Winkler, Tomas" <tomas.winkler@intel.com>
Date: Thu, 9 Mar 2017 16:58:21 +0200
Subject: mmc: core: mmc_blk_rw_cmd_err - remove unused variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix compilation warning:

drivers/mmc/core/block.c:1563:24: warning: variable ‘mq_rq’ set but not
used [-Wunused-but-set-variable]  struct mmc_queue_req *mq_rq;

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/block.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 05afefcfb611..ff3da960c473 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -1560,11 +1560,8 @@ static bool mmc_blk_rw_cmd_err(struct mmc_blk_data *md, struct mmc_card *card,
 			       struct mmc_blk_request *brq, struct request *req,
 			       bool old_req_pending)
 {
-	struct mmc_queue_req *mq_rq;
 	bool req_pending;
 
-	mq_rq = container_of(brq, struct mmc_queue_req, brq);
-
 	/*
 	 * If this is an SD card and we're writing, we can first
 	 * mark the known good sectors as ok.
-- 
cgit v1.2.3


From 2ce0c7b65505e0d915e99389cced45b478dc935d Mon Sep 17 00:00:00 2001
From: Romain Izard <romain.izard.pro@gmail.com>
Date: Thu, 9 Mar 2017 16:18:20 +0100
Subject: mmc: sdhci-of-at91: Support external regulators

The SDHCI controller in the SAMA5D2 chip requires a valid voltage set
in the power control register, otherwise commands will fail with a
timeout error.

When using the regulator framework to specify the regulator used by the
mmc device, the voltage is not configured, and it is not possible to use
the connected device.

Implement a custom 'set_power' function for this specific hardware, that
configures the voltage in the register in all cases.

Signed-off-by: Romain Izard <romain.izard.pro@gmail.com>
Acked-by: Ludovic Desroches <ludovic.desroches@microchip.com>
Cc: stable@vger.kernel.org #4.9+
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-of-at91.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
index 2f9ad213377a..7fd964256faa 100644
--- a/drivers/mmc/host/sdhci-of-at91.c
+++ b/drivers/mmc/host/sdhci-of-at91.c
@@ -85,11 +85,30 @@ static void sdhci_at91_set_clock(struct sdhci_host *host, unsigned int clock)
 	sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
 }
 
+/*
+ * In this specific implementation of the SDHCI controller, the power register
+ * needs to have a valid voltage set even when the power supply is managed by
+ * an external regulator.
+ */
+static void sdhci_at91_set_power(struct sdhci_host *host, unsigned char mode,
+		     unsigned short vdd)
+{
+	if (!IS_ERR(host->mmc->supply.vmmc)) {
+		struct mmc_host *mmc = host->mmc;
+
+		spin_unlock_irq(&host->lock);
+		mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd);
+		spin_lock_irq(&host->lock);
+	}
+	sdhci_set_power_noreg(host, mode, vdd);
+}
+
 static const struct sdhci_ops sdhci_at91_sama5d2_ops = {
 	.set_clock		= sdhci_at91_set_clock,
 	.set_bus_width		= sdhci_set_bus_width,
 	.reset			= sdhci_reset,
 	.set_uhs_signaling	= sdhci_set_uhs_signaling,
+	.set_power		= sdhci_at91_set_power,
 };
 
 static const struct sdhci_pltfm_data soc_data_sama5d2 = {
-- 
cgit v1.2.3


From 181302dc7239add8ab1449c23ecab193f52ee6ab Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 13 Mar 2017 13:40:22 +0100
Subject: mmc: ushc: fix NULL-deref at probe

Make sure to check the number of endpoints to avoid dereferencing a
NULL-pointer should a malicious device lack endpoints.

Fixes: 53f3a9e26ed5 ("mmc: USB SD Host Controller (USHC) driver")
Cc: stable <stable@vger.kernel.org>	# 2.6.37
Cc: David Vrabel <david.vrabel@csr.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/ushc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/mmc/host/ushc.c b/drivers/mmc/host/ushc.c
index d2c386f09d69..1d843357422e 100644
--- a/drivers/mmc/host/ushc.c
+++ b/drivers/mmc/host/ushc.c
@@ -426,6 +426,9 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id
 	struct ushc_data *ushc;
 	int ret;
 
+	if (intf->cur_altsetting->desc.bNumEndpoints < 1)
+		return -ENODEV;
+
 	mmc = mmc_alloc_host(sizeof(struct ushc_data), &intf->dev);
 	if (mmc == NULL)
 		return -ENOMEM;
-- 
cgit v1.2.3


From efd4b81abbe1ac753717f2f10cd3dab8bed6c103 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Thu, 16 Mar 2017 09:46:14 -0600
Subject: blk-stat: fix blk_stat_sum() if all samples are batched

We need to flush the batch _before_ we check the number of samples,
otherwise we'll miss all of the batched samples.

Fixes: cf43e6b ("block: add scalable completion tracking of requests")
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-stat.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/blk-stat.c b/block/blk-stat.c
index 9b43efb8933f..186fcb981e9b 100644
--- a/block/blk-stat.c
+++ b/block/blk-stat.c
@@ -30,11 +30,11 @@ static void blk_stat_flush_batch(struct blk_rq_stat *stat)
 
 static void blk_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src)
 {
+	blk_stat_flush_batch(src);
+
 	if (!src->nr_samples)
 		return;
 
-	blk_stat_flush_batch(src);
-
 	dst->min = min(dst->min, src->min);
 	dst->max = max(dst->max, src->max);
 
-- 
cgit v1.2.3


From 29c8bbbd6e21daa0997d1c3ee886b897ee7ad652 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 16 Mar 2017 16:27:43 +0000
Subject: afs: Fix missing put_page()

In afs_writepages_region(), inside the loop where we find dirty pages to
deal with, one of the if-statements is missing a put_page().

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/write.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/afs/write.c b/fs/afs/write.c
index c83c1a0e851f..e919e64cd4e0 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -513,6 +513,7 @@ static int afs_writepages_region(struct address_space *mapping,
 
 		if (PageWriteback(page) || !PageDirty(page)) {
 			unlock_page(page);
+			put_page(page);
 			continue;
 		}
 
-- 
cgit v1.2.3


From 5611ef280d814042825ee17688f5751266fc538b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 16 Mar 2017 16:27:43 +0000
Subject: afs: Fix page overput in afs_fill_page()

afs_fill_page() loads the page it wants to fill into the afs_read request
without incrementing its refcount - but then calls afs_put_read() to clean
up afterwards, which then releases a ref on the page.

Fix this by getting a ref on the page before calling
afs_vnode_fetch_data().

This causes sync after a write to hang in afs_writepages_region() because
find_get_pages_tag() gets confused and doesn't return.

Fixes: 196ee9cd2d04 ("afs: Make afs_fs_fetch_data() take a list of pages")
Reported-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marc Dionne <marc.dionne@auristor.com>
---
 fs/afs/write.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/afs/write.c b/fs/afs/write.c
index e919e64cd4e0..3ac52f6a96ff 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -101,6 +101,7 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
 	req->pos = pos;
 	req->nr_pages = 1;
 	req->pages[0] = page;
+	get_page(page);
 
 	i_size = i_size_read(&vnode->vfs_inode);
 	if (pos + PAGE_SIZE > i_size)
-- 
cgit v1.2.3


From 6186f0788b31f44affceeedc7b48eb10faea120d Mon Sep 17 00:00:00 2001
From: Marc Dionne <marc.dionne@auristor.com>
Date: Thu, 16 Mar 2017 16:27:43 +0000
Subject: afs: Populate group ID from vnode status

The group was hard coded to GLOBAL_ROOT_GID; use the group
ID that was received from the server.

Signed-off-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 1e4897a048d2..299dbaeb2e2a 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -70,7 +70,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
 
 	set_nlink(inode, vnode->status.nlink);
 	inode->i_uid		= vnode->status.owner;
-	inode->i_gid		= GLOBAL_ROOT_GID;
+	inode->i_gid            = vnode->status.group;
 	inode->i_size		= vnode->status.size;
 	inode->i_ctime.tv_sec	= vnode->status.mtime_server;
 	inode->i_ctime.tv_nsec	= 0;
-- 
cgit v1.2.3


From 627f46943ff90bcc32ddeb675d881c043c6fa2ae Mon Sep 17 00:00:00 2001
From: Marc Dionne <marc.dionne@auristor.com>
Date: Thu, 16 Mar 2017 16:27:44 +0000
Subject: afs: Adjust mode bits processing

Mode bits for an afs file should not be enforced in the usual
way.

For files, the absence of user bits can restrict file access
with respect to what is granted by the server.

These bits apply regardless of the owner or the current uid; the
rest of the mode bits (group, other) are ignored.

Signed-off-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/security.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/fs/afs/security.c b/fs/afs/security.c
index 8d010422dc89..bfa9d3428383 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -340,17 +340,22 @@ int afs_permission(struct inode *inode, int mask)
 	} else {
 		if (!(access & AFS_ACE_LOOKUP))
 			goto permission_denied;
+		if ((mask & MAY_EXEC) && !(inode->i_mode & S_IXUSR))
+			goto permission_denied;
 		if (mask & (MAY_EXEC | MAY_READ)) {
 			if (!(access & AFS_ACE_READ))
 				goto permission_denied;
+			if (!(inode->i_mode & S_IRUSR))
+				goto permission_denied;
 		} else if (mask & MAY_WRITE) {
 			if (!(access & AFS_ACE_WRITE))
 				goto permission_denied;
+			if (!(inode->i_mode & S_IWUSR))
+				goto permission_denied;
 		}
 	}
 
 	key_put(key);
-	ret = generic_permission(inode, mask);
 	_leave(" = %d", ret);
 	return ret;
 
-- 
cgit v1.2.3


From bcd89270d93b7edebb5de5e5e7dca1a77a33496e Mon Sep 17 00:00:00 2001
From: Marc Dionne <marc.dionne@auristor.com>
Date: Thu, 16 Mar 2017 16:27:44 +0000
Subject: afs: Deal with an empty callback array

Servers may send a callback array that is the same size as
the FID array, or an empty array.  If the callback count is
0, the code would attempt to read (fid_count * 12) bytes of
data, which would fail and result in an unmarshalling error.
This would lead to stale data for remotely modified files
or directories.

Store the callback array size in the internal afs_call
structure and use that to determine the amount of data to
read.

Signed-off-by: Marc Dionne <marc.dionne@auristor.com>
---
 fs/afs/cmservice.c | 11 +++++------
 fs/afs/internal.h  |  5 ++++-
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 2edbdcbf6432..3062cceb5c2a 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -187,7 +187,6 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 	struct afs_callback *cb;
 	struct afs_server *server;
 	__be32 *bp;
-	u32 tmp;
 	int ret, loop;
 
 	_enter("{%u}", call->unmarshall);
@@ -249,9 +248,9 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 		if (ret < 0)
 			return ret;
 
-		tmp = ntohl(call->tmp);
-		_debug("CB count: %u", tmp);
-		if (tmp != call->count && tmp != 0)
+		call->count2 = ntohl(call->tmp);
+		_debug("CB count: %u", call->count2);
+		if (call->count2 != call->count && call->count2 != 0)
 			return -EBADMSG;
 		call->offset = 0;
 		call->unmarshall++;
@@ -259,14 +258,14 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 	case 4:
 		_debug("extract CB array");
 		ret = afs_extract_data(call, call->buffer,
-				       call->count * 3 * 4, false);
+				       call->count2 * 3 * 4, false);
 		if (ret < 0)
 			return ret;
 
 		_debug("unmarshall CB array");
 		cb = call->request;
 		bp = call->buffer;
-		for (loop = call->count; loop > 0; loop--, cb++) {
+		for (loop = call->count2; loop > 0; loop--, cb++) {
 			cb->version	= ntohl(*bp++);
 			cb->expiry	= ntohl(*bp++);
 			cb->type	= ntohl(*bp++);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 5dfa56903a2d..8499870147ef 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -90,7 +90,10 @@ struct afs_call {
 	unsigned		request_size;	/* size of request data */
 	unsigned		reply_max;	/* maximum size of reply */
 	unsigned		first_offset;	/* offset into mapping[first] */
-	unsigned		last_to;	/* amount of mapping[last] */
+	union {
+		unsigned	last_to;	/* amount of mapping[last] */
+		unsigned	count2;		/* count used in unmarshalling */
+	};
 	unsigned char		unmarshall;	/* unmarshalling phase */
 	bool			incoming;	/* T if incoming call */
 	bool			send_pages;	/* T if data from mapping should be sent */
-- 
cgit v1.2.3


From 6db3ac3c4bc552837d232ec794559a2fae2815a0 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 16 Mar 2017 16:27:44 +0000
Subject: afs: Handle better the server returning excess or short data

When an AFS server is given an FS.FetchData{,64} request to read data from
a file, it is permitted by the protocol to return more or less than was
requested.  kafs currently relies on the latter behaviour in readpage{,s}
to handle a partial page at the end of the file (we just ask for a whole
page and clear space beyond the short read).

However, we don't handle all cases.  Add:

 (1) Handle excess data by discarding it rather than aborting.  Note that
     we use a common static buffer to discard into so that the decryption
     algorithm advances the PCBC state.

 (2) Handle a short read that affects more than just the last page.

Note that if a read comes up unexpectedly short of long, it's possible that
the server's copy of the file changed - in which case the data version
number will have been incremented and the callback will have been broken -
in which case all the pages currently attached to the inode will be zapped
anyway at some point.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/file.c     |  7 +++++--
 fs/afs/fsclient.c | 49 +++++++++++++++++++++++++++++++++++--------------
 2 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/fs/afs/file.c b/fs/afs/file.c
index ba7b71fba34b..a38e1c30d110 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -184,10 +184,13 @@ int afs_page_filler(void *data, struct page *page)
 		if (!req)
 			goto enomem;
 
+		/* We request a full page.  If the page is a partial one at the
+		 * end of the file, the server will return a short read and the
+		 * unmarshalling code will clear the unfilled space.
+		 */
 		atomic_set(&req->usage, 1);
 		req->pos = (loff_t)page->index << PAGE_SHIFT;
-		req->len = min_t(size_t, i_size_read(inode) - req->pos,
-				 PAGE_SIZE);
+		req->len = PAGE_SIZE;
 		req->nr_pages = 1;
 		req->pages[0] = page;
 		get_page(page);
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index ac8e766978dc..bf8904a1a58f 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -16,6 +16,12 @@
 #include "internal.h"
 #include "afs_fs.h"
 
+/*
+ * We need somewhere to discard into in case the server helpfully returns more
+ * than we asked for in FS.FetchData{,64}.
+ */
+static u8 afs_discard_buffer[64];
+
 /*
  * decode an AFSFid block
  */
@@ -353,12 +359,6 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 
 		req->actual_len |= ntohl(call->tmp);
 		_debug("DATA length: %llu", req->actual_len);
-		/* Check that the server didn't want to send us extra.  We
-		 * might want to just discard instead, but that requires
-		 * cooperation from AF_RXRPC.
-		 */
-		if (req->actual_len > req->len)
-			return -EBADMSG;
 
 		req->remain = req->actual_len;
 		call->offset = req->pos & (PAGE_SIZE - 1);
@@ -368,6 +368,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 		call->unmarshall++;
 
 	begin_page:
+		ASSERTCMP(req->index, <, req->nr_pages);
 		if (req->remain > PAGE_SIZE - call->offset)
 			size = PAGE_SIZE - call->offset;
 		else
@@ -390,18 +391,37 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 			if (req->page_done)
 				req->page_done(call, req);
 			if (req->remain > 0) {
-				req->index++;
 				call->offset = 0;
+				req->index++;
+				if (req->index >= req->nr_pages)
+					goto begin_discard;
 				goto begin_page;
 			}
 		}
+		goto no_more_data;
+
+		/* Discard any excess data the server gave us */
+	begin_discard:
+	case 4:
+		size = min_t(size_t, sizeof(afs_discard_buffer), req->remain);
+		call->count = size;
+		_debug("extract discard %u/%llu %zu/%u",
+		       req->remain, req->actual_len, call->offset, call->count);
+
+		call->offset = 0;
+		ret = afs_extract_data(call, afs_discard_buffer, call->count, true);
+		req->remain -= call->offset;
+		if (ret < 0)
+			return ret;
+		if (req->remain > 0)
+			goto begin_discard;
 
 	no_more_data:
 		call->offset = 0;
-		call->unmarshall++;
+		call->unmarshall = 5;
 
 		/* extract the metadata */
-	case 4:
+	case 5:
 		ret = afs_extract_data(call, call->buffer,
 				       (21 + 3 + 6) * 4, false);
 		if (ret < 0)
@@ -416,16 +436,17 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 		call->offset = 0;
 		call->unmarshall++;
 
-	case 5:
+	case 6:
 		break;
 	}
 
-	if (call->count < PAGE_SIZE) {
-		buffer = kmap(req->pages[req->index]);
-		memset(buffer + call->count, 0, PAGE_SIZE - call->count);
-		kunmap(req->pages[req->index]);
+	for (; req->index < req->nr_pages; req->index++) {
+		if (call->count < PAGE_SIZE)
+			zero_user_segment(req->pages[req->index],
+					  call->count, PAGE_SIZE);
 		if (req->page_done)
 			req->page_done(call, req);
+		call->count = 0;
 	}
 
 	_leave(" = 0 [done]");
-- 
cgit v1.2.3


From 3448e6521755862446aed28e29abf12565d8844e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 16 Mar 2017 16:27:44 +0000
Subject: afs: Kill struct afs_read::pg_offset

Kill struct afs_read::pg_offset as nothing uses it.  It's unnecessary as pos
can be masked off.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/internal.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 8499870147ef..7784a8bc375c 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -135,7 +135,6 @@ struct afs_read {
 	atomic_t		usage;
 	unsigned int		remain;		/* Amount remaining */
 	unsigned int		index;		/* Which page we're reading into */
-	unsigned int		pg_offset;	/* Offset in page we're at */
 	unsigned int		nr_pages;
 	void (*page_done)(struct afs_call *, struct afs_read *);
 	struct page		*pages[];
-- 
cgit v1.2.3


From e8e581a88c5f5fc7cf1f636d122b77fbcfc8c2f6 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 16 Mar 2017 16:27:44 +0000
Subject: afs: Handle a short write to an AFS page

Handle the situation where afs_write_begin() is told to expect that a
full-page write will be made, but this doesn't happen (EFAULT, CTRL-C,
etc.), and so afs_write_end() sees a partial write took place.  Currently,
no attempt is to deal with the discrepency.

Fix this by loading the gap from the server.

Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/fsclient.c |  4 +++-
 fs/afs/internal.h |  2 +-
 fs/afs/write.c    | 28 +++++++++++++++++++---------
 3 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index bf8904a1a58f..6f917dd1238c 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -393,8 +393,10 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 			if (req->remain > 0) {
 				call->offset = 0;
 				req->index++;
-				if (req->index >= req->nr_pages)
+				if (req->index >= req->nr_pages) {
+					call->unmarshall = 4;
 					goto begin_discard;
+				}
 				goto begin_page;
 			}
 		}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 7784a8bc375c..dc2cb486e127 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -130,7 +130,7 @@ struct afs_call_type {
  */
 struct afs_read {
 	loff_t			pos;		/* Where to start reading */
-	loff_t			len;		/* How much to read */
+	loff_t			len;		/* How much we're asking for */
 	loff_t			actual_len;	/* How much we're actually getting */
 	atomic_t		usage;
 	unsigned int		remain;		/* Amount remaining */
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 3ac52f6a96ff..ea66890fc188 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -84,10 +84,9 @@ void afs_put_writeback(struct afs_writeback *wb)
  * partly or wholly fill a page that's under preparation for writing
  */
 static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
-			 loff_t pos, struct page *page)
+			 loff_t pos, unsigned int len, struct page *page)
 {
 	struct afs_read *req;
-	loff_t i_size;
 	int ret;
 
 	_enter(",,%llu", (unsigned long long)pos);
@@ -99,16 +98,11 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
 
 	atomic_set(&req->usage, 1);
 	req->pos = pos;
+	req->len = len;
 	req->nr_pages = 1;
 	req->pages[0] = page;
 	get_page(page);
 
-	i_size = i_size_read(&vnode->vfs_inode);
-	if (pos + PAGE_SIZE > i_size)
-		req->len = i_size - pos;
-	else
-		req->len = PAGE_SIZE;
-
 	ret = afs_vnode_fetch_data(vnode, key, req);
 	afs_put_read(req);
 	if (ret < 0) {
@@ -164,7 +158,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
 	/* page won't leak in error case: it eventually gets cleaned off LRU */
 
 	if (!PageUptodate(page) && len != PAGE_SIZE) {
-		ret = afs_fill_page(vnode, key, index << PAGE_SHIFT, page);
+		ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page);
 		if (ret < 0) {
 			kfree(candidate);
 			_leave(" = %d [prep]", ret);
@@ -258,7 +252,9 @@ int afs_write_end(struct file *file, struct address_space *mapping,
 		  struct page *page, void *fsdata)
 {
 	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
+	struct key *key = file->private_data;
 	loff_t i_size, maybe_i_size;
+	int ret;
 
 	_enter("{%x:%u},{%lx}",
 	       vnode->fid.vid, vnode->fid.vnode, page->index);
@@ -274,6 +270,20 @@ int afs_write_end(struct file *file, struct address_space *mapping,
 		spin_unlock(&vnode->writeback_lock);
 	}
 
+	if (!PageUptodate(page)) {
+		if (copied < len) {
+			/* Try and load any missing data from the server.  The
+			 * unmarshalling routine will take care of clearing any
+			 * bits that are beyond the EOF.
+			 */
+			ret = afs_fill_page(vnode, key, pos + copied,
+					    len - copied, page);
+			if (ret < 0)
+				return ret;
+		}
+		SetPageUptodate(page);
+	}
+
 	set_page_dirty(page);
 	if (PageDirty(page))
 		_debug("dirtied");
-- 
cgit v1.2.3


From 58fed94dfb17e89556b5705f20f90e5b2971b6a1 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 16 Mar 2017 16:27:45 +0000
Subject: afs: Flush outstanding writes when an fd is closed

Flush outstanding writes in afs when an fd is closed.  This is what NFS and
CIFS do.

Reported-by: Marc Dionne <marc.c.dionne@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/file.c     |  1 +
 fs/afs/internal.h |  1 +
 fs/afs/write.c    | 14 ++++++++++++++
 3 files changed, 16 insertions(+)

diff --git a/fs/afs/file.c b/fs/afs/file.c
index a38e1c30d110..b5829443ff69 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -30,6 +30,7 @@ static int afs_readpages(struct file *filp, struct address_space *mapping,
 
 const struct file_operations afs_file_operations = {
 	.open		= afs_open,
+	.flush		= afs_flush,
 	.release	= afs_release,
 	.llseek		= generic_file_llseek,
 	.read_iter	= generic_file_read_iter,
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index dc2cb486e127..af1d91ec7f2c 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -720,6 +720,7 @@ extern int afs_writepages(struct address_space *, struct writeback_control *);
 extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
 extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
 extern int afs_writeback_all(struct afs_vnode *);
+extern int afs_flush(struct file *, fl_owner_t);
 extern int afs_fsync(struct file *, loff_t, loff_t, int);
 
 
diff --git a/fs/afs/write.c b/fs/afs/write.c
index ea66890fc188..f1450ea09406 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -757,6 +757,20 @@ out:
 	return ret;
 }
 
+/*
+ * Flush out all outstanding writes on a file opened for writing when it is
+ * closed.
+ */
+int afs_flush(struct file *file, fl_owner_t id)
+{
+	_enter("");
+
+	if ((file->f_mode & FMODE_WRITE) == 0)
+		return 0;
+
+	return vfs_fsync(file, 0);
+}
+
 /*
  * notification that a previously read-only page is about to become writable
  * - if it returns an error, the caller will deliver a bus error signal
-- 
cgit v1.2.3


From 944c74f472f926785b1948efa0e73e2f1b3b539b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 16 Mar 2017 16:27:45 +0000
Subject: afs: Distinguish mountpoints from symlinks by file mode alone

In AFS, mountpoints appear as symlinks with mode 0644 and normal symlinks
have mode 0777, so use this to distinguish them rather than reading the
content and parsing it.  In the case of a mountpoint, the symlink body is a
formatted string indicating the location of the target volume.

Note that with this, kAFS no longer 'pre-fetches' the contents of symlinks,
so afs_readpage() may fail with an access-denial because when the VFS calls
d_automount(), it wraps the call in an credentials override that sets the
initial creds - thereby preventing access to the caller's keyrings and the
authentication keys held therein.

To this end, a patch reverting that change to the VFS is required also.

Reported-by: Jeffrey Altman <jaltman@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/inode.c    | 29 +++++++++++++++--------------
 fs/afs/internal.h |  1 -
 fs/afs/mntpt.c    | 53 -----------------------------------------------------
 3 files changed, 15 insertions(+), 68 deletions(-)

diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 299dbaeb2e2a..ade6ec3873cf 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -54,8 +54,21 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
 		inode->i_fop	= &afs_dir_file_operations;
 		break;
 	case AFS_FTYPE_SYMLINK:
-		inode->i_mode	= S_IFLNK | vnode->status.mode;
-		inode->i_op	= &page_symlink_inode_operations;
+		/* Symlinks with a mode of 0644 are actually mountpoints. */
+		if ((vnode->status.mode & 0777) == 0644) {
+			inode->i_flags |= S_AUTOMOUNT;
+
+			spin_lock(&vnode->lock);
+			set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
+			spin_unlock(&vnode->lock);
+
+			inode->i_mode	= S_IFDIR | 0555;
+			inode->i_op	= &afs_mntpt_inode_operations;
+			inode->i_fop	= &afs_mntpt_file_operations;
+		} else {
+			inode->i_mode	= S_IFLNK | vnode->status.mode;
+			inode->i_op	= &page_symlink_inode_operations;
+		}
 		inode_nohighmem(inode);
 		break;
 	default:
@@ -79,18 +92,6 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
 	inode->i_generation	= vnode->fid.unique;
 	inode->i_version	= vnode->status.data_version;
 	inode->i_mapping->a_ops	= &afs_fs_aops;
-
-	/* check to see whether a symbolic link is really a mountpoint */
-	if (vnode->status.type == AFS_FTYPE_SYMLINK) {
-		afs_mntpt_check_symlink(vnode, key);
-
-		if (test_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags)) {
-			inode->i_mode	= S_IFDIR | vnode->status.mode;
-			inode->i_op	= &afs_mntpt_inode_operations;
-			inode->i_fop	= &afs_mntpt_file_operations;
-		}
-	}
-
 	return 0;
 }
 
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index af1d91ec7f2c..39de154fb42e 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -559,7 +559,6 @@ extern const struct inode_operations afs_autocell_inode_operations;
 extern const struct file_operations afs_mntpt_file_operations;
 
 extern struct vfsmount *afs_d_automount(struct path *);
-extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *);
 extern void afs_mntpt_kill_timer(void);
 
 /*
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index d4fb0afc0097..bd3b65cde282 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -46,59 +46,6 @@ static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out);
 
 static unsigned long afs_mntpt_expiry_timeout = 10 * 60;
 
-/*
- * check a symbolic link to see whether it actually encodes a mountpoint
- * - sets the AFS_VNODE_MOUNTPOINT flag on the vnode appropriately
- */
-int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key)
-{
-	struct page *page;
-	size_t size;
-	char *buf;
-	int ret;
-
-	_enter("{%x:%u,%u}",
-	       vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
-
-	/* read the contents of the symlink into the pagecache */
-	page = read_cache_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0,
-			       afs_page_filler, key);
-	if (IS_ERR(page)) {
-		ret = PTR_ERR(page);
-		goto out;
-	}
-
-	ret = -EIO;
-	if (PageError(page))
-		goto out_free;
-
-	buf = kmap(page);
-
-	/* examine the symlink's contents */
-	size = vnode->status.size;
-	_debug("symlink to %*.*s", (int) size, (int) size, buf);
-
-	if (size > 2 &&
-	    (buf[0] == '%' || buf[0] == '#') &&
-	    buf[size - 1] == '.'
-	    ) {
-		_debug("symlink is a mountpoint");
-		spin_lock(&vnode->lock);
-		set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
-		vnode->vfs_inode.i_flags |= S_AUTOMOUNT;
-		spin_unlock(&vnode->lock);
-	}
-
-	ret = 0;
-
-	kunmap(page);
-out_free:
-	put_page(page);
-out:
-	_leave(" = %d", ret);
-	return ret;
-}
-
 /*
  * no valid lookup procedure on this sort of dir
  */
-- 
cgit v1.2.3


From 1d7e4ebf291d3103466006926329e39aa355944f Mon Sep 17 00:00:00 2001
From: Andreea-Cristina Bernat <bernat.ada@gmail.com>
Date: Thu, 16 Mar 2017 16:27:45 +0000
Subject: afs: inode: Replace rcu_assign_pointer() with RCU_INIT_POINTER()

The use of "rcu_assign_pointer()" is NULLing out the pointer.
According to RCU_INIT_POINTER()'s block comment:
"1.   This use of RCU_INIT_POINTER() is NULLing out the pointer"
it is better to use it instead of rcu_assign_pointer() because it has a
smaller overhead.

The following Coccinelle semantic patch was used:
@@
@@

- rcu_assign_pointer
+ RCU_INIT_POINTER
  (..., NULL)

Signed-off-by: Andreea-Cristina Bernat <bernat.ada@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index ade6ec3873cf..e083e086b7ca 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -445,7 +445,7 @@ void afs_evict_inode(struct inode *inode)
 
 	mutex_lock(&vnode->permits_lock);
 	permits = vnode->permits;
-	rcu_assign_pointer(vnode->permits, NULL);
+	RCU_INIT_POINTER(vnode->permits, NULL);
 	mutex_unlock(&vnode->permits_lock);
 	if (permits)
 		call_rcu(&permits->rcu, afs_zap_permits);
-- 
cgit v1.2.3


From df8a09d1b8f9e693ec3f6b7e0162fc817f2cf0db Mon Sep 17 00:00:00 2001
From: Andreea-Cristina Bernat <bernat.ada@gmail.com>
Date: Thu, 16 Mar 2017 16:27:45 +0000
Subject: afs: security: Replace rcu_assign_pointer() with RCU_INIT_POINTER()

The use of "rcu_assign_pointer()" is NULLing out the pointer.
According to RCU_INIT_POINTER()'s block comment:
"1.   This use of RCU_INIT_POINTER() is NULLing out the pointer"
it is better to use it instead of rcu_assign_pointer() because it has a
smaller overhead.

The following Coccinelle semantic patch was used:
@@
@@

- rcu_assign_pointer
+ RCU_INIT_POINTER
  (..., NULL)

Signed-off-by: Andreea-Cristina Bernat <bernat.ada@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/security.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/afs/security.c b/fs/afs/security.c
index bfa9d3428383..ecb86a670180 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -114,7 +114,7 @@ void afs_clear_permits(struct afs_vnode *vnode)
 
 	mutex_lock(&vnode->permits_lock);
 	permits = vnode->permits;
-	rcu_assign_pointer(vnode->permits, NULL);
+	RCU_INIT_POINTER(vnode->permits, NULL);
 	mutex_unlock(&vnode->permits_lock);
 
 	if (permits)
-- 
cgit v1.2.3


From 8a79790bf0b7da216627ffb85f52cfb4adbf1e4e Mon Sep 17 00:00:00 2001
From: Tina Ruchandani <ruchandani.tina@gmail.com>
Date: Thu, 16 Mar 2017 16:27:46 +0000
Subject: afs: Migrate vlocation fields to 64-bit

get_seconds() returns real wall-clock seconds. On 32-bit systems
this value will overflow in year 2038 and beyond. This patch changes
afs's vlocation record to use ktime_get_real_seconds() instead, for the
fields time_of_death and update_at.

Signed-off-by: Tina Ruchandani <ruchandani.tina@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/callback.c  |  7 ++++---
 fs/afs/internal.h  |  7 ++++---
 fs/afs/server.c    |  6 +++---
 fs/afs/vlocation.c | 16 +++++++++-------
 4 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index b29447e03ede..25d404d22cae 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -362,7 +362,7 @@ static void afs_callback_updater(struct work_struct *work)
 {
 	struct afs_server *server;
 	struct afs_vnode *vnode, *xvnode;
-	time_t now;
+	time64_t now;
 	long timeout;
 	int ret;
 
@@ -370,7 +370,7 @@ static void afs_callback_updater(struct work_struct *work)
 
 	_enter("");
 
-	now = get_seconds();
+	now = ktime_get_real_seconds();
 
 	/* find the first vnode to update */
 	spin_lock(&server->cb_lock);
@@ -424,7 +424,8 @@ static void afs_callback_updater(struct work_struct *work)
 
 	/* and then reschedule */
 	_debug("reschedule");
-	vnode->update_at = get_seconds() + afs_vnode_update_timeout;
+	vnode->update_at = ktime_get_real_seconds() +
+			afs_vnode_update_timeout;
 
 	spin_lock(&server->cb_lock);
 
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 39de154fb42e..97a16ce200be 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -11,6 +11,7 @@
 
 #include <linux/compiler.h>
 #include <linux/kernel.h>
+#include <linux/ktime.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
 #include <linux/rxrpc.h>
@@ -249,7 +250,7 @@ struct afs_cache_vhash {
  */
 struct afs_vlocation {
 	atomic_t		usage;
-	time_t			time_of_death;	/* time at which put reduced usage to 0 */
+	time64_t		time_of_death;	/* time at which put reduced usage to 0 */
 	struct list_head	link;		/* link in cell volume location list */
 	struct list_head	grave;		/* link in master graveyard list */
 	struct list_head	update;		/* link in master update list */
@@ -260,7 +261,7 @@ struct afs_vlocation {
 	struct afs_cache_vlocation vldb;	/* volume information DB record */
 	struct afs_volume	*vols[3];	/* volume access record pointer (index by type) */
 	wait_queue_head_t	waitq;		/* status change waitqueue */
-	time_t			update_at;	/* time at which record should be updated */
+	time64_t		update_at;	/* time at which record should be updated */
 	spinlock_t		lock;		/* access lock */
 	afs_vlocation_state_t	state;		/* volume location state */
 	unsigned short		upd_rej_cnt;	/* ENOMEDIUM count during update */
@@ -273,7 +274,7 @@ struct afs_vlocation {
  */
 struct afs_server {
 	atomic_t		usage;
-	time_t			time_of_death;	/* time at which put reduced usage to 0 */
+	time64_t		time_of_death;	/* time at which put reduced usage to 0 */
 	struct in_addr		addr;		/* server address */
 	struct afs_cell		*cell;		/* cell in which server resides */
 	struct list_head	link;		/* link in cell's server list */
diff --git a/fs/afs/server.c b/fs/afs/server.c
index d4066ab7dd55..c001b1f2455f 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -242,7 +242,7 @@ void afs_put_server(struct afs_server *server)
 	spin_lock(&afs_server_graveyard_lock);
 	if (atomic_read(&server->usage) == 0) {
 		list_move_tail(&server->grave, &afs_server_graveyard);
-		server->time_of_death = get_seconds();
+		server->time_of_death = ktime_get_real_seconds();
 		queue_delayed_work(afs_wq, &afs_server_reaper,
 				   afs_server_timeout * HZ);
 	}
@@ -277,9 +277,9 @@ static void afs_reap_server(struct work_struct *work)
 	LIST_HEAD(corpses);
 	struct afs_server *server;
 	unsigned long delay, expiry;
-	time_t now;
+	time64_t now;
 
-	now = get_seconds();
+	now = ktime_get_real_seconds();
 	spin_lock(&afs_server_graveyard_lock);
 
 	while (!list_empty(&afs_server_graveyard)) {
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index d7d8dd8c0b31..37b7c3b342a6 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -340,7 +340,8 @@ static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
 	struct afs_vlocation *xvl;
 
 	/* wait at least 10 minutes before updating... */
-	vl->update_at = get_seconds() + afs_vlocation_update_timeout;
+	vl->update_at = ktime_get_real_seconds() +
+			afs_vlocation_update_timeout;
 
 	spin_lock(&afs_vlocation_updates_lock);
 
@@ -506,7 +507,7 @@ void afs_put_vlocation(struct afs_vlocation *vl)
 	if (atomic_read(&vl->usage) == 0) {
 		_debug("buried");
 		list_move_tail(&vl->grave, &afs_vlocation_graveyard);
-		vl->time_of_death = get_seconds();
+		vl->time_of_death = ktime_get_real_seconds();
 		queue_delayed_work(afs_wq, &afs_vlocation_reap,
 				   afs_vlocation_timeout * HZ);
 
@@ -543,11 +544,11 @@ static void afs_vlocation_reaper(struct work_struct *work)
 	LIST_HEAD(corpses);
 	struct afs_vlocation *vl;
 	unsigned long delay, expiry;
-	time_t now;
+	time64_t now;
 
 	_enter("");
 
-	now = get_seconds();
+	now = ktime_get_real_seconds();
 	spin_lock(&afs_vlocation_graveyard_lock);
 
 	while (!list_empty(&afs_vlocation_graveyard)) {
@@ -622,13 +623,13 @@ static void afs_vlocation_updater(struct work_struct *work)
 {
 	struct afs_cache_vlocation vldb;
 	struct afs_vlocation *vl, *xvl;
-	time_t now;
+	time64_t now;
 	long timeout;
 	int ret;
 
 	_enter("");
 
-	now = get_seconds();
+	now = ktime_get_real_seconds();
 
 	/* find a record to update */
 	spin_lock(&afs_vlocation_updates_lock);
@@ -684,7 +685,8 @@ static void afs_vlocation_updater(struct work_struct *work)
 
 	/* and then reschedule */
 	_debug("reschedule");
-	vl->update_at = get_seconds() + afs_vlocation_update_timeout;
+	vl->update_at = ktime_get_real_seconds() +
+			afs_vlocation_update_timeout;
 
 	spin_lock(&afs_vlocation_updates_lock);
 
-- 
cgit v1.2.3


From 56e714312e7dbd6bb83b2f78d3ec19a404c7649f Mon Sep 17 00:00:00 2001
From: Tina Ruchandani <ruchandani.tina@gmail.com>
Date: Thu, 16 Mar 2017 16:27:46 +0000
Subject: afs: Prevent callback expiry timer overflow

get_seconds() returns real wall-clock seconds. On 32-bit systems
this value will overflow in year 2038 and beyond. This patch changes
afs_vnode record to use ktime_get_real_seconds() instead, for the
fields cb_expires and cb_expires_at.

Signed-off-by: Tina Ruchandani <ruchandani.tina@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/fsclient.c | 2 +-
 fs/afs/inode.c    | 7 ++++---
 fs/afs/internal.h | 4 ++--
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 6f917dd1238c..c05452a09398 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -145,7 +145,7 @@ static void xdr_decode_AFSCallBack(const __be32 **_bp, struct afs_vnode *vnode)
 	vnode->cb_version	= ntohl(*bp++);
 	vnode->cb_expiry	= ntohl(*bp++);
 	vnode->cb_type		= ntohl(*bp++);
-	vnode->cb_expires	= vnode->cb_expiry + get_seconds();
+	vnode->cb_expires	= vnode->cb_expiry + ktime_get_real_seconds();
 	*_bp = bp;
 }
 
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index e083e086b7ca..4079c832ff27 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -246,12 +246,13 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
 			vnode->cb_version = 0;
 			vnode->cb_expiry = 0;
 			vnode->cb_type = 0;
-			vnode->cb_expires = get_seconds();
+			vnode->cb_expires = ktime_get_real_seconds();
 		} else {
 			vnode->cb_version = cb->version;
 			vnode->cb_expiry = cb->expiry;
 			vnode->cb_type = cb->type;
-			vnode->cb_expires = vnode->cb_expiry + get_seconds();
+			vnode->cb_expires = vnode->cb_expiry +
+				ktime_get_real_seconds();
 		}
 	}
 
@@ -324,7 +325,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
 	    !test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
 	    !test_bit(AFS_VNODE_MODIFIED, &vnode->flags) &&
 	    !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
-		if (vnode->cb_expires < get_seconds() + 10) {
+		if (vnode->cb_expires < ktime_get_real_seconds() + 10) {
 			_debug("callback expired");
 			set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
 		} else {
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 97a16ce200be..832555003d03 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -377,8 +377,8 @@ struct afs_vnode {
 	struct rb_node		server_rb;	/* link in server->fs_vnodes */
 	struct rb_node		cb_promise;	/* link in server->cb_promises */
 	struct work_struct	cb_broken_work;	/* work to be done on callback break */
-	time_t			cb_expires;	/* time at which callback expires */
-	time_t			cb_expires_at;	/* time used to order cb_promise */
+	time64_t		cb_expires;	/* time at which callback expires */
+	time64_t		cb_expires_at;	/* time used to order cb_promise */
 	unsigned		cb_version;	/* callback version */
 	unsigned		cb_expiry;	/* callback expiry time */
 	afs_callback_type_t	cb_type;	/* type of callback */
-- 
cgit v1.2.3


From 29f069853287dcb46eaf45a50dbf1232c1444ac6 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 16 Mar 2017 16:27:46 +0000
Subject: afs: Fix AFS read bug

Fix a bug in AFS read whereby the request page afs_read::index isn't
incremented after calling ->page_done() if ->remain reaches 0, indicating
that the data read is complete.

Without this a NULL pointer exception happens when ->page_done() is called
twice for the last page because the page clearing loop will call it also
and afs_readpages_page_done() clears the current entry in the page list.

BUG: unable to handle kernel NULL pointer dereference at           (null)
IP: afs_readpages_page_done+0x21/0xa4 [kafs]
PGD 0
Oops: 0002 [#1] SMP
Modules linked in: kafs(E)
CPU: 2 PID: 3002 Comm: md5sum Tainted: G            E   4.10.0-fscache #485
Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014
task: ffff8804017d86c0 task.stack: ffff8803fc1d8000
RIP: 0010:afs_readpages_page_done+0x21/0xa4 [kafs]
RSP: 0018:ffff8803fc1db978 EFLAGS: 00010282
RAX: ffff880405d39af8 RBX: 0000000000000000 RCX: ffff880407d83ed4
RDX: 0000000000000000 RSI: ffff880405d39a00 RDI: ffff880405c6f400
RBP: ffff8803fc1db988 R08: 0000000000000000 R09: 0000000000000001
R10: ffff8803fc1db820 R11: ffff88040cf56000 R12: ffff8804088f1780
R13: ffff8804017d86c0 R14: ffff8804088f1780 R15: 0000000000003840
FS:  00007f8154469700(0000) GS:ffff88041fb00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000000 CR3: 00000004016ec000 CR4: 00000000001406e0
Call Trace:
 afs_deliver_fs_fetch_data+0x5b9/0x60e [kafs]
 ? afs_make_call+0x316/0x4e8 [kafs]
 ? afs_make_call+0x359/0x4e8 [kafs]
 afs_deliver_to_call+0x173/0x2e8 [kafs]
 ? afs_make_call+0x316/0x4e8 [kafs]
 afs_make_call+0x37a/0x4e8 [kafs]
 ? wake_up_q+0x4f/0x4f
 ? __init_waitqueue_head+0x36/0x49
 afs_fs_fetch_data+0x21c/0x227 [kafs]
 ? afs_fs_fetch_data+0x21c/0x227 [kafs]
 afs_vnode_fetch_data+0xf3/0x1d2 [kafs]
 afs_readpages+0x314/0x3fd [kafs]
 __do_page_cache_readahead+0x208/0x2c5
 ondemand_readahead+0x3a2/0x3b7
 ? ondemand_readahead+0x3a2/0x3b7
 page_cache_async_readahead+0x5e/0x67
 generic_file_read_iter+0x23b/0x70c
 ? __inode_security_revalidate+0x2f/0x62
 __vfs_read+0xc4/0xe8
 vfs_read+0xd1/0x15a
 SyS_read+0x4c/0x89
 do_syscall_64+0x80/0x191
 entry_SYSCALL64_slow_path+0x25/0x25

Reported-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marc Dionne <marc.dionne@auristor.com>
---
 fs/afs/fsclient.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index c05452a09398..4314f9e63a2c 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -390,9 +390,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 		if (call->offset == PAGE_SIZE) {
 			if (req->page_done)
 				req->page_done(call, req);
+			req->index++;
 			if (req->remain > 0) {
 				call->offset = 0;
-				req->index++;
 				if (req->index >= req->nr_pages) {
 					call->unmarshall = 4;
 					goto begin_discard;
-- 
cgit v1.2.3


From 6a0e3999e5cb3daa0468073fcdee0767422a4056 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 16 Mar 2017 16:27:46 +0000
Subject: afs: Make struct afs_read::remain 64-bit

Make struct afs_read::remain 64-bit so that it can handle huge transfers if
we ever request them or the server decides to give us a bit extra data (the
other fields there are already 64-bit).

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marc Dionne <marc.dionne@auristor.com>
---
 fs/afs/fsclient.c | 8 ++++----
 fs/afs/internal.h | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 4314f9e63a2c..0778c5b6b59b 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -321,7 +321,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 	void *buffer;
 	int ret;
 
-	_enter("{%u,%zu/%u;%u/%llu}",
+	_enter("{%u,%zu/%u;%llu/%llu}",
 	       call->unmarshall, call->offset, call->count,
 	       req->remain, req->actual_len);
 
@@ -379,7 +379,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 
 		/* extract the returned data */
 	case 3:
-		_debug("extract data %u/%llu %zu/%u",
+		_debug("extract data %llu/%llu %zu/%u",
 		       req->remain, req->actual_len, call->offset, call->count);
 
 		buffer = kmap(req->pages[req->index]);
@@ -405,9 +405,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 		/* Discard any excess data the server gave us */
 	begin_discard:
 	case 4:
-		size = min_t(size_t, sizeof(afs_discard_buffer), req->remain);
+		size = min_t(loff_t, sizeof(afs_discard_buffer), req->remain);
 		call->count = size;
-		_debug("extract discard %u/%llu %zu/%u",
+		_debug("extract discard %llu/%llu %zu/%u",
 		       req->remain, req->actual_len, call->offset, call->count);
 
 		call->offset = 0;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 832555003d03..a6901360fb81 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -133,8 +133,8 @@ struct afs_read {
 	loff_t			pos;		/* Where to start reading */
 	loff_t			len;		/* How much we're asking for */
 	loff_t			actual_len;	/* How much we're actually getting */
+	loff_t			remain;		/* Amount remaining */
 	atomic_t		usage;
-	unsigned int		remain;		/* Amount remaining */
 	unsigned int		index;		/* Which page we're reading into */
 	unsigned int		nr_pages;
 	void (*page_done)(struct afs_call *, struct afs_read *);
-- 
cgit v1.2.3


From 2f5705a5c805e7f761f2228820656bb9363a3d8c Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 16 Mar 2017 16:27:46 +0000
Subject: afs: Use a bvec rather than a kvec in afs_send_pages()

Use a bvec rather than a kvec in afs_send_pages() as we don't then have to
call kmap() in advance.  This allows us to pass the array of contiguous
pages that we extracted through to rxrpc in one go rather than passing a
single page at a time.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/rxrpc.c | 97 +++++++++++++++++++++++++++++++---------------------------
 1 file changed, 52 insertions(+), 45 deletions(-)

diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 419ef05dcb5e..bf45307ff201 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -259,67 +259,74 @@ void afs_flat_call_destructor(struct afs_call *call)
 	call->buffer = NULL;
 }
 
+#define AFS_BVEC_MAX 8
+
+/*
+ * Load the given bvec with the next few pages.
+ */
+static void afs_load_bvec(struct afs_call *call, struct msghdr *msg,
+			  struct bio_vec *bv, pgoff_t first, pgoff_t last,
+			  unsigned offset)
+{
+	struct page *pages[AFS_BVEC_MAX];
+	unsigned int nr, n, i, to, bytes = 0;
+
+	nr = min_t(pgoff_t, last - first + 1, AFS_BVEC_MAX);
+	n = find_get_pages_contig(call->mapping, first, nr, pages);
+	ASSERTCMP(n, ==, nr);
+
+	msg->msg_flags |= MSG_MORE;
+	for (i = 0; i < nr; i++) {
+		to = PAGE_SIZE;
+		if (first + i >= last) {
+			to = call->last_to;
+			msg->msg_flags &= ~MSG_MORE;
+		}
+		bv[i].bv_page = pages[i];
+		bv[i].bv_len = to - offset;
+		bv[i].bv_offset = offset;
+		bytes += to - offset;
+		offset = 0;
+	}
+
+	iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC, bv, nr, bytes);
+}
+
 /*
  * attach the data from a bunch of pages on an inode to a call
  */
 static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
 {
-	struct page *pages[8];
-	unsigned count, n, loop, offset, to;
+	struct bio_vec bv[AFS_BVEC_MAX];
+	unsigned int bytes, nr, loop, offset;
 	pgoff_t first = call->first, last = call->last;
 	int ret;
 
-	_enter("");
-
 	offset = call->first_offset;
 	call->first_offset = 0;
 
 	do {
-		_debug("attach %lx-%lx", first, last);
-
-		count = last - first + 1;
-		if (count > ARRAY_SIZE(pages))
-			count = ARRAY_SIZE(pages);
-		n = find_get_pages_contig(call->mapping, first, count, pages);
-		ASSERTCMP(n, ==, count);
-
-		loop = 0;
-		do {
-			struct bio_vec bvec = {.bv_page = pages[loop],
-					       .bv_offset = offset};
-			msg->msg_flags = 0;
-			to = PAGE_SIZE;
-			if (first + loop >= last)
-				to = call->last_to;
-			else
-				msg->msg_flags = MSG_MORE;
-			bvec.bv_len = to - offset;
-			offset = 0;
-
-			_debug("- range %u-%u%s",
-			       offset, to, msg->msg_flags ? " [more]" : "");
-			iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC,
-				      &bvec, 1, to - offset);
-
-			/* have to change the state *before* sending the last
-			 * packet as RxRPC might give us the reply before it
-			 * returns from sending the request */
-			if (first + loop >= last)
-				call->state = AFS_CALL_AWAIT_REPLY;
-			ret = rxrpc_kernel_send_data(afs_socket, call->rxcall,
-						     msg, to - offset);
-			if (ret < 0)
-				break;
-		} while (++loop < count);
-		first += count;
-
-		for (loop = 0; loop < count; loop++)
-			put_page(pages[loop]);
+		afs_load_bvec(call, msg, bv, first, last, offset);
+		offset = 0;
+		bytes = msg->msg_iter.count;
+		nr = msg->msg_iter.nr_segs;
+
+		/* Have to change the state *before* sending the last
+		 * packet as RxRPC might give us the reply before it
+		 * returns from sending the request.
+		 */
+		if (first + nr >= last)
+			call->state = AFS_CALL_AWAIT_REPLY;
+		ret = rxrpc_kernel_send_data(afs_socket, call->rxcall,
+					     msg, bytes);
+		for (loop = 0; loop < nr; loop++)
+			put_page(bv[loop].bv_page);
 		if (ret < 0)
 			break;
+
+		first += nr;
 	} while (first <= last);
 
-	_leave(" = %d", ret);
 	return ret;
 }
 
-- 
cgit v1.2.3


From 146a1192783697810b63a1e41c4d59fc93387340 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 16 Mar 2017 16:27:47 +0000
Subject: afs: Fix the maths in afs_fs_store_data()

afs_fs_store_data() works out of the size of the write it's going to make,
but it uses 32-bit unsigned subtraction in one place that gets
automatically cast to loff_t.

However, if to < offset, then the number goes negative, but as the result
isn't signed, this doesn't get sign-extended to 64-bits when placed in a
loff_t.

Fix by casting the operands to loff_t.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/fsclient.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 0778c5b6b59b..d9234b767287 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -1236,7 +1236,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
 	_enter(",%x,{%x:%u},,",
 	       key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode);
 
-	size = to - offset;
+	size = (loff_t)to - (loff_t)offset;
 	if (first != last)
 		size += (loff_t)(last - first) << PAGE_SHIFT;
 	pos = (loff_t)first << PAGE_SHIFT;
-- 
cgit v1.2.3


From 1157f153f37a8586765034470e4f00a4a6c4ce6f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 16 Mar 2017 16:27:47 +0000
Subject: afs: Invalid op ID should abort with RXGEN_OPCODE

When we are given an invalid operation ID, we should abort that with
RXGEN_OPCODE rather than RX_INVALID_OPERATION.

Also map RXGEN_OPCODE to -ENOTSUPP.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/misc.c  | 2 ++
 fs/afs/rxrpc.c | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 91ea1aa0d8b3..100b207efc9e 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -84,6 +84,8 @@ int afs_abort_to_error(u32 abort_code)
 	case RXKADDATALEN:	return -EKEYREJECTED;
 	case RXKADILLEGALLEVEL:	return -EKEYREJECTED;
 
+	case RXGEN_OPCODE:	return -ENOTSUPP;
+
 	default:		return -EREMOTEIO;
 	}
 }
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index bf45307ff201..bf7761fe6ef5 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -465,7 +465,7 @@ static void afs_deliver_to_call(struct afs_call *call)
 						abort_code, -ret, "KNC");
 			goto do_abort;
 		case -ENOTSUPP:
-			abort_code = RX_INVALID_OPERATION;
+			abort_code = RXGEN_OPCODE;
 			rxrpc_kernel_abort_call(afs_socket, call->rxcall,
 						abort_code, -ret, "KIV");
 			goto do_abort;
-- 
cgit v1.2.3


From 70af0e3bd65142f9e674961c975451638a7ce1d5 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 16 Mar 2017 16:27:47 +0000
Subject: afs: Better abort and net error handling

If we receive a network error, a remote abort or a protocol error whilst
we're still transmitting data, make sure we return an appropriate error to
the caller rather than ESHUTDOWN or ECONNABORTED.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/rxrpc.c | 35 +++++++++++++++++++++++++++--------
 1 file changed, 27 insertions(+), 8 deletions(-)

diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index bf7761fe6ef5..22d26b369070 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -340,6 +340,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 	struct rxrpc_call *rxcall;
 	struct msghdr msg;
 	struct kvec iov[1];
+	size_t offset;
+	u32 abort_code;
 	int ret;
 
 	_enter("%x,{%d},", addr->s_addr, ntohs(call->port));
@@ -388,9 +390,11 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 	msg.msg_controllen	= 0;
 	msg.msg_flags		= (call->send_pages ? MSG_MORE : 0);
 
-	/* have to change the state *before* sending the last packet as RxRPC
-	 * might give us the reply before it returns from sending the
-	 * request */
+	/* We have to change the state *before* sending the last packet as
+	 * rxrpc might give us the reply before it returns from sending the
+	 * request.  Further, if the send fails, we may already have been given
+	 * a notification and may have collected it.
+	 */
 	if (!call->send_pages)
 		call->state = AFS_CALL_AWAIT_REPLY;
 	ret = rxrpc_kernel_send_data(afs_socket, rxcall,
@@ -412,7 +416,17 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 	return afs_wait_for_call_to_complete(call);
 
 error_do_abort:
-	rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, -ret, "KSD");
+	call->state = AFS_CALL_COMPLETE;
+	if (ret != -ECONNABORTED) {
+		rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT,
+					-ret, "KSD");
+	} else {
+		abort_code = 0;
+		offset = 0;
+		rxrpc_kernel_recv_data(afs_socket, rxcall, NULL, 0, &offset,
+				       false, &abort_code);
+		ret = call->type->abort_to_error(abort_code);
+	}
 error_kill_call:
 	afs_put_call(call);
 	_leave(" = %d", ret);
@@ -459,16 +473,18 @@ static void afs_deliver_to_call(struct afs_call *call)
 		case -EINPROGRESS:
 		case -EAGAIN:
 			goto out;
+		case -ECONNABORTED:
+			goto call_complete;
 		case -ENOTCONN:
 			abort_code = RX_CALL_DEAD;
 			rxrpc_kernel_abort_call(afs_socket, call->rxcall,
 						abort_code, -ret, "KNC");
-			goto do_abort;
+			goto save_error;
 		case -ENOTSUPP:
 			abort_code = RXGEN_OPCODE;
 			rxrpc_kernel_abort_call(afs_socket, call->rxcall,
 						abort_code, -ret, "KIV");
-			goto do_abort;
+			goto save_error;
 		case -ENODATA:
 		case -EBADMSG:
 		case -EMSGSIZE:
@@ -478,7 +494,7 @@ static void afs_deliver_to_call(struct afs_call *call)
 				abort_code = RXGEN_SS_UNMARSHAL;
 			rxrpc_kernel_abort_call(afs_socket, call->rxcall,
 						abort_code, EBADMSG, "KUM");
-			goto do_abort;
+			goto save_error;
 		}
 	}
 
@@ -489,8 +505,9 @@ out:
 	_leave("");
 	return;
 
-do_abort:
+save_error:
 	call->error = ret;
+call_complete:
 	call->state = AFS_CALL_COMPLETE;
 	goto done;
 }
@@ -538,6 +555,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
 		_debug("call incomplete");
 		rxrpc_kernel_abort_call(afs_socket, call->rxcall,
 					RX_CALL_DEAD, -ret, abort_why);
+	} else if (call->error < 0) {
+		ret = call->error;
 	}
 
 	_debug("call complete");
-- 
cgit v1.2.3


From ab94f5d0dd6fd82e7eeca5e7c8096eaea0a0261f Mon Sep 17 00:00:00 2001
From: Marc Dionne <marc.dionne@auristor.com>
Date: Thu, 16 Mar 2017 16:27:47 +0000
Subject: afs: Populate and use client modification time

The inode timestamps should be set from the client time
in the status received from the server, rather than the
server time which is meant for internal server use.

Set AFS_SET_MTIME and populate the mtime for operations
that take an input status, such as file/dir creation
and StoreData.  If an input time is not provided the
server will set the vnode times based on the current server
time.

In a situation where the server has some skew with the
client, this could lead to the client seeing a timestamp
in the future for a file that it just created or wrote.

Signed-off-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/fsclient.c | 18 +++++++++---------
 fs/afs/inode.c    |  2 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index d9234b767287..19f76ae36982 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -111,7 +111,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
 			vnode->vfs_inode.i_mode = mode;
 		}
 
-		vnode->vfs_inode.i_ctime.tv_sec	= status->mtime_server;
+		vnode->vfs_inode.i_ctime.tv_sec	= status->mtime_client;
 		vnode->vfs_inode.i_mtime	= vnode->vfs_inode.i_ctime;
 		vnode->vfs_inode.i_atime	= vnode->vfs_inode.i_ctime;
 		vnode->vfs_inode.i_version	= data_version;
@@ -734,8 +734,8 @@ int afs_fs_create(struct afs_server *server,
 		memset(bp, 0, padsz);
 		bp = (void *) bp + padsz;
 	}
-	*bp++ = htonl(AFS_SET_MODE);
-	*bp++ = 0; /* mtime */
+	*bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME);
+	*bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
 	*bp++ = 0; /* owner */
 	*bp++ = 0; /* group */
 	*bp++ = htonl(mode & S_IALLUGO); /* unix mode */
@@ -1003,8 +1003,8 @@ int afs_fs_symlink(struct afs_server *server,
 		memset(bp, 0, c_padsz);
 		bp = (void *) bp + c_padsz;
 	}
-	*bp++ = htonl(AFS_SET_MODE);
-	*bp++ = 0; /* mtime */
+	*bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME);
+	*bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
 	*bp++ = 0; /* owner */
 	*bp++ = 0; /* group */
 	*bp++ = htonl(S_IRWXUGO); /* unix mode */
@@ -1203,8 +1203,8 @@ static int afs_fs_store_data64(struct afs_server *server,
 	*bp++ = htonl(vnode->fid.vnode);
 	*bp++ = htonl(vnode->fid.unique);
 
-	*bp++ = 0; /* mask */
-	*bp++ = 0; /* mtime */
+	*bp++ = htonl(AFS_SET_MTIME); /* mask */
+	*bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
 	*bp++ = 0; /* owner */
 	*bp++ = 0; /* group */
 	*bp++ = 0; /* unix mode */
@@ -1280,8 +1280,8 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
 	*bp++ = htonl(vnode->fid.vnode);
 	*bp++ = htonl(vnode->fid.unique);
 
-	*bp++ = 0; /* mask */
-	*bp++ = 0; /* mtime */
+	*bp++ = htonl(AFS_SET_MTIME); /* mask */
+	*bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
 	*bp++ = 0; /* owner */
 	*bp++ = 0; /* group */
 	*bp++ = 0; /* unix mode */
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 4079c832ff27..aae55dd15108 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -85,7 +85,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
 	inode->i_uid		= vnode->status.owner;
 	inode->i_gid            = vnode->status.group;
 	inode->i_size		= vnode->status.size;
-	inode->i_ctime.tv_sec	= vnode->status.mtime_server;
+	inode->i_ctime.tv_sec	= vnode->status.mtime_client;
 	inode->i_ctime.tv_nsec	= 0;
 	inode->i_atime		= inode->i_mtime = inode->i_ctime;
 	inode->i_blocks		= 0;
-- 
cgit v1.2.3


From 68ae849d7e674b83610bc7fdf74b21621a09b9ac Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 16 Mar 2017 16:27:48 +0000
Subject: afs: Don't set PG_error on local EINTR or ENOMEM when filling a page

Don't set PG_error on a page if we get local EINTR or ENOMEM when filling a
page for writing.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/file.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/fs/afs/file.c b/fs/afs/file.c
index b5829443ff69..0d5b8508869b 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -212,7 +212,13 @@ int afs_page_filler(void *data, struct page *page)
 			fscache_uncache_page(vnode->cache, page);
 #endif
 			BUG_ON(PageFsCache(page));
-			goto error;
+
+			if (ret == -EINTR ||
+			    ret == -ENOMEM ||
+			    ret == -ERESTARTSYS ||
+			    ret == -EAGAIN)
+				goto error;
+			goto io_error;
 		}
 
 		SetPageUptodate(page);
@@ -231,10 +237,12 @@ int afs_page_filler(void *data, struct page *page)
 	_leave(" = 0");
 	return 0;
 
+io_error:
+	SetPageError(page);
+	goto error;
 enomem:
 	ret = -ENOMEM;
 error:
-	SetPageError(page);
 	unlock_page(page);
 	_leave(" = %d", ret);
 	return ret;
-- 
cgit v1.2.3


From 6d06b0d25209c80e99c1e89700f1e09694a3766b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 16 Mar 2017 16:27:48 +0000
Subject: afs: Fix page leak in afs_write_begin()

afs_write_begin() leaks a ref and a lock on a page if afs_fill_page()
fails.  Fix the leak by unlocking and releasing the page in the error path.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/write.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/afs/write.c b/fs/afs/write.c
index f1450ea09406..6e13e96c3db0 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -154,12 +154,12 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
 		kfree(candidate);
 		return -ENOMEM;
 	}
-	*pagep = page;
-	/* page won't leak in error case: it eventually gets cleaned off LRU */
 
 	if (!PageUptodate(page) && len != PAGE_SIZE) {
 		ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page);
 		if (ret < 0) {
+			unlock_page(page);
+			put_page(page);
 			kfree(candidate);
 			_leave(" = %d [prep]", ret);
 			return ret;
@@ -167,6 +167,9 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
 		SetPageUptodate(page);
 	}
 
+	/* page won't leak in error case: it eventually gets cleaned off LRU */
+	*pagep = page;
+
 try_again:
 	spin_lock(&vnode->writeback_lock);
 
-- 
cgit v1.2.3


From 7286a35e893176169b09715096a4aca557e2ccd2 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 16 Mar 2017 16:27:48 +0000
Subject: afs: Fix afs_kill_pages()

Fix afs_kill_pages() in two ways:

 (1) If a writeback has been partially flushed, then if we try and kill the
     pages it contains, some of them may no longer be undergoing writeback
     and end_page_writeback() will assert.

     Fix this by checking to see whether the page in question is actually
     undergoing writeback before ending that writeback.

 (2) The loop that scans for pages to kill doesn't increase the first page
     index, and so the loop may not terminate, but it will try to process
     the same pages over and over again.

     Fix this by increasing the first page index to one after the last page
     we processed.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/write.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/fs/afs/write.c b/fs/afs/write.c
index 6e13e96c3db0..134de0667898 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -321,10 +321,14 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error,
 		ASSERTCMP(pv.nr, ==, count);
 
 		for (loop = 0; loop < count; loop++) {
-			ClearPageUptodate(pv.pages[loop]);
+			struct page *page = pv.pages[loop];
+			ClearPageUptodate(page);
 			if (error)
-				SetPageError(pv.pages[loop]);
-			end_page_writeback(pv.pages[loop]);
+				SetPageError(page);
+			if (PageWriteback(page))
+				end_page_writeback(page);
+			if (page->index >= first)
+				first = page->index + 1;
 		}
 
 		__pagevec_release(&pv);
-- 
cgit v1.2.3


From 445783d0ec173a52bef2e9b129de7d716a19b9fa Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 16 Mar 2017 16:27:48 +0000
Subject: afs: Fix an off-by-one error in afs_send_pages()

afs_send_pages() should only put the call into the AFS_CALL_AWAIT_REPLY
state if it has sent all the pages - but the check it makes is incorrect
and sometimes it will finish the loop early.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/rxrpc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 22d26b369070..b12da6aa5412 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -315,7 +315,7 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
 		 * packet as RxRPC might give us the reply before it
 		 * returns from sending the request.
 		 */
-		if (first + nr >= last)
+		if (first + nr - 1 >= last)
 			call->state = AFS_CALL_AWAIT_REPLY;
 		ret = rxrpc_kernel_send_data(afs_socket, call->rxcall,
 					     msg, bytes);
-- 
cgit v1.2.3


From 954cd6dc02a65065aecb7150962c0870c5b0e322 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 16 Mar 2017 16:27:49 +0000
Subject: afs: Fix abort on signal while waiting for call completion

Fix the way in which a call that's in progress and being waited for is
aborted in the case that EINTR is detected.  We should be sending
RX_USER_ABORT rather than RX_CALL_DEAD as the abort code.

Note that since the only two ways out of the loop are if the call completes
or if a signal happens, the kill-the-call clause after the loop has
finished can only happen in the case of EINTR.  This means that we only
have one abort case to deal with, not two, and the "KWC" case can never
happen and so can be deleted.

Note further that simply aborting the call isn't necessarily the best thing
here since at this point: the request has been entirely sent and it's
likely the server will do the operation anyway - whether we abort it or
not.  In future, we should punt the handling of the remainder of the call
off to a background thread.

Reported-by: Marc Dionne <marc.c.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/rxrpc.c | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index b12da6aa5412..8f76b13d5549 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -517,7 +517,6 @@ call_complete:
  */
 static int afs_wait_for_call_to_complete(struct afs_call *call)
 {
-	const char *abort_why;
 	int ret;
 
 	DECLARE_WAITQUEUE(myself, current);
@@ -536,13 +535,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
 			continue;
 		}
 
-		abort_why = "KWC";
-		ret = call->error;
-		if (call->state == AFS_CALL_COMPLETE)
-			break;
-		abort_why = "KWI";
-		ret = -EINTR;
-		if (signal_pending(current))
+		if (call->state == AFS_CALL_COMPLETE ||
+		    signal_pending(current))
 			break;
 		schedule();
 	}
@@ -550,15 +544,14 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
 	remove_wait_queue(&call->waitq, &myself);
 	__set_current_state(TASK_RUNNING);
 
-	/* kill the call */
+	/* Kill off the call if it's still live. */
 	if (call->state < AFS_CALL_COMPLETE) {
-		_debug("call incomplete");
+		_debug("call interrupted");
 		rxrpc_kernel_abort_call(afs_socket, call->rxcall,
-					RX_CALL_DEAD, -ret, abort_why);
-	} else if (call->error < 0) {
-		ret = call->error;
+					RX_USER_ABORT, -EINTR, "KWI");
 	}
 
+	ret = call->error;
 	_debug("call complete");
 	afs_put_call(call);
 	_leave(" = %d", ret);
-- 
cgit v1.2.3


From 65a151094edeb04e8f5f6f1502028e2383e81bb8 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 16 Mar 2017 16:27:49 +0000
Subject: afs: ->writepage() shouldn't call clear_page_dirty_for_io()

The ->writepage() op shouldn't call clear_page_dirty_for_io() as that has
already been called by the caller.

Fix afs_writepage() by moving the call out of
afs_write_back_from_locked_page() to afs_writepages_region() where it is
needed.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/write.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/afs/write.c b/fs/afs/write.c
index 134de0667898..e5f150bccfb5 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -231,7 +231,7 @@ flush_conflicting_wb:
 	if (wb->state == AFS_WBACK_PENDING)
 		wb->state = AFS_WBACK_CONFLICTING;
 	spin_unlock(&vnode->writeback_lock);
-	if (PageDirty(page)) {
+	if (clear_page_dirty_for_io(page)) {
 		ret = afs_write_back_from_locked_page(wb, page);
 		if (ret < 0) {
 			afs_put_writeback(candidate);
@@ -353,8 +353,6 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb,
 	_enter(",%lx", primary_page->index);
 
 	count = 1;
-	if (!clear_page_dirty_for_io(primary_page))
-		BUG();
 	if (test_set_page_writeback(primary_page))
 		BUG();
 
@@ -542,6 +540,8 @@ static int afs_writepages_region(struct address_space *mapping,
 		wb->state = AFS_WBACK_WRITING;
 		spin_unlock(&wb->vnode->writeback_lock);
 
+		if (!clear_page_dirty_for_io(page))
+			BUG();
 		ret = afs_write_back_from_locked_page(wb, page);
 		unlock_page(page);
 		put_page(page);
-- 
cgit v1.2.3


From c5051c7bc777dffa5661569dec5997f432b9a34a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 16 Mar 2017 16:27:49 +0000
Subject: afs: Don't wait for page writeback with the page lock held

Drop the page lock before waiting for page writeback.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/write.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/fs/afs/write.c b/fs/afs/write.c
index e5f150bccfb5..2d2fccd5044b 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -518,17 +518,16 @@ static int afs_writepages_region(struct address_space *mapping,
 		 */
 		lock_page(page);
 
-		if (page->mapping != mapping) {
+		if (page->mapping != mapping || !PageDirty(page)) {
 			unlock_page(page);
 			put_page(page);
 			continue;
 		}
 
-		if (wbc->sync_mode != WB_SYNC_NONE)
-			wait_on_page_writeback(page);
-
-		if (PageWriteback(page) || !PageDirty(page)) {
+		if (PageWriteback(page)) {
 			unlock_page(page);
+			if (wbc->sync_mode != WB_SYNC_NONE)
+				wait_on_page_writeback(page);
 			put_page(page);
 			continue;
 		}
-- 
cgit v1.2.3


From e4c5d3762e2d6d274bd1cc948c47063becfa2103 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 27 Feb 2017 18:44:45 +0200
Subject: nvme-loop: fix a possible use-after-free when destroying the admin
 queue

we need to destroy the nvmet sq and let it finish gracefully
before continue to cleanup the queue.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/target/loop.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index d1f06e7768ff..74e04a0855d4 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -288,9 +288,9 @@ static struct blk_mq_ops nvme_loop_admin_mq_ops = {
 
 static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl)
 {
+	nvmet_sq_destroy(&ctrl->queues[0].nvme_sq);
 	blk_cleanup_queue(ctrl->ctrl.admin_q);
 	blk_mq_free_tag_set(&ctrl->admin_tag_set);
-	nvmet_sq_destroy(&ctrl->queues[0].nvme_sq);
 }
 
 static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl)
-- 
cgit v1.2.3


From d11ea004a458b982e19b188c386e25a9b66ec446 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 6 Mar 2017 18:46:20 +0200
Subject: nvmet: confirm sq percpu has scheduled and switched to atomic

percpu_ref_kill is not enough to prevent subsequent
percpu_ref_tryget_live from failing. Hence call
perfcpu_ref_kill_confirm to make it safe.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/target/core.c  | 11 ++++++++++-
 drivers/nvme/target/nvmet.h |  1 +
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 11b0a0a5f661..798653b329b2 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -425,6 +425,13 @@ void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
 	ctrl->sqs[qid] = sq;
 }
 
+static void nvmet_confirm_sq(struct percpu_ref *ref)
+{
+	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);
+
+	complete(&sq->confirm_done);
+}
+
 void nvmet_sq_destroy(struct nvmet_sq *sq)
 {
 	/*
@@ -433,7 +440,8 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
 	 */
 	if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
 		nvmet_async_events_free(sq->ctrl);
-	percpu_ref_kill(&sq->ref);
+	percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
+	wait_for_completion(&sq->confirm_done);
 	wait_for_completion(&sq->free_done);
 	percpu_ref_exit(&sq->ref);
 
@@ -461,6 +469,7 @@ int nvmet_sq_init(struct nvmet_sq *sq)
 		return ret;
 	}
 	init_completion(&sq->free_done);
+	init_completion(&sq->confirm_done);
 
 	return 0;
 }
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 1370eee0a3c0..f7ff15f17ca9 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -73,6 +73,7 @@ struct nvmet_sq {
 	u16			qid;
 	u16			size;
 	struct completion	free_done;
+	struct completion	confirm_done;
 };
 
 /**
-- 
cgit v1.2.3


From b25634e2a051bef4b2524b11adddfbfa6448f6cd Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 9 Mar 2017 13:45:52 +0200
Subject: nvmet-rdma: Fix a possible uninitialized variable dereference

When handling a new recv command, we grab a new rsp resource and
check for the queue state being live. In case the queue is not in
live state, we simply restore the rsp back to the free list. However
in this flow we didn't set rsp->queue yet, so we cannot dereference it.

Instead, make sure to initialize rsp->queue (and other rsp members)
as soon as possible so we won't reference uninitialized variables.

Reported-by: Yi Zhang <yizhan@redhat.com>
Reported-by: Raju Rangoju <rajur@chelsio.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: Raju Rangoju <rajur@chelsio.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/target/rdma.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 9aa1da3778b3..ecc4fe862561 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -703,11 +703,6 @@ static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue,
 {
 	u16 status;
 
-	cmd->queue = queue;
-	cmd->n_rdma = 0;
-	cmd->req.port = queue->port;
-
-
 	ib_dma_sync_single_for_cpu(queue->dev->device,
 		cmd->cmd->sge[0].addr, cmd->cmd->sge[0].length,
 		DMA_FROM_DEVICE);
@@ -760,9 +755,12 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 
 	cmd->queue = queue;
 	rsp = nvmet_rdma_get_rsp(queue);
+	rsp->queue = queue;
 	rsp->cmd = cmd;
 	rsp->flags = 0;
 	rsp->req.cmd = cmd->nvme_cmd;
+	rsp->req.port = queue->port;
+	rsp->n_rdma = 0;
 
 	if (unlikely(queue->state != NVMET_RDMA_Q_LIVE)) {
 		unsigned long flags;
-- 
cgit v1.2.3


From 8f3dbfd79ed9ef9770305a7cc4e13dfd31ad2cd0 Mon Sep 17 00:00:00 2001
From: Kris Murphy <kriskend@linux.vnet.ibm.com>
Date: Thu, 16 Mar 2017 10:51:28 -0500
Subject: openvswitch: Add missing case OVS_TUNNEL_KEY_ATTR_PAD

Added a case for OVS_TUNNEL_KEY_ATTR_PAD to the switch statement
in ip_tun_from_nlattr in order to prevent the default case
returning an error.

Fixes: b46f6ded906e ("libnl: nla_put_be64(): align on a 64-bit area")
Signed-off-by: Kris Murphy <kriskend@linux.vnet.ibm.com>
Acked-by: Joe Stringer <joe@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/flow_netlink.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index a08ff834676b..1105a838bab8 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -665,6 +665,8 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
 			tun_flags |= TUNNEL_VXLAN_OPT;
 			opts_type = type;
 			break;
+		case OVS_TUNNEL_KEY_ATTR_PAD:
+			break;
 		default:
 			OVS_NLERR(log, "Unknown IP tunnel attribute %d",
 				  type);
-- 
cgit v1.2.3


From 59cf8bed44a79ec42303151dd014fdb6434254bb Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Thu, 16 Mar 2017 11:34:02 -0700
Subject: Input: iforce - validate number of endpoints before using them

Make sure to check the number of endpoints to avoid dereferencing a
NULL-pointer or accessing memory that lie beyond the end of the endpoint
array should a malicious device lack the expected endpoints.

Signed-off-by: Johan Hovold <johan@kernel.org>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/joystick/iforce/iforce-usb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/input/joystick/iforce/iforce-usb.c b/drivers/input/joystick/iforce/iforce-usb.c
index d96aa27dfcdc..db64adfbe1af 100644
--- a/drivers/input/joystick/iforce/iforce-usb.c
+++ b/drivers/input/joystick/iforce/iforce-usb.c
@@ -141,6 +141,9 @@ static int iforce_usb_probe(struct usb_interface *intf,
 
 	interface = intf->cur_altsetting;
 
+	if (interface->desc.bNumEndpoints < 2)
+		return -ENODEV;
+
 	epirq = &interface->endpoint[0].desc;
 	epout = &interface->endpoint[1].desc;
 
-- 
cgit v1.2.3


From ac2ee9ba953afe88f7a673e1c0c839227b1d7891 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Thu, 16 Mar 2017 11:35:12 -0700
Subject: Input: cm109 - validate number of endpoints before using them

Make sure to check the number of endpoints to avoid dereferencing a
NULL-pointer should a malicious device lack endpoints.

Fixes: c04148f915e5 ("Input: add driver for USB VoIP phones with CM109...")
Signed-off-by: Johan Hovold <johan@kernel.org>
Cc: stable@vger.kernel.org	# 2.6.28
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/misc/cm109.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/input/misc/cm109.c b/drivers/input/misc/cm109.c
index 9cc6d057c302..23c191a2a071 100644
--- a/drivers/input/misc/cm109.c
+++ b/drivers/input/misc/cm109.c
@@ -700,6 +700,10 @@ static int cm109_usb_probe(struct usb_interface *intf,
 	int error = -ENOMEM;
 
 	interface = intf->cur_altsetting;
+
+	if (interface->desc.bNumEndpoints < 1)
+		return -ENODEV;
+
 	endpoint = &interface->endpoint[0].desc;
 
 	if (!usb_endpoint_is_int_in(endpoint))
-- 
cgit v1.2.3


From 1916d319271664241b7aa0cd2b05e32bdb310ce9 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Thu, 16 Mar 2017 11:36:13 -0700
Subject: Input: ims-pcu - validate number of endpoints before using them

Make sure to check the number of endpoints to avoid dereferencing a
NULL-pointer should a malicious device lack control-interface endpoints.

Fixes: 628329d52474 ("Input: add IMS Passenger Control Unit driver")
Signed-off-by: Johan Hovold <johan@kernel.org>
Cc: stable@vger.kernel.org	# 3.10
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/misc/ims-pcu.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/input/misc/ims-pcu.c b/drivers/input/misc/ims-pcu.c
index 9c0ea36913b4..f4e8fbec6a94 100644
--- a/drivers/input/misc/ims-pcu.c
+++ b/drivers/input/misc/ims-pcu.c
@@ -1667,6 +1667,10 @@ static int ims_pcu_parse_cdc_data(struct usb_interface *intf, struct ims_pcu *pc
 		return -EINVAL;
 
 	alt = pcu->ctrl_intf->cur_altsetting;
+
+	if (alt->desc.bNumEndpoints < 1)
+		return -ENODEV;
+
 	pcu->ep_ctrl = &alt->endpoint[0].desc;
 	pcu->max_ctrl_size = usb_endpoint_maxp(pcu->ep_ctrl);
 
-- 
cgit v1.2.3


From 5cc4a1a9f5c179795c8a1f2b0f4361829d6a070e Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Thu, 16 Mar 2017 11:37:01 -0700
Subject: Input: yealink - validate number of endpoints before using them

Make sure to check the number of endpoints to avoid dereferencing a
NULL-pointer should a malicious device lack endpoints.

Fixes: aca951a22a1d ("[PATCH] input-driver-yealink-P1K-usb-phone")
Signed-off-by: Johan Hovold <johan@kernel.org>
Cc: stable@vger.kernel.org	# 2.6.14
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/misc/yealink.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/input/misc/yealink.c b/drivers/input/misc/yealink.c
index 79c964c075f1..6e7ff9561d92 100644
--- a/drivers/input/misc/yealink.c
+++ b/drivers/input/misc/yealink.c
@@ -875,6 +875,10 @@ static int usb_probe(struct usb_interface *intf, const struct usb_device_id *id)
 	int ret, pipe, i;
 
 	interface = intf->cur_altsetting;
+
+	if (interface->desc.bNumEndpoints < 1)
+		return -ENODEV;
+
 	endpoint = &interface->endpoint[0].desc;
 	if (!usb_endpoint_is_int_in(endpoint))
 		return -ENODEV;
-- 
cgit v1.2.3


From ba340d7b83703768ce566f53f857543359aa1b98 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Thu, 16 Mar 2017 11:39:29 -0700
Subject: Input: hanwang - validate number of endpoints before using them

Make sure to check the number of endpoints to avoid dereferencing a
NULL-pointer should a malicious device lack endpoints.

Fixes: bba5394ad3bd ("Input: add support for Hanwang tablets")
Signed-off-by: Johan Hovold <johan@kernel.org>
Cc: stable@vger.kernel.org	# 2.6.37
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/tablet/hanwang.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/input/tablet/hanwang.c b/drivers/input/tablet/hanwang.c
index cd852059b99e..df4bea96d7ed 100644
--- a/drivers/input/tablet/hanwang.c
+++ b/drivers/input/tablet/hanwang.c
@@ -340,6 +340,9 @@ static int hanwang_probe(struct usb_interface *intf, const struct usb_device_id
 	int error;
 	int i;
 
+	if (intf->cur_altsetting->desc.bNumEndpoints < 1)
+		return -ENODEV;
+
 	hanwang = kzalloc(sizeof(struct hanwang), GFP_KERNEL);
 	input_dev = input_allocate_device();
 	if (!hanwang || !input_dev) {
-- 
cgit v1.2.3


From cb1b494663e037253337623bf1ef2df727883cb7 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Thu, 16 Mar 2017 11:41:55 -0700
Subject: Input: kbtab - validate number of endpoints before using them

Make sure to check the number of endpoints to avoid dereferencing a
NULL-pointer should a malicious device lack endpoints.

Signed-off-by: Johan Hovold <johan@kernel.org>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/tablet/kbtab.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/input/tablet/kbtab.c b/drivers/input/tablet/kbtab.c
index e850d7e8afbc..4d9d64908b59 100644
--- a/drivers/input/tablet/kbtab.c
+++ b/drivers/input/tablet/kbtab.c
@@ -122,6 +122,9 @@ static int kbtab_probe(struct usb_interface *intf, const struct usb_device_id *i
 	struct input_dev *input_dev;
 	int error = -ENOMEM;
 
+	if (intf->cur_altsetting->desc.bNumEndpoints < 1)
+		return -ENODEV;
+
 	kbtab = kzalloc(sizeof(struct kbtab), GFP_KERNEL);
 	input_dev = input_allocate_device();
 	if (!kbtab || !input_dev)
-- 
cgit v1.2.3


From 92461f5d723037530c1f36cce93640770037812c Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Thu, 16 Mar 2017 11:43:09 -0700
Subject: Input: sur40 - validate number of endpoints before using them

Make sure to check the number of endpoints to avoid dereferencing a
NULL-pointer or accessing memory that lie beyond the end of the endpoint
array should a malicious device lack the expected endpoints.

Fixes: bdb5c57f209c ("Input: add sur40 driver for Samsung SUR40... ")
Signed-off-by: Johan Hovold <johan@kernel.org>
Cc: stable@vger.kernel.org	# 3.13
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/touchscreen/sur40.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/input/touchscreen/sur40.c b/drivers/input/touchscreen/sur40.c
index aefb6e11f88a..4c0eecae065c 100644
--- a/drivers/input/touchscreen/sur40.c
+++ b/drivers/input/touchscreen/sur40.c
@@ -527,6 +527,9 @@ static int sur40_probe(struct usb_interface *interface,
 	if (iface_desc->desc.bInterfaceClass != 0xFF)
 		return -ENODEV;
 
+	if (iface_desc->desc.bNumEndpoints < 5)
+		return -ENODEV;
+
 	/* Use endpoint #4 (0x86). */
 	endpoint = &iface_desc->endpoint[4].desc;
 	if (endpoint->bEndpointAddress != TOUCH_ENDPOINT)
-- 
cgit v1.2.3


From acfa28b3649ec07775efaac0c00de2db39d71634 Mon Sep 17 00:00:00 2001
From: Jon Mason <jon.mason@broadcom.com>
Date: Wed, 1 Mar 2017 18:02:28 -0500
Subject: ARM: dts: NSP: GPIO reboot open-source

The libgpio code pre-sets the GPIO values for the gpio-reset in the
device tree.  This results in the device being reset during bringup.
To prevent this pre-setting, use the "open-source" flag in the device
tree.

Signed-off-by: Jon Mason <jon.mason@broadcom.com>
Fixes: b1aaf88 ("ARM: dts: NSP: Add GPIO reboot method to bcm958625hr DTS file")
Fixes: 10baed1 ("ARM: dts: NSP: Add GPIO reboot method to bcm958625xmc DTS file")
Fixes: 088e3148 ("ARM: dts: NSP: Add new DT file for bcm958522er")
Fixes: e3227c1 ("ARM: dts: NSP: Add new DT file for bcm958525er")
Fixes: 2f8bc00 ("ARM: dts: NSP: Add new DT file for bcm958622hr")
Fixes: d454c37 ("ARM: dts: NSP: Add new DT file for bcm958623hr")
Fixes: f27eacf ("ARM: dts: NSP: Add new DT file for bcm988312hr")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 arch/arm/boot/dts/bcm958522er.dts  | 1 +
 arch/arm/boot/dts/bcm958525er.dts  | 1 +
 arch/arm/boot/dts/bcm958525xmc.dts | 1 +
 arch/arm/boot/dts/bcm958622hr.dts  | 1 +
 arch/arm/boot/dts/bcm958623hr.dts  | 1 +
 arch/arm/boot/dts/bcm958625hr.dts  | 1 +
 arch/arm/boot/dts/bcm988312hr.dts  | 1 +
 7 files changed, 7 insertions(+)

diff --git a/arch/arm/boot/dts/bcm958522er.dts b/arch/arm/boot/dts/bcm958522er.dts
index 3f04a40eb90c..df05e7f568af 100644
--- a/arch/arm/boot/dts/bcm958522er.dts
+++ b/arch/arm/boot/dts/bcm958522er.dts
@@ -55,6 +55,7 @@
 	gpio-restart {
 		compatible = "gpio-restart";
 		gpios = <&gpioa 15 GPIO_ACTIVE_LOW>;
+		open-source;
 		priority = <200>;
 	};
 };
diff --git a/arch/arm/boot/dts/bcm958525er.dts b/arch/arm/boot/dts/bcm958525er.dts
index 9fd542200d3d..4a3ab19c6281 100644
--- a/arch/arm/boot/dts/bcm958525er.dts
+++ b/arch/arm/boot/dts/bcm958525er.dts
@@ -55,6 +55,7 @@
 	gpio-restart {
 		compatible = "gpio-restart";
 		gpios = <&gpioa 15 GPIO_ACTIVE_LOW>;
+		open-source;
 		priority = <200>;
 	};
 };
diff --git a/arch/arm/boot/dts/bcm958525xmc.dts b/arch/arm/boot/dts/bcm958525xmc.dts
index 41e7fd350fcd..81f78435d8c7 100644
--- a/arch/arm/boot/dts/bcm958525xmc.dts
+++ b/arch/arm/boot/dts/bcm958525xmc.dts
@@ -55,6 +55,7 @@
 	gpio-restart {
 		compatible = "gpio-restart";
 		gpios = <&gpioa 31 GPIO_ACTIVE_LOW>;
+		open-source;
 		priority = <200>;
 	};
 };
diff --git a/arch/arm/boot/dts/bcm958622hr.dts b/arch/arm/boot/dts/bcm958622hr.dts
index 477c4860db52..c88b8fefcb2f 100644
--- a/arch/arm/boot/dts/bcm958622hr.dts
+++ b/arch/arm/boot/dts/bcm958622hr.dts
@@ -55,6 +55,7 @@
 	gpio-restart {
 		compatible = "gpio-restart";
 		gpios = <&gpioa 15 GPIO_ACTIVE_LOW>;
+		open-source;
 		priority = <200>;
 	};
 };
diff --git a/arch/arm/boot/dts/bcm958623hr.dts b/arch/arm/boot/dts/bcm958623hr.dts
index c0a499d5ba44..d503fa0dde31 100644
--- a/arch/arm/boot/dts/bcm958623hr.dts
+++ b/arch/arm/boot/dts/bcm958623hr.dts
@@ -55,6 +55,7 @@
 	gpio-restart {
 		compatible = "gpio-restart";
 		gpios = <&gpioa 15 GPIO_ACTIVE_LOW>;
+		open-source;
 		priority = <200>;
 	};
 };
diff --git a/arch/arm/boot/dts/bcm958625hr.dts b/arch/arm/boot/dts/bcm958625hr.dts
index f7eb5854a224..cc0363b843c1 100644
--- a/arch/arm/boot/dts/bcm958625hr.dts
+++ b/arch/arm/boot/dts/bcm958625hr.dts
@@ -55,6 +55,7 @@
 	gpio-restart {
 		compatible = "gpio-restart";
 		gpios = <&gpioa 15 GPIO_ACTIVE_LOW>;
+		open-source;
 		priority = <200>;
 	};
 };
diff --git a/arch/arm/boot/dts/bcm988312hr.dts b/arch/arm/boot/dts/bcm988312hr.dts
index 16666324fda8..74e15a3cd9f8 100644
--- a/arch/arm/boot/dts/bcm988312hr.dts
+++ b/arch/arm/boot/dts/bcm988312hr.dts
@@ -55,6 +55,7 @@
 	gpio-restart {
 		compatible = "gpio-restart";
 		gpios = <&gpioa 15 GPIO_ACTIVE_LOW>;
+		open-source;
 		priority = <200>;
 	};
 };
-- 
cgit v1.2.3


From 271df90e4e530c17f237b27034d6341cb2c2f536 Mon Sep 17 00:00:00 2001
From: Vitaly Wool <vitalywool@gmail.com>
Date: Thu, 16 Mar 2017 16:40:19 -0700
Subject: z3fold: fix spinlock unlocking in page reclaim

Commmit 5a27aa822029 ("z3fold: add kref refcounting") introduced a bug
in z3fold_reclaim_page() with function exit that may leave pool->lock
spinlock held.  Here comes the trivial fix.

Fixes: 5a27aa822029 ("z3fold: add kref refcounting")
Link: http://lkml.kernel.org/r/20170311222239.7b83d8e7ef1914e05497649f@gmail.com
Reported-by: Alexey Khoroshilov <khoroshilov@ispras.ru>
Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
Cc: Dan Streetman <ddstreet@ieee.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/z3fold.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 8970a2fd3b1a..f9492bccfd79 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -667,6 +667,7 @@ next:
 			z3fold_page_unlock(zhdr);
 			spin_lock(&pool->lock);
 			if (kref_put(&zhdr->refcount, release_z3fold_page)) {
+				spin_unlock(&pool->lock);
 				atomic64_dec(&pool->pages_nr);
 				return 0;
 			}
-- 
cgit v1.2.3


From 5be9b730b09c45c358bbfe7f51d254e306cccc07 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 16 Mar 2017 16:40:21 -0700
Subject: kasan: add a prototype of task_struct to avoid warning

Add a prototype of task_struct to fix below warning on arm64.

  In file included from arch/arm64/kernel/probes/kprobes.c:19:0:
  include/linux/kasan.h:81:132: error: 'struct task_struct' declared inside parameter list will not be visible outside of this definition or declaration [-Werror]
   static inline void kasan_unpoison_task_stack(struct task_struct *task) {}

As same as other types (kmem_cache, page, and vm_struct) this adds a
prototype of task_struct data structure on top of kasan.h.

[arnd] A related warning was fixed before, but now appears in a
different line in the same file in v4.11-rc2.  The patch from Masami
Hiramatsu still seems appropriate, so let's take his version.

Fixes: 71af2ed5eeea ("kasan, sched/headers: Remove <linux/sched.h> from <linux/kasan.h>")
Link: https://patchwork.kernel.org/patch/9569839/
Link: http://lkml.kernel.org/r/20170313141517.3397802-1-arnd@arndb.de
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Acked-by: Alexander Potapenko <glider@google.com>
Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kasan.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 1c823bef4c15..5734480c9590 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -6,6 +6,7 @@
 struct kmem_cache;
 struct page;
 struct vm_struct;
+struct task_struct;
 
 #ifdef CONFIG_KASAN
 
-- 
cgit v1.2.3


From d0f33ac9ae7b2a727fb678235ae37baf1d0608d5 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 16 Mar 2017 16:40:24 -0700
Subject: mm, x86: fix native_pud_clear build error

We still get a build error in random configurations, after this has been
modified a few times:

  In file included from include/linux/mm.h:68:0,
                   from include/linux/suspend.h:8,
                   from arch/x86/kernel/asm-offsets.c:12:
  arch/x86/include/asm/pgtable.h:66:26: error: redefinition of 'native_pud_clear'
   #define pud_clear(pud)   native_pud_clear(pud)

My interpretation is that the build error comes from a typo in
__PAGETABLE_PUD_FOLDED, so fix that typo now, and remove the incorrect
#ifdef around the native_pud_clear definition.

Fixes: 3e761a42e19c ("mm, x86: fix HIGHMEM64 && PARAVIRT build config for native_pud_clear()")
Fixes: a00cc7d9dd93 ("mm, x86: add support for PUD-sized transparent hugepages")
Link: http://lkml.kernel.org/r/20170314121330.182155-1-arnd@arndb.de
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Ackedy-by: Dave Jiang <dave.jiang@intel.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Borislav Petkov <bp@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/pgtable-3level.h | 3 ---
 arch/x86/include/asm/pgtable.h        | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 72277b1028a5..50d35e3185f5 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -121,12 +121,9 @@ static inline void native_pmd_clear(pmd_t *pmd)
 	*(tmp + 1) = 0;
 }
 
-#if !defined(CONFIG_SMP) || (defined(CONFIG_HIGHMEM64G) && \
-		defined(CONFIG_PARAVIRT))
 static inline void native_pud_clear(pud_t *pudp)
 {
 }
-#endif
 
 static inline void pud_clear(pud_t *pudp)
 {
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 1cfb36b8c024..585ee0d42d18 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -62,7 +62,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
 # define set_pud(pudp, pud)		native_set_pud(pudp, pud)
 #endif
 
-#ifndef __PAGETABLE_PMD_FOLDED
+#ifndef __PAGETABLE_PUD_FOLDED
 #define pud_clear(pud)			native_pud_clear(pud)
 #endif
 
-- 
cgit v1.2.3


From 171012f561274784160f666f8398af8b42216e1f Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Thu, 16 Mar 2017 16:40:27 -0700
Subject: mm: don't warn when vmalloc() fails due to a fatal signal

When vmalloc() fails it prints a very lengthy message with all the
details about memory consumption assuming that it happened due to OOM.

However, vmalloc() can also fail due to fatal signal pending.  In such
case the message is quite confusing because it suggests that it is OOM
but the numbers suggest otherwise.  The messages can also pollute
console considerably.

Don't warn when vmalloc() fails due to fatal signal pending.

Link: http://lkml.kernel.org/r/20170313114425.72724-1-dvyukov@google.com
Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Matthew Wilcox <mawilcox@microsoft.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/vmalloc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 0dd80222b20b..0b057628a7ba 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1683,7 +1683,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 
 		if (fatal_signal_pending(current)) {
 			area->nr_pages = i;
-			goto fail;
+			goto fail_no_warn;
 		}
 
 		if (node == NUMA_NO_NODE)
@@ -1709,6 +1709,7 @@ fail:
 	warn_alloc(gfp_mask, NULL,
 			  "vmalloc: allocation failure, allocated %ld of %ld bytes",
 			  (area->nr_pages*PAGE_SIZE), area->size);
+fail_no_warn:
 	vfree(area->addr);
 	return NULL;
 }
-- 
cgit v1.2.3


From 55adc1d05dca9e949cdf46c747cb1e91c0e9143d Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 16 Mar 2017 16:40:30 -0700
Subject: mm: add private lock to serialize memory hotplug operations

Commit bfc8c90139eb ("mem-hotplug: implement get/put_online_mems")
introduced new functions get/put_online_mems() and mem_hotplug_begin/end()
in order to allow similar semantics for memory hotplug like for cpu
hotplug.

The corresponding functions for cpu hotplug are get/put_online_cpus()
and cpu_hotplug_begin/done() for cpu hotplug.

The commit however missed to introduce functions that would serialize
memory hotplug operations like they are done for cpu hotplug with
cpu_maps_update_begin/done().

This basically leaves mem_hotplug.active_writer unprotected and allows
concurrent writers to modify it, which may lead to problems as outlined
by commit f931ab479dd2 ("mm: fix devm_memremap_pages crash, use
mem_hotplug_{begin, done}").

That commit was extended again with commit b5d24fda9c3d ("mm,
devm_memremap_pages: hold device_hotplug lock over mem_hotplug_{begin,
done}") which serializes memory hotplug operations for some call sites
by using the device_hotplug lock.

In addition with commit 3fc21924100b ("mm: validate device_hotplug is held
for memory hotplug") a sanity check was added to mem_hotplug_begin() to
verify that the device_hotplug lock is held.

This in turn triggers the following warning on s390:

WARNING: CPU: 6 PID: 1 at drivers/base/core.c:643 assert_held_device_hotplug+0x4a/0x58
 Call Trace:
  assert_held_device_hotplug+0x40/0x58)
  mem_hotplug_begin+0x34/0xc8
  add_memory_resource+0x7e/0x1f8
  add_memory+0xda/0x130
  add_memory_merged+0x15c/0x178
  sclp_detect_standby_memory+0x2ae/0x2f8
  do_one_initcall+0xa2/0x150
  kernel_init_freeable+0x228/0x2d8
  kernel_init+0x2a/0x140
  kernel_thread_starter+0x6/0xc

One possible fix would be to add more lock_device_hotplug() and
unlock_device_hotplug() calls around each call site of
mem_hotplug_begin/end().  But that would give the device_hotplug lock
additional semantics it better should not have (serialize memory hotplug
operations).

Instead add a new memory_add_remove_lock which has the similar semantics
like cpu_add_remove_lock for cpu hotplug.

To keep things hopefully a bit easier the lock will be locked and unlocked
within the mem_hotplug_begin/end() functions.

Link: http://lkml.kernel.org/r/20170314125226.16779-2-heiko.carstens@de.ibm.com
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reported-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Ben Hutchings <ben@decadent.org.uk>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/memremap.c   | 4 ----
 mm/memory_hotplug.c | 6 +++++-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/memremap.c b/kernel/memremap.c
index 06123234f118..07e85e5229da 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -247,11 +247,9 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
 	align_start = res->start & ~(SECTION_SIZE - 1);
 	align_size = ALIGN(resource_size(res), SECTION_SIZE);
 
-	lock_device_hotplug();
 	mem_hotplug_begin();
 	arch_remove_memory(align_start, align_size);
 	mem_hotplug_done();
-	unlock_device_hotplug();
 
 	untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
 	pgmap_radix_release(res);
@@ -364,11 +362,9 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 	if (error)
 		goto err_pfn_remap;
 
-	lock_device_hotplug();
 	mem_hotplug_begin();
 	error = arch_add_memory(nid, align_start, align_size, true);
 	mem_hotplug_done();
-	unlock_device_hotplug();
 	if (error)
 		goto err_add_memory;
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 295479b792ec..6fa7208bcd56 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -125,9 +125,12 @@ void put_online_mems(void)
 
 }
 
+/* Serializes write accesses to mem_hotplug.active_writer. */
+static DEFINE_MUTEX(memory_add_remove_lock);
+
 void mem_hotplug_begin(void)
 {
-	assert_held_device_hotplug();
+	mutex_lock(&memory_add_remove_lock);
 
 	mem_hotplug.active_writer = current;
 
@@ -147,6 +150,7 @@ void mem_hotplug_done(void)
 	mem_hotplug.active_writer = NULL;
 	mutex_unlock(&mem_hotplug.lock);
 	memhp_lock_release();
+	mutex_unlock(&memory_add_remove_lock);
 }
 
 /* add this memory to iomem resource */
-- 
cgit v1.2.3


From 15c9e10d9ad4d41d076148bbff1de7f659f68852 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 16 Mar 2017 16:40:33 -0700
Subject: drivers core: remove assert_held_device_hotplug()

The last caller of assert_held_device_hotplug() is gone, so remove it again.

Link: http://lkml.kernel.org/r/20170314125226.16779-3-heiko.carstens@de.ibm.com
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Ben Hutchings <ben@decadent.org.uk>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Sebastian Ott <sebott@linux.vnet.ibm.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/core.c    | 5 -----
 include/linux/device.h | 1 -
 2 files changed, 6 deletions(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 684bda4d14a1..6bb60fb6a30b 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -639,11 +639,6 @@ int lock_device_hotplug_sysfs(void)
 	return restart_syscall();
 }
 
-void assert_held_device_hotplug(void)
-{
-	lockdep_assert_held(&device_hotplug_lock);
-}
-
 #ifdef CONFIG_BLOCK
 static inline int device_is_not_partition(struct device *dev)
 {
diff --git a/include/linux/device.h b/include/linux/device.h
index 30c4570e928d..9ef518af5515 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1140,7 +1140,6 @@ static inline bool device_supports_offline(struct device *dev)
 extern void lock_device_hotplug(void);
 extern void unlock_device_hotplug(void);
 extern int lock_device_hotplug_sysfs(void);
-void assert_held_device_hotplug(void);
 extern int device_offline(struct device *dev);
 extern int device_online(struct device *dev);
 extern void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode);
-- 
cgit v1.2.3


From 966fa72a716ceafc69de901a31f7cc1f52b35f81 Mon Sep 17 00:00:00 2001
From: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
Date: Tue, 14 Mar 2017 08:17:00 +0530
Subject: kernfs: Check KERNFS_HAS_RELEASE before calling kernfs_release_file()

Recently started seeing a kernel oops when a module tries removing a
memory mapped sysfs bin_attribute. On closer investigation the root
cause seems to be kernfs_release_file() trying to call
kernfs_op.release() callback that's NULL for such sysfs
bin_attributes. The oops occurs when kernfs_release_file() is called from
kernfs_drain_open_files() to cleanup any open handles with active
memory mappings.

The patch fixes this by checking for flag KERNFS_HAS_RELEASE before
calling kernfs_release_file() in function kernfs_drain_open_files().

On ppc64-le arch with cxl module the oops back-trace is of the
form below:
[  861.381126] Unable to handle kernel paging request for instruction fetch
[  861.381360] Faulting instruction address: 0x00000000
[  861.381428] Oops: Kernel access of bad area, sig: 11 [#1]
....
[  861.382481] NIP: 0000000000000000 LR: c000000000362c60 CTR:
0000000000000000
....
Call Trace:
[c000000f1680b750] [c000000000362c34] kernfs_drain_open_files+0x104/0x1d0 (unreliable)
[c000000f1680b790] [c00000000035fa00] __kernfs_remove+0x260/0x2c0
[c000000f1680b820] [c000000000360da0] kernfs_remove_by_name_ns+0x60/0xe0
[c000000f1680b8b0] [c0000000003638f4] sysfs_remove_bin_file+0x24/0x40
[c000000f1680b8d0] [c00000000062a164] device_remove_bin_file+0x24/0x40
[c000000f1680b8f0] [d000000009b7b22c] cxl_sysfs_afu_remove+0x144/0x170 [cxl]
[c000000f1680b940] [d000000009b7c7e4] cxl_remove+0x6c/0x1a0 [cxl]
[c000000f1680b990] [c00000000052f694] pci_device_remove+0x64/0x110
[c000000f1680b9d0] [c0000000006321d4] device_release_driver_internal+0x1f4/0x2b0
[c000000f1680ba20] [c000000000525cb0] pci_stop_bus_device+0xa0/0xd0
[c000000f1680ba60] [c000000000525e80] pci_stop_and_remove_bus_device+0x20/0x40
[c000000f1680ba90] [c00000000004a6c4] pci_hp_remove_devices+0x84/0xc0
[c000000f1680bad0] [c00000000004a688] pci_hp_remove_devices+0x48/0xc0
[c000000f1680bb10] [c0000000009dfda4] eeh_reset_device+0xb0/0x290
[c000000f1680bbb0] [c000000000032b4c] eeh_handle_normal_event+0x47c/0x530
[c000000f1680bc60] [c000000000032e64] eeh_handle_event+0x174/0x350
[c000000f1680bd10] [c000000000033228] eeh_event_handler+0x1e8/0x1f0
[c000000f1680bdc0] [c0000000000d384c] kthread+0x14c/0x190
[c000000f1680be30] [c00000000000b5a0] ret_from_kernel_thread+0x5c/0xbc

Fixes: f83f3c515654 ("kernfs: fix locking around kernfs_ops->release() callback")
Signed-off-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/kernfs/file.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 8e4dc7ab584c..ac2dfe0c5a9c 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -809,7 +809,8 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
 		if (kn->flags & KERNFS_HAS_MMAP)
 			unmap_mapping_range(inode->i_mapping, 0, 0, 1);
 
-		kernfs_release_file(kn, of);
+		if (kn->flags & KERNFS_HAS_RELEASE)
+			kernfs_release_file(kn, of);
 	}
 
 	mutex_unlock(&kernfs_open_file_mutex);
-- 
cgit v1.2.3


From 4cbe4dac82e423ecc9a0ba46af24a860853259f4 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Mon, 13 Mar 2017 19:29:08 +0200
Subject: net/mlx4_core: Avoid delays during VF driver device shutdown

Some Hypervisors detach VFs from VMs by instantly causing an FLR event
to be generated for a VF.

In the mlx4 case, this will cause that VF's comm channel to be disabled
before the VM has an opportunity to invoke the VF device's "shutdown"
method.

For such Hypervisors, there is a race condition between the VF's
shutdown method and its internal-error detection/reset thread.

The internal-error detection/reset thread (which runs every 5 seconds) also
detects a disabled comm channel. If the internal-error detection/reset
flow wins the race, we still get delays (while that flow tries repeatedly
to detect comm-channel recovery).

The cited commit fixed the command timeout problem when the
internal-error detection/reset flow loses the race.

This commit avoids the unneeded delays when the internal-error
detection/reset flow wins.

Fixes: d585df1c5ccf ("net/mlx4_core: Avoid command timeouts during VF driver device shutdown")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Reported-by: Simon Xiao <sixiao@microsoft.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/cmd.c  | 11 +++++++++++
 drivers/net/ethernet/mellanox/mlx4/main.c | 11 +++++++++++
 include/linux/mlx4/device.h               |  1 +
 3 files changed, 23 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index e8c105164931..0e0fa7030565 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -2305,6 +2305,17 @@ static int sync_toggles(struct mlx4_dev *dev)
 		rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read));
 		if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) {
 			/* PCI might be offline */
+
+			/* If device removal has been requested,
+			 * do not continue retrying.
+			 */
+			if (dev->persist->interface_state &
+			    MLX4_INTERFACE_STATE_NOWAIT) {
+				mlx4_warn(dev,
+					  "communication channel is offline\n");
+				return -EIO;
+			}
+
 			msleep(100);
 			wr_toggle = swab32(readl(&priv->mfunc.comm->
 					   slave_write));
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 21377c315083..703205475524 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1940,6 +1940,14 @@ static int mlx4_comm_check_offline(struct mlx4_dev *dev)
 			       (u32)(1 << COMM_CHAN_OFFLINE_OFFSET));
 		if (!offline_bit)
 			return 0;
+
+		/* If device removal has been requested,
+		 * do not continue retrying.
+		 */
+		if (dev->persist->interface_state &
+		    MLX4_INTERFACE_STATE_NOWAIT)
+			break;
+
 		/* There are cases as part of AER/Reset flow that PF needs
 		 * around 100 msec to load. We therefore sleep for 100 msec
 		 * to allow other tasks to make use of that CPU during this
@@ -3955,6 +3963,9 @@ static void mlx4_remove_one(struct pci_dev *pdev)
 	struct devlink *devlink = priv_to_devlink(priv);
 	int active_vfs = 0;
 
+	if (mlx4_is_slave(dev))
+		persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT;
+
 	mutex_lock(&persist->interface_state_mutex);
 	persist->interface_state |= MLX4_INTERFACE_STATE_DELETION;
 	mutex_unlock(&persist->interface_state_mutex);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 7e66e4f62858..1beb1ec2fbdf 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -476,6 +476,7 @@ enum {
 enum {
 	MLX4_INTERFACE_STATE_UP		= 1 << 0,
 	MLX4_INTERFACE_STATE_DELETION	= 1 << 1,
+	MLX4_INTERFACE_STATE_NOWAIT	= 1 << 2,
 };
 
 #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \
-- 
cgit v1.2.3


From bc9ab9231ec8c08352ea860480523d88a221a68f Mon Sep 17 00:00:00 2001
From: David Arcari <darcari@redhat.com>
Date: Mon, 13 Mar 2017 19:07:16 -0400
Subject: net: ethernet: aquantia: set net_device mtu when mtu is changed

When the aquantia device mtu is changed the net_device structure is not
updated.  As a result the ip command does not properly reflect the mtu change.

Commit 5513e16421cb incorrectly assumed that __dev_set_mtu() was making the
assignment ndev->mtu = new_mtu;  This is not true in the case where the driver
has a ndo_change_mtu routine.

Fixes: 5513e16421cb ("net: ethernet: aquantia: Fixes for aq_ndev_change_mtu")

Cc: Pavel Belous <Pavel.Belous@aquantia.com>
Signed-off-by: David Arcari <darcari@redhat.com>
Tested-by: Pavel Belous <pavel.belous@aquantia.com>
Reviewed-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/aq_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
index dad63623be6a..d05fbfdce5e5 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
@@ -98,6 +98,7 @@ static int aq_ndev_change_mtu(struct net_device *ndev, int new_mtu)
 
 	if (err < 0)
 		goto err_exit;
+	ndev->mtu = new_mtu;
 
 	if (netif_running(ndev)) {
 		aq_ndev_close(ndev);
-- 
cgit v1.2.3


From 61733c91c454a61be0ffc93fe46a5d5f2f048c1c Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Mon, 13 Mar 2017 16:49:10 -0700
Subject: net: mpls: Fix nexthop alive tracking on down events

Alive tracking of nexthops can account for a link twice if the carrier
goes down followed by an admin down of the same link rendering multipath
routes useless. This is similar to 79099aab38c8 for UNREGISTER events and
DOWN events.

Fix by tracking number of alive nexthops in mpls_ifdown similar to the
logic in mpls_ifup. Checking the flags per nexthop once after all events
have been processed is simpler than trying to maintian a running count
through all event combinations.

Also, WRITE_ONCE is used instead of ACCESS_ONCE to set rt_nhn_alive
per a comment from checkpatch:
    WARNING: Prefer WRITE_ONCE(<FOO>, <BAR>) over ACCESS_ONCE(<FOO>) = <BAR>

Fixes: c89359a42e2a4 ("mpls: support for dead routes")
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Acked-by: Robert Shearman <rshearma@brocade.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mpls/af_mpls.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 33211f9a2656..6414079aa729 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1269,6 +1269,8 @@ static void mpls_ifdown(struct net_device *dev, int event)
 {
 	struct mpls_route __rcu **platform_label;
 	struct net *net = dev_net(dev);
+	unsigned int nh_flags = RTNH_F_DEAD | RTNH_F_LINKDOWN;
+	unsigned int alive;
 	unsigned index;
 
 	platform_label = rtnl_dereference(net->mpls.platform_label);
@@ -1278,9 +1280,11 @@ static void mpls_ifdown(struct net_device *dev, int event)
 		if (!rt)
 			continue;
 
+		alive = 0;
 		change_nexthops(rt) {
 			if (rtnl_dereference(nh->nh_dev) != dev)
-				continue;
+				goto next;
+
 			switch (event) {
 			case NETDEV_DOWN:
 			case NETDEV_UNREGISTER:
@@ -1288,13 +1292,16 @@ static void mpls_ifdown(struct net_device *dev, int event)
 				/* fall through */
 			case NETDEV_CHANGE:
 				nh->nh_flags |= RTNH_F_LINKDOWN;
-				if (event != NETDEV_UNREGISTER)
-					ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1;
 				break;
 			}
 			if (event == NETDEV_UNREGISTER)
 				RCU_INIT_POINTER(nh->nh_dev, NULL);
+next:
+			if (!(nh->nh_flags & nh_flags))
+				alive++;
 		} endfor_nexthops(rt);
+
+		WRITE_ONCE(rt->rt_nhn_alive, alive);
 	}
 }
 
-- 
cgit v1.2.3


From 4ee39733fbecf04cf9f346de2d64788c35028079 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Wed, 15 Mar 2017 18:14:33 -0700
Subject: net: ipv6: set route type for anycast routes

Anycast routes have the RTF_ANYCAST flag set, but when dumping routes
for userspace the route type is not set to RTN_ANYCAST. Make it so.

Fixes: 58c4fb86eabcb ("[IPV6]: Flag RTF_ANYCAST for anycast routes")
CC: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 35c58b669ebd..9db1418993f2 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3423,6 +3423,8 @@ static int rt6_fill_node(struct net *net,
 	}
 	else if (rt->rt6i_flags & RTF_LOCAL)
 		rtm->rtm_type = RTN_LOCAL;
+	else if (rt->rt6i_flags & RTF_ANYCAST)
+		rtm->rtm_type = RTN_ANYCAST;
 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
 		rtm->rtm_type = RTN_LOCAL;
 	else
-- 
cgit v1.2.3


From 687e0687f71ec00e0132a21fef802dee88c2f1ad Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Tue, 14 Mar 2017 17:55:45 +0100
Subject: USB: usbtmc: add missing endpoint sanity check

USBTMC devices are required to have a bulk-in and a bulk-out endpoint,
but the driver failed to verify this, something which could lead to the
endpoint addresses being taken from uninitialised memory.

Make sure to zero all private data as part of allocation, and add the
missing endpoint sanity check.

Note that this also addresses a more recently introduced issue, where
the interrupt-in-presence flag would also be uninitialised whenever the
optional interrupt-in endpoint is not present. This in turn could lead
to an interrupt urb being allocated, initialised and submitted based on
uninitialised values.

Fixes: dbf3e7f654c0 ("Implement an ioctl to support the USMTMC-USB488 READ_STATUS_BYTE operation.")
Fixes: 5b775f672cc9 ("USB: add USB test and measurement class driver")
Cc: stable <stable@vger.kernel.org>     # 2.6.28
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/usbtmc.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index f03692ec5520..5e3446db4513 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -1381,7 +1381,7 @@ static int usbtmc_probe(struct usb_interface *intf,
 
 	dev_dbg(&intf->dev, "%s called\n", __func__);
 
-	data = kmalloc(sizeof(*data), GFP_KERNEL);
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
 
@@ -1444,6 +1444,13 @@ static int usbtmc_probe(struct usb_interface *intf,
 			break;
 		}
 	}
+
+	if (!data->bulk_out || !data->bulk_in) {
+		dev_err(&intf->dev, "bulk endpoints not found\n");
+		retcode = -ENODEV;
+		goto err_put;
+	}
+
 	/* Find int endpoint */
 	for (n = 0; n < iface_desc->desc.bNumEndpoints; n++) {
 		endpoint = &iface_desc->endpoint[n].desc;
@@ -1512,6 +1519,7 @@ error_register:
 	sysfs_remove_group(&intf->dev.kobj, &capability_attr_grp);
 	sysfs_remove_group(&intf->dev.kobj, &data_attr_grp);
 	usbtmc_free_int(data);
+err_put:
 	kref_put(&data->kref, usbtmc_delete);
 	return retcode;
 }
-- 
cgit v1.2.3


From 2e47c53503eb9faff42b3cfa144a833344dd1f89 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Tue, 14 Mar 2017 17:55:46 +0100
Subject: USB: usbtmc: fix probe error path

Make sure to initialise the return value to avoid having allocation
failures going unnoticed when allocating interrupt-endpoint resources.

This prevents use-after-free or worse when the device is later unbound.

Fixes: dbf3e7f654c0 ("Implement an ioctl to support the USMTMC-USB488 READ_STATUS_BYTE operation.")
Cc: stable <stable@vger.kernel.org>     # 4.6
Cc: Dave Penkler <dpenkler@gmail.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/usbtmc.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index 5e3446db4513..8fb309a0ff6b 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -1476,8 +1476,10 @@ static int usbtmc_probe(struct usb_interface *intf,
 	if (data->iin_ep_present) {
 		/* allocate int urb */
 		data->iin_urb = usb_alloc_urb(0, GFP_KERNEL);
-		if (!data->iin_urb)
+		if (!data->iin_urb) {
+			retcode = -ENOMEM;
 			goto error_register;
+		}
 
 		/* Protect interrupt in endpoint data until iin_urb is freed */
 		kref_get(&data->kref);
@@ -1485,8 +1487,10 @@ static int usbtmc_probe(struct usb_interface *intf,
 		/* allocate buffer for interrupt in */
 		data->iin_buffer = kmalloc(data->iin_wMaxPacketSize,
 					GFP_KERNEL);
-		if (!data->iin_buffer)
+		if (!data->iin_buffer) {
+			retcode = -ENOMEM;
 			goto error_register;
+		}
 
 		/* fill interrupt urb */
 		usb_fill_int_urb(data->iin_urb, data->usb_dev,
-- 
cgit v1.2.3


From cdd7928df0d2efaa3270d711963773a08a4cc8ab Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Tue, 14 Mar 2017 12:09:56 +0100
Subject: ACM gadget: fix endianness in notifications
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The gadget code exports the bitfield for serial status changes
over the wire in its internal endianness. The fix is to convert
to little endian before sending it over the wire.

Signed-off-by: Oliver Neukum <oneukum@suse.com>
Tested-by: 家瑋 <momo1208@gmail.com>
CC: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_acm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/function/f_acm.c b/drivers/usb/gadget/function/f_acm.c
index a30766ca4226..5e3828d9dac7 100644
--- a/drivers/usb/gadget/function/f_acm.c
+++ b/drivers/usb/gadget/function/f_acm.c
@@ -535,13 +535,15 @@ static int acm_notify_serial_state(struct f_acm *acm)
 {
 	struct usb_composite_dev *cdev = acm->port.func.config->cdev;
 	int			status;
+	__le16			serial_state;
 
 	spin_lock(&acm->lock);
 	if (acm->notify_req) {
 		dev_dbg(&cdev->gadget->dev, "acm ttyGS%d serial state %04x\n",
 			acm->port_num, acm->serial_state);
+		serial_state = cpu_to_le16(acm->serial_state);
 		status = acm_cdc_notify(acm, USB_CDC_NOTIFY_SERIAL_STATE,
-				0, &acm->serial_state, sizeof(acm->serial_state));
+				0, &serial_state, sizeof(acm->serial_state));
 	} else {
 		acm->pending = true;
 		status = 0;
-- 
cgit v1.2.3


From 9501df3cd9204f5859f649182431616a31ee88a1 Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Date: Wed, 15 Mar 2017 23:38:07 -0400
Subject: ibmvnic: Free tx/rx scrq pointer array when releasing sub-crqs

The pointer array for the tx/rx sub crqs should be free'ed when
releasing the tx/rx sub crqs.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 5f11b4dc95d2..b23d6545f835 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1257,6 +1257,7 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter)
 				release_sub_crq_queue(adapter,
 						      adapter->tx_scrq[i]);
 			}
+		kfree(adapter->tx_scrq);
 		adapter->tx_scrq = NULL;
 	}
 
@@ -1269,6 +1270,7 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter)
 				release_sub_crq_queue(adapter,
 						      adapter->rx_scrq[i]);
 			}
+		kfree(adapter->rx_scrq);
 		adapter->rx_scrq = NULL;
 	}
 }
-- 
cgit v1.2.3


From 4d4a6ac73e7466c2085c307fac41f74ce4568a45 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 16 Mar 2017 16:27:10 +0000
Subject: rxrpc: Ignore BUSY packets on old calls

If we receive a BUSY packet for a call we think we've just completed, the
packet is handed off to the connection processor to deal with - but the
connection processor doesn't expect a BUSY packet and so flags a protocol
error.

Fix this by simply ignoring the BUSY packet for the moment.

The symptom of this may appear as a system call failing with EPROTO.  This
may be triggered by pressing ctrl-C under some circumstances.

This comes about we abort calls due to interruption by a signal (which we
shouldn't do, but that's going to be a large fix and mostly in fs/afs/).
What happens is that we abort the call and may also abort follow up calls
too (this needs offloading somehoe).  So we see a transmission of something
like the following sequence of packets:

	DATA for call N
	ABORT call N
	DATA for call N+1
	ABORT call N+1

in very quick succession on the same channel.  However, the peer may have
deferred the processing of the ABORT from the call N to a background thread
and thus sees the DATA message from the call N+1 coming in before it has
cleared the channel.  Thus it sends a BUSY packet[*].

[*] Note that some implementations (OpenAFS, for example) mark the BUSY
    packet with one plus the callNumber of the call prior to call N.
    Ordinarily, this would be call N, but there's no requirement for the
    calls on a channel to be numbered strictly sequentially (the number is
    required to increase).

    This is wrong and means that the callNumber in the BUSY packet should
    be ignored (it really ought to be N+1 since that's what it's in
    response to).

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/conn_event.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 3f9d8d7ec632..b099b64366f3 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -275,6 +275,10 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
 		rxrpc_conn_retransmit_call(conn, skb);
 		return 0;
 
+	case RXRPC_PACKET_TYPE_BUSY:
+		/* Just ignore BUSY packets for now. */
+		return 0;
+
 	case RXRPC_PACKET_TYPE_ABORT:
 		if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
 				  &wtmp, sizeof(wtmp)) < 0)
-- 
cgit v1.2.3


From d12c917691b45d9dffcfe7c2362d25caa40905fd Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Thu, 16 Mar 2017 10:32:42 -0700
Subject: bridge: resolve a false alarm of lockdep

Andrei reported a false alarm of lockdep at net/bridge/br_fdb.c:109,
this is because in Andrei's case, a spin_bug() was already triggered
before this, therefore the debug_locks is turned off, lockdep_is_held()
is no longer accurate after that. We should use lockdep_assert_held_once()
instead of lockdep_is_held() to respect debug_locks.

Fixes: 410b3d48f5111 ("bridge: fdb: add proper lock checks in searching functions")
Reported-by: Andrei Vagin <avagin@gmail.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_fdb.c     | 2 +-
 net/bridge/br_private.h | 9 ---------
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 4f598dc2d916..6e08b7199dd7 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -106,7 +106,7 @@ static struct net_bridge_fdb_entry *br_fdb_find(struct net_bridge *br,
 	struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
 	struct net_bridge_fdb_entry *fdb;
 
-	WARN_ON_ONCE(!br_hash_lock_held(br));
+	lockdep_assert_held_once(&br->hash_lock);
 
 	rcu_read_lock();
 	fdb = fdb_find_rcu(head, addr, vid);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 2288fca7756c..61368186edea 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -531,15 +531,6 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
 int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
 			      const unsigned char *addr, u16 vid);
 
-static inline bool br_hash_lock_held(struct net_bridge *br)
-{
-#ifdef CONFIG_LOCKDEP
-	return lockdep_is_held(&br->hash_lock);
-#else
-	return true;
-#endif
-}
-
 /* br_forward.c */
 enum br_pkt_type {
 	BR_PKT_UNICAST,
-- 
cgit v1.2.3


From e14b4db7a567ff507453ecd9c64da51bbc2b6d23 Mon Sep 17 00:00:00 2001
From: stephen hemminger <stephen@networkplumber.org>
Date: Thu, 16 Mar 2017 12:21:32 -0700
Subject: netvsc: fix race during initialization

When device is being setup on boot, there is a small race where
network device callback is registered, but the netvsc_device pointer
is not set yet.  This can cause a NULL ptr dereference if packet
arrives during this window.

Fixes: 46b4f7f5d1f7 ("netvsc: eliminate per-device outstanding send counter")
Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/netvsc.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 4c1d8cca247b..8dd0b8770328 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -1231,8 +1231,11 @@ void netvsc_channel_cb(void *context)
 		return;
 
 	net_device = net_device_to_netvsc_device(ndev);
-	if (unlikely(net_device->destroy) &&
-	    netvsc_channel_idle(net_device, q_idx))
+	if (unlikely(!net_device))
+		return;
+
+	if (unlikely(net_device->destroy &&
+		     netvsc_channel_idle(net_device, q_idx)))
 		return;
 
 	/* commit_rd_index() -> hv_signal_on_read() needs this. */
-- 
cgit v1.2.3


From 7b2db29fbb4e766fcd02207eb2e2087170bd6ebc Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 8 Mar 2017 10:19:36 -0800
Subject: usb: hub: Fix crash after failure to read BOS descriptor

If usb_get_bos_descriptor() returns an error, usb->bos will be NULL.
Nevertheless, it is dereferenced unconditionally in
hub_set_initial_usb2_lpm_policy() if usb2_hw_lpm_capable is set.
This results in a crash.

usb 5-1: unable to get BOS descriptor
...
Unable to handle kernel NULL pointer dereference at virtual address 00000008
pgd = ffffffc00165f000
[00000008] *pgd=000000000174f003, *pud=000000000174f003,
		*pmd=0000000001750003, *pte=00e8000001751713
Internal error: Oops: 96000005 [#1] PREEMPT SMP
Modules linked in: uinput uvcvideo videobuf2_vmalloc cmac [ ... ]
CPU: 5 PID: 3353 Comm: kworker/5:3 Tainted: G    B 4.4.52 #480
Hardware name: Google Kevin (DT)
Workqueue: events driver_set_config_work
task: ffffffc0c3690000 ti: ffffffc0ae9a8000 task.ti: ffffffc0ae9a8000
PC is at hub_port_init+0xc3c/0xd10
LR is at hub_port_init+0xc3c/0xd10
...
Call trace:
[<ffffffc0007fbbfc>] hub_port_init+0xc3c/0xd10
[<ffffffc0007fbe2c>] usb_reset_and_verify_device+0x15c/0x82c
[<ffffffc0007fc5e0>] usb_reset_device+0xe4/0x298
[<ffffffbffc0e3fcc>] rtl8152_probe+0x84/0x9b0 [r8152]
[<ffffffc00080ca8c>] usb_probe_interface+0x244/0x2f8
[<ffffffc000774a24>] driver_probe_device+0x180/0x3b4
[<ffffffc000774e48>] __device_attach_driver+0xb4/0xe0
[<ffffffc000772168>] bus_for_each_drv+0xb4/0xe4
[<ffffffc0007747ec>] __device_attach+0xd0/0x158
[<ffffffc000775080>] device_initial_probe+0x24/0x30
[<ffffffc0007739d4>] bus_probe_device+0x50/0xe4
[<ffffffc000770bd0>] device_add+0x414/0x738
[<ffffffc000809fe8>] usb_set_configuration+0x89c/0x914
[<ffffffc00080a120>] driver_set_config_work+0xc0/0xf0
[<ffffffc000249bb8>] process_one_work+0x390/0x6b8
[<ffffffc00024abcc>] worker_thread+0x480/0x610
[<ffffffc000251a80>] kthread+0x164/0x178
[<ffffffc0002045d0>] ret_from_fork+0x10/0x40

Since we don't know anything about LPM capabilities without BOS descriptor,
don't attempt to enable LPM if it is not available.

Fixes: 890dae886721 ("xhci: Enable LPM support only for hardwired ...")
Cc: stable <stable@vger.kernel.org>
Cc: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index f0dd08198d74..5286bf67869a 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -4275,7 +4275,7 @@ static void hub_set_initial_usb2_lpm_policy(struct usb_device *udev)
 	struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent);
 	int connect_type = USB_PORT_CONNECT_TYPE_UNKNOWN;
 
-	if (!udev->usb2_hw_lpm_capable)
+	if (!udev->usb2_hw_lpm_capable || !udev->bos)
 		return;
 
 	if (hub)
-- 
cgit v1.2.3


From db7f00b8dba6d687b6ab1f2e9309acfd214fcb4b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 16 Mar 2017 15:43:19 -0700
Subject: tcp: tcp_get_info() should read tcp_time_stamp later

Commit b369e7fd41f7 ("tcp: make TCP_INFO more consistent") moved
lock_sock_fast() earlier in tcp_get_info()

This has the minor effect that jiffies value being sampled at the
beginning of tcp_get_info() is more likely to be off by one, and we
report big tcpi_last_data_sent values (like 0xFFFFFFFF).

Since we lock the socket, fetching tcp_time_stamp right before
doing the jiffies_to_msecs() calls is enough to remove these
wrong values.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index cf4555581282..1e319a525d51 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2770,7 +2770,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 {
 	const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	u32 now = tcp_time_stamp, intv;
+	u32 now, intv;
 	u64 rate64;
 	bool slow;
 	u32 rate;
@@ -2839,6 +2839,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_retrans = tp->retrans_out;
 	info->tcpi_fackets = tp->fackets_out;
 
+	now = tcp_time_stamp;
 	info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
 	info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime);
 	info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);
-- 
cgit v1.2.3


From b767ad726c2aa6219318bf0da83fbe690e653d9a Mon Sep 17 00:00:00 2001
From: Aleksey Makarov <aleksey.makarov@linaro.org>
Date: Wed, 1 Mar 2017 18:23:02 +0300
Subject: Revert "tty: serial: pl011: add ttyAMA for matching pl011 console"

The original patch makes the condition always true, so it is wrong.

It masks (but not fixes) the bug described in the commit message
but introduces a regression (no console is selected by SPCR)
in regular (no 'console=ttyAMA') case.

s/||/&&/ would not fix the problem as the root cause was identified
incorrectly.

This reverts commit aea9a80ba98a0c9b4de88850260e9fbdcc98360b.

Signed-off-by: Aleksey Makarov <aleksey.makarov@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Sudeep Holla <sudeep.holla@arm.com>
Tested-by: Jayachandran C <jnair@caviumnetworks.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/amba-pl011.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index 8789ea423ccf..56f92d7348bf 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -2373,7 +2373,7 @@ static int __init pl011_console_match(struct console *co, char *name, int idx,
 	if (strcmp(name, "qdf2400_e44") == 0) {
 		pr_info_once("UART: Working around QDF2400 SoC erratum 44");
 		qdf2400_e44_present = true;
-	} else if (strcmp(name, "pl011") != 0 || strcmp(name, "ttyAMA") != 0) {
+	} else if (strcmp(name, "pl011") != 0) {
 		return -ENODEV;
 	}
 
-- 
cgit v1.2.3


From 5362544bebe85071188dd9e479b5a5040841c895 Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Sat, 4 Mar 2017 14:55:19 +0100
Subject: tty: don't panic on OOM in tty_set_ldisc()

If tty_ldisc_open() fails in tty_set_ldisc(), it tries to go back
to the old discipline or N_TTY. But that can fail as well, in such
case it panics. This is not a graceful way to handle OOM.

Leave ldisc==NULL if all attempts fail instead.
Also use existing tty_ldisc_reinit() helper function instead of
tty_ldisc_restore(). Also don't WARN/BUG in tty_ldisc_reinit()
if N_TTY fails, which would have the same net effect of bringing
kernel down on OOM. Instead print a single line message about
what has happened.

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Cc: syzkaller@googlegroups.com
Cc: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jiri Slaby <jslaby@suse.com>
Cc: Peter Hurley <peter@hurleysoftware.com>
Cc: One Thousand Gnomes <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/tty_ldisc.c | 85 ++++++++++---------------------------------------
 1 file changed, 16 insertions(+), 69 deletions(-)

diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c
index 68947f6de5ad..c3956ca022e4 100644
--- a/drivers/tty/tty_ldisc.c
+++ b/drivers/tty/tty_ldisc.c
@@ -488,41 +488,6 @@ static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld)
 	tty_ldisc_debug(tty, "%p: closed\n", ld);
 }
 
-/**
- *	tty_ldisc_restore	-	helper for tty ldisc change
- *	@tty: tty to recover
- *	@old: previous ldisc
- *
- *	Restore the previous line discipline or N_TTY when a line discipline
- *	change fails due to an open error
- */
-
-static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old)
-{
-	struct tty_ldisc *new_ldisc;
-	int r;
-
-	/* There is an outstanding reference here so this is safe */
-	old = tty_ldisc_get(tty, old->ops->num);
-	WARN_ON(IS_ERR(old));
-	tty->ldisc = old;
-	tty_set_termios_ldisc(tty, old->ops->num);
-	if (tty_ldisc_open(tty, old) < 0) {
-		tty_ldisc_put(old);
-		/* This driver is always present */
-		new_ldisc = tty_ldisc_get(tty, N_TTY);
-		if (IS_ERR(new_ldisc))
-			panic("n_tty: get");
-		tty->ldisc = new_ldisc;
-		tty_set_termios_ldisc(tty, N_TTY);
-		r = tty_ldisc_open(tty, new_ldisc);
-		if (r < 0)
-			panic("Couldn't open N_TTY ldisc for "
-			      "%s --- error %d.",
-			      tty_name(tty), r);
-	}
-}
-
 /**
  *	tty_set_ldisc		-	set line discipline
  *	@tty: the terminal to set
@@ -536,12 +501,7 @@ static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old)
 
 int tty_set_ldisc(struct tty_struct *tty, int disc)
 {
-	int retval;
-	struct tty_ldisc *old_ldisc, *new_ldisc;
-
-	new_ldisc = tty_ldisc_get(tty, disc);
-	if (IS_ERR(new_ldisc))
-		return PTR_ERR(new_ldisc);
+	int retval, old_disc;
 
 	tty_lock(tty);
 	retval = tty_ldisc_lock(tty, 5 * HZ);
@@ -554,7 +514,8 @@ int tty_set_ldisc(struct tty_struct *tty, int disc)
 	}
 
 	/* Check the no-op case */
-	if (tty->ldisc->ops->num == disc)
+	old_disc = tty->ldisc->ops->num;
+	if (old_disc == disc)
 		goto out;
 
 	if (test_bit(TTY_HUPPED, &tty->flags)) {
@@ -563,34 +524,25 @@ int tty_set_ldisc(struct tty_struct *tty, int disc)
 		goto out;
 	}
 
-	old_ldisc = tty->ldisc;
-
-	/* Shutdown the old discipline. */
-	tty_ldisc_close(tty, old_ldisc);
-
-	/* Now set up the new line discipline. */
-	tty->ldisc = new_ldisc;
-	tty_set_termios_ldisc(tty, disc);
-
-	retval = tty_ldisc_open(tty, new_ldisc);
+	retval = tty_ldisc_reinit(tty, disc);
 	if (retval < 0) {
 		/* Back to the old one or N_TTY if we can't */
-		tty_ldisc_put(new_ldisc);
-		tty_ldisc_restore(tty, old_ldisc);
+		if (tty_ldisc_reinit(tty, old_disc) < 0) {
+			pr_err("tty: TIOCSETD failed, reinitializing N_TTY\n");
+			if (tty_ldisc_reinit(tty, N_TTY) < 0) {
+				/* At this point we have tty->ldisc == NULL. */
+				pr_err("tty: reinitializing N_TTY failed\n");
+			}
+		}
 	}
 
-	if (tty->ldisc->ops->num != old_ldisc->ops->num && tty->ops->set_ldisc) {
+	if (tty->ldisc && tty->ldisc->ops->num != old_disc &&
+	    tty->ops->set_ldisc) {
 		down_read(&tty->termios_rwsem);
 		tty->ops->set_ldisc(tty);
 		up_read(&tty->termios_rwsem);
 	}
 
-	/* At this point we hold a reference to the new ldisc and a
-	   reference to the old ldisc, or we hold two references to
-	   the old ldisc (if it was restored as part of error cleanup
-	   above). In either case, releasing a single reference from
-	   the old ldisc is correct. */
-	new_ldisc = old_ldisc;
 out:
 	tty_ldisc_unlock(tty);
 
@@ -598,7 +550,6 @@ out:
 	   already running */
 	tty_buffer_restart_work(tty->port);
 err:
-	tty_ldisc_put(new_ldisc);	/* drop the extra reference */
 	tty_unlock(tty);
 	return retval;
 }
@@ -659,10 +610,8 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc)
 	int retval;
 
 	ld = tty_ldisc_get(tty, disc);
-	if (IS_ERR(ld)) {
-		BUG_ON(disc == N_TTY);
+	if (IS_ERR(ld))
 		return PTR_ERR(ld);
-	}
 
 	if (tty->ldisc) {
 		tty_ldisc_close(tty, tty->ldisc);
@@ -674,10 +623,8 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc)
 	tty_set_termios_ldisc(tty, disc);
 	retval = tty_ldisc_open(tty, tty->ldisc);
 	if (retval) {
-		if (!WARN_ON(disc == N_TTY)) {
-			tty_ldisc_put(tty->ldisc);
-			tty->ldisc = NULL;
-		}
+		tty_ldisc_put(tty->ldisc);
+		tty->ldisc = NULL;
 	}
 	return retval;
 }
-- 
cgit v1.2.3


From a4a3e061149f09c075f108b6f1cf04d9739a6bc2 Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Sat, 4 Mar 2017 13:46:12 +0100
Subject: tty: fix data race in tty_ldisc_ref_wait()

tty_ldisc_ref_wait() checks tty->ldisc under tty->ldisc_sem.
But if ldisc==NULL it releases them sem and reloads
tty->ldisc without holding the sem. This is wrong and
can lead to returning non-NULL ldisc without protection.

Don't reload tty->ldisc second time.

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Cc: syzkaller@googlegroups.com
Cc: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jiri Slaby <jslaby@suse.com>
Cc: Peter Hurley <peter@hurleysoftware.com>
Cc: One Thousand Gnomes <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/tty_ldisc.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c
index c3956ca022e4..b0500a0a87b8 100644
--- a/drivers/tty/tty_ldisc.c
+++ b/drivers/tty/tty_ldisc.c
@@ -271,10 +271,13 @@ const struct file_operations tty_ldiscs_proc_fops = {
 
 struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty)
 {
+	struct tty_ldisc *ld;
+
 	ldsem_down_read(&tty->ldisc_sem, MAX_SCHEDULE_TIMEOUT);
-	if (!tty->ldisc)
+	ld = tty->ldisc;
+	if (!ld)
 		ldsem_up_read(&tty->ldisc_sem);
-	return tty->ldisc;
+	return ld;
 }
 EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait);
 
-- 
cgit v1.2.3


From 8971e1c79d3f6c9a5e6f7a65c50c41f434a4dae6 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Fri, 17 Mar 2017 16:02:35 +1100
Subject: powerpc/pseries: Don't give a warning when HPT resizing isn't
 available

As of commit 438cc81a41e8 ("powerpc/pseries: Automatically resize HPT
for memory hot add/remove"), when running on the pseries platform, we
always attempt to use the PAPR extension to resize the hashed page
table (HPT) when we add or remove memory.

This is fine, but when the extension is not available we'll give a
harmless, but scary warning. Instead check if the firmware supports HPT
resizing before populating the mmu_hash_ops.resize_hpt pointer.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/pseries/lpar.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 251060cf1713..8b1fe895daa3 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -751,7 +751,9 @@ void __init hpte_init_pseries(void)
 	mmu_hash_ops.flush_hash_range	 = pSeries_lpar_flush_hash_range;
 	mmu_hash_ops.hpte_clear_all      = pseries_hpte_clear_all;
 	mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
-	mmu_hash_ops.resize_hpt		 = pseries_lpar_resize_hpt;
+
+	if (firmware_has_feature(FW_FEATURE_HPT_RESIZE))
+		mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;
 }
 
 void radix_init_pseries(void)
-- 
cgit v1.2.3


From 5dc855d44c2ad960a86f593c60461f1ae1566b6d Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 16 Mar 2017 12:59:39 -0700
Subject: x86/perf: Fix CR4.PCE propagation to use active_mm instead of mm

If one thread mmaps a perf event while another thread in the same mm
is in some context where active_mm != mm (which can happen in the
scheduler, for example), refresh_pce() would write the wrong value
to CR4.PCE.  This broke some PAPI tests.

Reported-and-tested-by: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Fixes: 7911d3f7af14 ("perf/x86: Only allow rdpmc if a perf_event is mapped")
Link: http://lkml.kernel.org/r/0c5b38a76ea50e405f9abe07a13dfaef87c173a1.1489694270.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 1635c0c8df23..e07b36c5588a 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2100,8 +2100,8 @@ static int x86_pmu_event_init(struct perf_event *event)
 
 static void refresh_pce(void *ignored)
 {
-	if (current->mm)
-		load_mm_cr4(current->mm);
+	if (current->active_mm)
+		load_mm_cr4(current->active_mm);
 }
 
 static void x86_pmu_event_mapped(struct perf_event *event)
-- 
cgit v1.2.3


From 4b07372a32c0c1505a7634ad7e607d83340ef645 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 16 Mar 2017 12:59:40 -0700
Subject: x86/perf: Clarify why x86_pmu_event_mapped() isn't racy

Naively, it looks racy, but ->mmap_sem saves it.  Add a comment and a
lockdep assertion.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/03a1e629063899168dfc4707f3bb6e581e21f5c6.1489694270.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/core.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index e07b36c5588a..183a972f9210 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2109,6 +2109,18 @@ static void x86_pmu_event_mapped(struct perf_event *event)
 	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
 		return;
 
+	/*
+	 * This function relies on not being called concurrently in two
+	 * tasks in the same mm.  Otherwise one task could observe
+	 * perf_rdpmc_allowed > 1 and return all the way back to
+	 * userspace with CR4.PCE clear while another task is still
+	 * doing on_each_cpu_mask() to propagate CR4.PCE.
+	 *
+	 * For now, this can't happen because all callers hold mmap_sem
+	 * for write.  If this changes, we'll need a different solution.
+	 */
+	lockdep_assert_held_exclusive(&current->mm->mmap_sem);
+
 	if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
 		on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
 }
-- 
cgit v1.2.3


From 2cd29f2387be70de9feb4c9f8dbc7c0bd55748ce Mon Sep 17 00:00:00 2001
From: Robert Middleton <robert.middleton@rm5248.com>
Date: Wed, 15 Mar 2017 16:56:47 -0400
Subject: gpio:mcp23s08 Fixed missing interrupts

When an interrupt occurs on an MCP23S08 chip, the INTF register will only
contain one bit as causing the interrupt.  If more than two pins change at
the same time on the chip, this causes one of the pins to not be reported.
This patch fixes the logic for checking if a pin has changed, so that
multiple pins will always cause more than one change.

Cc: stable@vger.kernel.org
Signed-off-by: Robert Middleton <robert.middleton@rm5248.com>
Tested-by: Phil Reid <preid@electromag.com.au>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-mcp23s08.c | 65 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 60 insertions(+), 5 deletions(-)

diff --git a/drivers/gpio/gpio-mcp23s08.c b/drivers/gpio/gpio-mcp23s08.c
index bdb692345428..2a57d024481d 100644
--- a/drivers/gpio/gpio-mcp23s08.c
+++ b/drivers/gpio/gpio-mcp23s08.c
@@ -270,8 +270,10 @@ mcp23s08_direction_output(struct gpio_chip *chip, unsigned offset, int value)
 static irqreturn_t mcp23s08_irq(int irq, void *data)
 {
 	struct mcp23s08 *mcp = data;
-	int intcap, intf, i;
+	int intcap, intf, i, gpio, gpio_orig, intcap_mask;
 	unsigned int child_irq;
+	bool intf_set, intcap_changed, gpio_bit_changed,
+		defval_changed, gpio_set;
 
 	mutex_lock(&mcp->lock);
 	if (mcp_read(mcp, MCP_INTF, &intf) < 0) {
@@ -287,14 +289,67 @@ static irqreturn_t mcp23s08_irq(int irq, void *data)
 	}
 
 	mcp->cache[MCP_INTCAP] = intcap;
+
+	/* This clears the interrupt(configurable on S18) */
+	if (mcp_read(mcp, MCP_GPIO, &gpio) < 0) {
+		mutex_unlock(&mcp->lock);
+		return IRQ_HANDLED;
+	}
+	gpio_orig = mcp->cache[MCP_GPIO];
+	mcp->cache[MCP_GPIO] = gpio;
 	mutex_unlock(&mcp->lock);
 
+	if (mcp->cache[MCP_INTF] == 0) {
+		/* There is no interrupt pending */
+		return IRQ_HANDLED;
+	}
+
+	dev_dbg(mcp->chip.parent,
+		"intcap 0x%04X intf 0x%04X gpio_orig 0x%04X gpio 0x%04X\n",
+		intcap, intf, gpio_orig, gpio);
 
 	for (i = 0; i < mcp->chip.ngpio; i++) {
-		if ((BIT(i) & mcp->cache[MCP_INTF]) &&
-		    ((BIT(i) & intcap & mcp->irq_rise) ||
-		     (mcp->irq_fall & ~intcap & BIT(i)) ||
-		     (BIT(i) & mcp->cache[MCP_INTCON]))) {
+		/* We must check all of the inputs on the chip,
+		 * otherwise we may not notice a change on >=2 pins.
+		 *
+		 * On at least the mcp23s17, INTCAP is only updated
+		 * one byte at a time(INTCAPA and INTCAPB are
+		 * not written to at the same time - only on a per-bank
+		 * basis).
+		 *
+		 * INTF only contains the single bit that caused the
+		 * interrupt per-bank.  On the mcp23s17, there is
+		 * INTFA and INTFB.  If two pins are changed on the A
+		 * side at the same time, INTF will only have one bit
+		 * set.  If one pin on the A side and one pin on the B
+		 * side are changed at the same time, INTF will have
+		 * two bits set.  Thus, INTF can't be the only check
+		 * to see if the input has changed.
+		 */
+
+		intf_set = BIT(i) & mcp->cache[MCP_INTF];
+		if (i < 8 && intf_set)
+			intcap_mask = 0x00FF;
+		else if (i >= 8 && intf_set)
+			intcap_mask = 0xFF00;
+		else
+			intcap_mask = 0x00;
+
+		intcap_changed = (intcap_mask &
+			(BIT(i) & mcp->cache[MCP_INTCAP])) !=
+			(intcap_mask & (BIT(i) & gpio_orig));
+		gpio_set = BIT(i) & mcp->cache[MCP_GPIO];
+		gpio_bit_changed = (BIT(i) & gpio_orig) !=
+			(BIT(i) & mcp->cache[MCP_GPIO]);
+		defval_changed = (BIT(i) & mcp->cache[MCP_INTCON]) &&
+			((BIT(i) & mcp->cache[MCP_GPIO]) !=
+			(BIT(i) & mcp->cache[MCP_DEFVAL]));
+
+		if (((gpio_bit_changed || intcap_changed) &&
+			(BIT(i) & mcp->irq_rise) && gpio_set) ||
+		    ((gpio_bit_changed || intcap_changed) &&
+			(BIT(i) & mcp->irq_fall) && !gpio_set) ||
+		    defval_changed) {
 			child_irq = irq_find_mapping(mcp->chip.irqdomain, i);
 			handle_nested_irq(child_irq);
 		}
-- 
cgit v1.2.3


From 4938ca90166d6d3061793789e2eef42cd934fa97 Mon Sep 17 00:00:00 2001
From: Zhao Yan <yan.y.zhao@intel.com>
Date: Thu, 9 Mar 2017 10:09:44 +0800
Subject: drm/i915/gvt: handle force-nonpriv registers, cmd parser part

this patch adds force non-priv registers check in LRI cmds handler

v4:
transform is_force_nonpriv_mmio() from macro to inline fuction to eliminate
checkpatch warning

v3:
per zhenyu's comment, fix some style warnings

v2:
per zhenyu's comment, refine the code to remove cascaded ifs

Signed-off-by: Zhao Yan <yan.y.zhao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/cmd_parser.c | 23 +++++++++++++++++++++++
 drivers/gpu/drm/i915/gvt/handlers.c   | 17 +++++++++++++++++
 drivers/gpu/drm/i915/gvt/mmio.h       |  3 +++
 3 files changed, 43 insertions(+)

diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 7ae6e2b241c8..919c83abaeb1 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -817,6 +817,25 @@ static bool is_shadowed_mmio(unsigned int offset)
 	return ret;
 }
 
+static inline bool is_force_nonpriv_mmio(unsigned int offset)
+{
+	return (offset >= 0x24d0 && offset < 0x2500);
+}
+
+static int force_nonpriv_reg_handler(struct parser_exec_state *s,
+				     unsigned int offset, unsigned int index)
+{
+	struct intel_gvt *gvt = s->vgpu->gvt;
+	unsigned int data = cmd_val(s, index + 1);
+
+	if (!intel_gvt_in_force_nonpriv_whitelist(gvt, data)) {
+		gvt_err("Unexpected forcenonpriv 0x%x LRI write, value=0x%x\n",
+			offset, data);
+		return -EINVAL;
+	}
+	return 0;
+}
+
 static int cmd_reg_handler(struct parser_exec_state *s,
 	unsigned int offset, unsigned int index, char *cmd)
 {
@@ -841,6 +860,10 @@ static int cmd_reg_handler(struct parser_exec_state *s,
 		return 0;
 	}
 
+	if (is_force_nonpriv_mmio(offset) &&
+	    force_nonpriv_reg_handler(s, offset, index))
+		return -EINVAL;
+
 	if (offset == i915_mmio_reg_offset(DERRMR) ||
 		offset == i915_mmio_reg_offset(FORCEWAKE_MT)) {
 		/* Writing to HW VGT_PVINFO_PAGE offset will be discarded */
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 8e43395c748a..de975f40aebf 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -2988,3 +2988,20 @@ int intel_vgpu_default_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
 	write_vreg(vgpu, offset, p_data, bytes);
 	return 0;
 }
+
+/**
+ * intel_gvt_in_force_nonpriv_whitelist - if a mmio is in whitelist to be
+ * force-nopriv register
+ *
+ * @gvt: a GVT device
+ * @offset: register offset
+ *
+ * Returns:
+ * True if the register is in force-nonpriv whitelist;
+ * False if outside;
+ */
+bool intel_gvt_in_force_nonpriv_whitelist(struct intel_gvt *gvt,
+					  unsigned int offset)
+{
+	return in_whitelist(offset);
+}
diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h
index 3bc620f56f35..a3a027025cd0 100644
--- a/drivers/gpu/drm/i915/gvt/mmio.h
+++ b/drivers/gpu/drm/i915/gvt/mmio.h
@@ -107,4 +107,7 @@ int intel_vgpu_default_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
 				 void *p_data, unsigned int bytes);
 int intel_vgpu_default_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
 				  void *p_data, unsigned int bytes);
+
+bool intel_gvt_in_force_nonpriv_whitelist(struct intel_gvt *gvt,
+					  unsigned int offset);
 #endif
-- 
cgit v1.2.3


From 695fbc08d80f93ecca18a1abd8f52c2ab77fdc8d Mon Sep 17 00:00:00 2001
From: Tina Zhang <tina.zhang@intel.com>
Date: Fri, 10 Mar 2017 04:26:53 -0500
Subject: drm/i915/gvt: replace the gvt_err with gvt_vgpu_err

gvt_err should be used only for the very few critical error message
during host i915 drvier initialization. This patch
1. removes the redundant gvt_err;
2. creates a new gvt_vgpu_err to show errors caused by vgpu;
3. replaces the most gvt_err with gvt_vgpu_err;
4. leaves very few gvt_err for dumping gvt error during host gvt
   initialization.

v2. change name to gvt_vgpu_err and add vgpu id to the message. (Kevin)
    add gpu id to gvt_vgpu_err. (Zhi)
v3. remove gpu id from gvt_vgpu_err caller. (Zhi)
v4. add vgpu check to the gvt_vgpu_err macro. (Zhiyuan)
v5. add comments for v3 and v4.
v6. split the big patch into two, with this patch only for checking
    gvt_vgpu_err. (Zhenyu)
v7. rebase to staging branch
v8. rebase to fix branch

Signed-off-by: Tina Zhang <tina.zhang@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/aperture_gm.c |  8 ++--
 drivers/gpu/drm/i915/gvt/cmd_parser.c  | 84 +++++++++++++++++++---------------
 drivers/gpu/drm/i915/gvt/debug.h       |  8 ++++
 drivers/gpu/drm/i915/gvt/edid.c        | 13 +++---
 drivers/gpu/drm/i915/gvt/execlist.c    | 29 ++++++------
 drivers/gpu/drm/i915/gvt/gtt.c         | 74 +++++++++++++++---------------
 drivers/gpu/drm/i915/gvt/handlers.c    | 28 ++++++------
 drivers/gpu/drm/i915/gvt/kvmgt.c       | 33 +++++++------
 drivers/gpu/drm/i915/gvt/mmio.c        | 38 +++++++--------
 drivers/gpu/drm/i915/gvt/opregion.c    | 10 ++--
 drivers/gpu/drm/i915/gvt/render.c      |  2 +-
 drivers/gpu/drm/i915/gvt/scheduler.c   | 11 +++--
 12 files changed, 180 insertions(+), 158 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
index 3b6caaca9751..325618d969fe 100644
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -242,7 +242,7 @@ static int alloc_resource(struct intel_vgpu *vgpu,
 	const char *item;
 
 	if (!param->low_gm_sz || !param->high_gm_sz || !param->fence_sz) {
-		gvt_err("Invalid vGPU creation params\n");
+		gvt_vgpu_err("Invalid vGPU creation params\n");
 		return -EINVAL;
 	}
 
@@ -285,9 +285,9 @@ static int alloc_resource(struct intel_vgpu *vgpu,
 	return 0;
 
 no_enough_resource:
-	gvt_err("vgpu%d: fail to allocate resource %s\n", vgpu->id, item);
-	gvt_err("vgpu%d: request %luMB avail %luMB max %luMB taken %luMB\n",
-		vgpu->id, BYTES_TO_MB(request), BYTES_TO_MB(avail),
+	gvt_vgpu_err("fail to allocate resource %s\n", item);
+	gvt_vgpu_err("request %luMB avail %luMB max %luMB taken %luMB\n",
+		BYTES_TO_MB(request), BYTES_TO_MB(avail),
 		BYTES_TO_MB(max), BYTES_TO_MB(taken));
 	return -ENOSPC;
 }
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 919c83abaeb1..2ca0506d8d8c 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -843,20 +843,19 @@ static int cmd_reg_handler(struct parser_exec_state *s,
 	struct intel_gvt *gvt = vgpu->gvt;
 
 	if (offset + 4 > gvt->device_info.mmio_size) {
-		gvt_err("%s access to (%x) outside of MMIO range\n",
+		gvt_vgpu_err("%s access to (%x) outside of MMIO range\n",
 				cmd, offset);
 		return -EINVAL;
 	}
 
 	if (!intel_gvt_mmio_is_cmd_access(gvt, offset)) {
-		gvt_err("vgpu%d: %s access to non-render register (%x)\n",
-				s->vgpu->id, cmd, offset);
+		gvt_vgpu_err("%s access to non-render register (%x)\n",
+				cmd, offset);
 		return 0;
 	}
 
 	if (is_shadowed_mmio(offset)) {
-		gvt_err("vgpu%d: found access of shadowed MMIO %x\n",
-				s->vgpu->id, offset);
+		gvt_vgpu_err("found access of shadowed MMIO %x\n", offset);
 		return 0;
 	}
 
@@ -1152,6 +1151,7 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s,
 		struct mi_display_flip_command_info *info)
 {
 	struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv;
+	struct intel_vgpu *vgpu = s->vgpu;
 	u32 dword0 = cmd_val(s, 0);
 	u32 dword1 = cmd_val(s, 1);
 	u32 dword2 = cmd_val(s, 2);
@@ -1190,7 +1190,7 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s,
 		break;
 
 	default:
-		gvt_err("unknown plane code %d\n", plane);
+		gvt_vgpu_err("unknown plane code %d\n", plane);
 		return -EINVAL;
 	}
 
@@ -1297,25 +1297,26 @@ static int update_plane_mmio_from_mi_display_flip(
 static int cmd_handler_mi_display_flip(struct parser_exec_state *s)
 {
 	struct mi_display_flip_command_info info;
+	struct intel_vgpu *vgpu = s->vgpu;
 	int ret;
 	int i;
 	int len = cmd_length(s);
 
 	ret = decode_mi_display_flip(s, &info);
 	if (ret) {
-		gvt_err("fail to decode MI display flip command\n");
+		gvt_vgpu_err("fail to decode MI display flip command\n");
 		return ret;
 	}
 
 	ret = check_mi_display_flip(s, &info);
 	if (ret) {
-		gvt_err("invalid MI display flip command\n");
+		gvt_vgpu_err("invalid MI display flip command\n");
 		return ret;
 	}
 
 	ret = update_plane_mmio_from_mi_display_flip(s, &info);
 	if (ret) {
-		gvt_err("fail to update plane mmio\n");
+		gvt_vgpu_err("fail to update plane mmio\n");
 		return ret;
 	}
 
@@ -1373,7 +1374,8 @@ static inline int cmd_address_audit(struct parser_exec_state *s,
 	int ret;
 
 	if (op_size > max_surface_size) {
-		gvt_err("command address audit fail name %s\n", s->info->name);
+		gvt_vgpu_err("command address audit fail name %s\n",
+			s->info->name);
 		return -EINVAL;
 	}
 
@@ -1390,7 +1392,7 @@ static inline int cmd_address_audit(struct parser_exec_state *s,
 	}
 	return 0;
 err:
-	gvt_err("cmd_parser: Malicious %s detected, addr=0x%lx, len=%d!\n",
+	gvt_vgpu_err("cmd_parser: Malicious %s detected, addr=0x%lx, len=%d!\n",
 			s->info->name, guest_gma, op_size);
 
 	pr_err("cmd dump: ");
@@ -1435,8 +1437,10 @@ static int cmd_handler_mi_store_data_imm(struct parser_exec_state *s)
 
 static inline int unexpected_cmd(struct parser_exec_state *s)
 {
-	gvt_err("vgpu%d: Unexpected %s in command buffer!\n",
-			s->vgpu->id, s->info->name);
+	struct intel_vgpu *vgpu = s->vgpu;
+
+	gvt_vgpu_err("Unexpected %s in command buffer!\n", s->info->name);
+
 	return -EINVAL;
 }
 
@@ -1539,7 +1543,7 @@ static int copy_gma_to_hva(struct intel_vgpu *vgpu, struct intel_vgpu_mm *mm,
 	while (gma != end_gma) {
 		gpa = intel_vgpu_gma_to_gpa(mm, gma);
 		if (gpa == INTEL_GVT_INVALID_ADDR) {
-			gvt_err("invalid gma address: %lx\n", gma);
+			gvt_vgpu_err("invalid gma address: %lx\n", gma);
 			return -EFAULT;
 		}
 
@@ -1580,6 +1584,7 @@ static uint32_t find_bb_size(struct parser_exec_state *s)
 	uint32_t bb_size = 0;
 	uint32_t cmd_len = 0;
 	bool met_bb_end = false;
+	struct intel_vgpu *vgpu = s->vgpu;
 	u32 cmd;
 
 	/* get the start gm address of the batch buffer */
@@ -1588,7 +1593,7 @@ static uint32_t find_bb_size(struct parser_exec_state *s)
 
 	info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
 	if (info == NULL) {
-		gvt_err("unknown cmd 0x%x, opcode=0x%x\n",
+		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
 				cmd, get_opcode(cmd, s->ring_id));
 		return -EINVAL;
 	}
@@ -1597,7 +1602,7 @@ static uint32_t find_bb_size(struct parser_exec_state *s)
 				gma, gma + 4, &cmd);
 		info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
 		if (info == NULL) {
-			gvt_err("unknown cmd 0x%x, opcode=0x%x\n",
+			gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
 				cmd, get_opcode(cmd, s->ring_id));
 			return -EINVAL;
 		}
@@ -1622,6 +1627,7 @@ static uint32_t find_bb_size(struct parser_exec_state *s)
 static int perform_bb_shadow(struct parser_exec_state *s)
 {
 	struct intel_shadow_bb_entry *entry_obj;
+	struct intel_vgpu *vgpu = s->vgpu;
 	unsigned long gma = 0;
 	uint32_t bb_size;
 	void *dst = NULL;
@@ -1656,7 +1662,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 
 	ret = i915_gem_object_set_to_cpu_domain(entry_obj->obj, false);
 	if (ret) {
-		gvt_err("failed to set shadow batch to CPU\n");
+		gvt_vgpu_err("failed to set shadow batch to CPU\n");
 		goto unmap_src;
 	}
 
@@ -1668,7 +1674,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 			      gma, gma + bb_size,
 			      dst);
 	if (ret) {
-		gvt_err("fail to copy guest ring buffer\n");
+		gvt_vgpu_err("fail to copy guest ring buffer\n");
 		goto unmap_src;
 	}
 
@@ -1699,15 +1705,16 @@ static int cmd_handler_mi_batch_buffer_start(struct parser_exec_state *s)
 {
 	bool second_level;
 	int ret = 0;
+	struct intel_vgpu *vgpu = s->vgpu;
 
 	if (s->buf_type == BATCH_BUFFER_2ND_LEVEL) {
-		gvt_err("Found MI_BATCH_BUFFER_START in 2nd level BB\n");
+		gvt_vgpu_err("Found MI_BATCH_BUFFER_START in 2nd level BB\n");
 		return -EINVAL;
 	}
 
 	second_level = BATCH_BUFFER_2ND_LEVEL_BIT(cmd_val(s, 0)) == 1;
 	if (second_level && (s->buf_type != BATCH_BUFFER_INSTRUCTION)) {
-		gvt_err("Jumping to 2nd level BB from RB is not allowed\n");
+		gvt_vgpu_err("Jumping to 2nd level BB from RB is not allowed\n");
 		return -EINVAL;
 	}
 
@@ -1725,7 +1732,7 @@ static int cmd_handler_mi_batch_buffer_start(struct parser_exec_state *s)
 	if (batch_buffer_needs_scan(s)) {
 		ret = perform_bb_shadow(s);
 		if (ret < 0)
-			gvt_err("invalid shadow batch buffer\n");
+			gvt_vgpu_err("invalid shadow batch buffer\n");
 	} else {
 		/* emulate a batch buffer end to do return right */
 		ret = cmd_handler_mi_batch_buffer_end(s);
@@ -2452,6 +2459,7 @@ static int cmd_parser_exec(struct parser_exec_state *s)
 	int ret = 0;
 	cycles_t t0, t1, t2;
 	struct parser_exec_state s_before_advance_custom;
+	struct intel_vgpu *vgpu = s->vgpu;
 
 	t0 = get_cycles();
 
@@ -2459,7 +2467,7 @@ static int cmd_parser_exec(struct parser_exec_state *s)
 
 	info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
 	if (info == NULL) {
-		gvt_err("unknown cmd 0x%x, opcode=0x%x\n",
+		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
 				cmd, get_opcode(cmd, s->ring_id));
 		return -EINVAL;
 	}
@@ -2475,7 +2483,7 @@ static int cmd_parser_exec(struct parser_exec_state *s)
 	if (info->handler) {
 		ret = info->handler(s);
 		if (ret < 0) {
-			gvt_err("%s handler error\n", info->name);
+			gvt_vgpu_err("%s handler error\n", info->name);
 			return ret;
 		}
 	}
@@ -2486,7 +2494,7 @@ static int cmd_parser_exec(struct parser_exec_state *s)
 	if (!(info->flag & F_IP_ADVANCE_CUSTOM)) {
 		ret = cmd_advance_default(s);
 		if (ret) {
-			gvt_err("%s IP advance error\n", info->name);
+			gvt_vgpu_err("%s IP advance error\n", info->name);
 			return ret;
 		}
 	}
@@ -2509,6 +2517,7 @@ static int command_scan(struct parser_exec_state *s,
 
 	unsigned long gma_head, gma_tail, gma_bottom;
 	int ret = 0;
+	struct intel_vgpu *vgpu = s->vgpu;
 
 	gma_head = rb_start + rb_head;
 	gma_tail = rb_start + rb_tail;
@@ -2520,7 +2529,7 @@ static int command_scan(struct parser_exec_state *s,
 		if (s->buf_type == RING_BUFFER_INSTRUCTION) {
 			if (!(s->ip_gma >= rb_start) ||
 				!(s->ip_gma < gma_bottom)) {
-				gvt_err("ip_gma %lx out of ring scope."
+				gvt_vgpu_err("ip_gma %lx out of ring scope."
 					"(base:0x%lx, bottom: 0x%lx)\n",
 					s->ip_gma, rb_start,
 					gma_bottom);
@@ -2528,7 +2537,7 @@ static int command_scan(struct parser_exec_state *s,
 				return -EINVAL;
 			}
 			if (gma_out_of_range(s->ip_gma, gma_head, gma_tail)) {
-				gvt_err("ip_gma %lx out of range."
+				gvt_vgpu_err("ip_gma %lx out of range."
 					"base 0x%lx head 0x%lx tail 0x%lx\n",
 					s->ip_gma, rb_start,
 					rb_head, rb_tail);
@@ -2538,7 +2547,7 @@ static int command_scan(struct parser_exec_state *s,
 		}
 		ret = cmd_parser_exec(s);
 		if (ret) {
-			gvt_err("cmd parser error\n");
+			gvt_vgpu_err("cmd parser error\n");
 			parser_exec_state_dump(s);
 			break;
 		}
@@ -2662,7 +2671,7 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
 				gma_head, gma_top,
 				workload->shadow_ring_buffer_va);
 		if (ret) {
-			gvt_err("fail to copy guest ring buffer\n");
+			gvt_vgpu_err("fail to copy guest ring buffer\n");
 			return ret;
 		}
 		copy_len = gma_top - gma_head;
@@ -2674,7 +2683,7 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
 			gma_head, gma_tail,
 			workload->shadow_ring_buffer_va + copy_len);
 	if (ret) {
-		gvt_err("fail to copy guest ring buffer\n");
+		gvt_vgpu_err("fail to copy guest ring buffer\n");
 		return ret;
 	}
 	ring->tail += workload->rb_len;
@@ -2685,16 +2694,17 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
 int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
 {
 	int ret;
+	struct intel_vgpu *vgpu = workload->vgpu;
 
 	ret = shadow_workload_ring_buffer(workload);
 	if (ret) {
-		gvt_err("fail to shadow workload ring_buffer\n");
+		gvt_vgpu_err("fail to shadow workload ring_buffer\n");
 		return ret;
 	}
 
 	ret = scan_workload(workload);
 	if (ret) {
-		gvt_err("scan workload error\n");
+		gvt_vgpu_err("scan workload error\n");
 		return ret;
 	}
 	return 0;
@@ -2704,6 +2714,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 {
 	int ctx_size = wa_ctx->indirect_ctx.size;
 	unsigned long guest_gma = wa_ctx->indirect_ctx.guest_gma;
+	struct intel_vgpu *vgpu = wa_ctx->workload->vgpu;
 	struct drm_i915_gem_object *obj;
 	int ret = 0;
 	void *map;
@@ -2717,14 +2728,14 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 	/* get the va of the shadow batch buffer */
 	map = i915_gem_object_pin_map(obj, I915_MAP_WB);
 	if (IS_ERR(map)) {
-		gvt_err("failed to vmap shadow indirect ctx\n");
+		gvt_vgpu_err("failed to vmap shadow indirect ctx\n");
 		ret = PTR_ERR(map);
 		goto put_obj;
 	}
 
 	ret = i915_gem_object_set_to_cpu_domain(obj, false);
 	if (ret) {
-		gvt_err("failed to set shadow indirect ctx to CPU\n");
+		gvt_vgpu_err("failed to set shadow indirect ctx to CPU\n");
 		goto unmap_src;
 	}
 
@@ -2733,7 +2744,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 				guest_gma, guest_gma + ctx_size,
 				map);
 	if (ret) {
-		gvt_err("fail to copy guest indirect ctx\n");
+		gvt_vgpu_err("fail to copy guest indirect ctx\n");
 		goto unmap_src;
 	}
 
@@ -2767,13 +2778,14 @@ static int combine_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 {
 	int ret;
+	struct intel_vgpu *vgpu = wa_ctx->workload->vgpu;
 
 	if (wa_ctx->indirect_ctx.size == 0)
 		return 0;
 
 	ret = shadow_indirect_ctx(wa_ctx);
 	if (ret) {
-		gvt_err("fail to shadow indirect ctx\n");
+		gvt_vgpu_err("fail to shadow indirect ctx\n");
 		return ret;
 	}
 
@@ -2781,7 +2793,7 @@ int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 
 	ret = scan_wa_ctx(wa_ctx);
 	if (ret) {
-		gvt_err("scan wa ctx error\n");
+		gvt_vgpu_err("scan wa ctx error\n");
 		return ret;
 	}
 
diff --git a/drivers/gpu/drm/i915/gvt/debug.h b/drivers/gpu/drm/i915/gvt/debug.h
index 68cba7bd980a..b0cff4dc2684 100644
--- a/drivers/gpu/drm/i915/gvt/debug.h
+++ b/drivers/gpu/drm/i915/gvt/debug.h
@@ -27,6 +27,14 @@
 #define gvt_err(fmt, args...) \
 	DRM_ERROR("gvt: "fmt, ##args)
 
+#define gvt_vgpu_err(fmt, args...)					\
+do {									\
+	if (IS_ERR_OR_NULL(vgpu))					\
+		DRM_DEBUG_DRIVER("gvt: "fmt, ##args);			\
+	else								\
+		DRM_DEBUG_DRIVER("gvt: vgpu %d: "fmt, vgpu->id, ##args);\
+} while (0)
+
 #define gvt_dbg_core(fmt, args...) \
 	DRM_DEBUG_DRIVER("gvt: core: "fmt, ##args)
 
diff --git a/drivers/gpu/drm/i915/gvt/edid.c b/drivers/gpu/drm/i915/gvt/edid.c
index bda85dff7b2a..f1648fe5e5ea 100644
--- a/drivers/gpu/drm/i915/gvt/edid.c
+++ b/drivers/gpu/drm/i915/gvt/edid.c
@@ -52,16 +52,16 @@ static unsigned char edid_get_byte(struct intel_vgpu *vgpu)
 	unsigned char chr = 0;
 
 	if (edid->state == I2C_NOT_SPECIFIED || !edid->slave_selected) {
-		gvt_err("Driver tries to read EDID without proper sequence!\n");
+		gvt_vgpu_err("Driver tries to read EDID without proper sequence!\n");
 		return 0;
 	}
 	if (edid->current_edid_read >= EDID_SIZE) {
-		gvt_err("edid_get_byte() exceeds the size of EDID!\n");
+		gvt_vgpu_err("edid_get_byte() exceeds the size of EDID!\n");
 		return 0;
 	}
 
 	if (!edid->edid_available) {
-		gvt_err("Reading EDID but EDID is not available!\n");
+		gvt_vgpu_err("Reading EDID but EDID is not available!\n");
 		return 0;
 	}
 
@@ -72,7 +72,7 @@ static unsigned char edid_get_byte(struct intel_vgpu *vgpu)
 		chr = edid_data->edid_block[edid->current_edid_read];
 		edid->current_edid_read++;
 	} else {
-		gvt_err("No EDID available during the reading?\n");
+		gvt_vgpu_err("No EDID available during the reading?\n");
 	}
 	return chr;
 }
@@ -223,7 +223,7 @@ static int gmbus1_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
 			vgpu_vreg(vgpu, PCH_GMBUS2) |= GMBUS_ACTIVE;
 			break;
 		default:
-			gvt_err("Unknown/reserved GMBUS cycle detected!\n");
+			gvt_vgpu_err("Unknown/reserved GMBUS cycle detected!\n");
 			break;
 		}
 		/*
@@ -292,8 +292,7 @@ static int gmbus3_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
 		 */
 	} else {
 		memcpy(p_data, &vgpu_vreg(vgpu, offset), bytes);
-		gvt_err("vgpu%d: warning: gmbus3 read with nothing returned\n",
-				vgpu->id);
+		gvt_vgpu_err("warning: gmbus3 read with nothing returned\n");
 	}
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
index 46eb9fd3c03f..f1f426a97aa9 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -172,6 +172,7 @@ static int emulate_execlist_ctx_schedule_out(
 		struct intel_vgpu_execlist *execlist,
 		struct execlist_ctx_descriptor_format *ctx)
 {
+	struct intel_vgpu *vgpu = execlist->vgpu;
 	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
 	struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
 	struct execlist_ctx_descriptor_format *ctx0 = &running->ctx[0];
@@ -183,7 +184,7 @@ static int emulate_execlist_ctx_schedule_out(
 	gvt_dbg_el("schedule out context id %x\n", ctx->context_id);
 
 	if (WARN_ON(!same_context(ctx, execlist->running_context))) {
-		gvt_err("schedule out context is not running context,"
+		gvt_vgpu_err("schedule out context is not running context,"
 				"ctx id %x running ctx id %x\n",
 				ctx->context_id,
 				execlist->running_context->context_id);
@@ -254,7 +255,7 @@ static struct intel_vgpu_execlist_slot *get_next_execlist_slot(
 	status.udw = vgpu_vreg(vgpu, status_reg + 4);
 
 	if (status.execlist_queue_full) {
-		gvt_err("virtual execlist slots are full\n");
+		gvt_vgpu_err("virtual execlist slots are full\n");
 		return NULL;
 	}
 
@@ -270,11 +271,12 @@ static int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist,
 
 	struct execlist_ctx_descriptor_format *ctx0, *ctx1;
 	struct execlist_context_status_format status;
+	struct intel_vgpu *vgpu = execlist->vgpu;
 
 	gvt_dbg_el("emulate schedule-in\n");
 
 	if (!slot) {
-		gvt_err("no available execlist slot\n");
+		gvt_vgpu_err("no available execlist slot\n");
 		return -EINVAL;
 	}
 
@@ -375,7 +377,6 @@ static void prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 
 		vma = i915_gem_object_ggtt_pin(entry_obj->obj, NULL, 0, 4, 0);
 		if (IS_ERR(vma)) {
-			gvt_err("Cannot pin\n");
 			return;
 		}
 
@@ -428,7 +429,6 @@ static void prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 	vma = i915_gem_object_ggtt_pin(wa_ctx->indirect_ctx.obj, NULL,
 				       0, CACHELINE_BYTES, 0);
 	if (IS_ERR(vma)) {
-		gvt_err("Cannot pin indirect ctx obj\n");
 		return;
 	}
 
@@ -561,6 +561,7 @@ static int prepare_mm(struct intel_vgpu_workload *workload)
 {
 	struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc;
 	struct intel_vgpu_mm *mm;
+	struct intel_vgpu *vgpu = workload->vgpu;
 	int page_table_level;
 	u32 pdp[8];
 
@@ -569,7 +570,7 @@ static int prepare_mm(struct intel_vgpu_workload *workload)
 	} else if (desc->addressing_mode == 3) { /* legacy 64 bit */
 		page_table_level = 4;
 	} else {
-		gvt_err("Advanced Context mode(SVM) is not supported!\n");
+		gvt_vgpu_err("Advanced Context mode(SVM) is not supported!\n");
 		return -EINVAL;
 	}
 
@@ -583,7 +584,7 @@ static int prepare_mm(struct intel_vgpu_workload *workload)
 		mm = intel_vgpu_create_mm(workload->vgpu, INTEL_GVT_MM_PPGTT,
 				pdp, page_table_level, 0);
 		if (IS_ERR(mm)) {
-			gvt_err("fail to create mm object.\n");
+			gvt_vgpu_err("fail to create mm object.\n");
 			return PTR_ERR(mm);
 		}
 	}
@@ -609,7 +610,7 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id,
 	ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
 			(u32)((desc->lrca + 1) << GTT_PAGE_SHIFT));
 	if (ring_context_gpa == INTEL_GVT_INVALID_ADDR) {
-		gvt_err("invalid guest context LRCA: %x\n", desc->lrca);
+		gvt_vgpu_err("invalid guest context LRCA: %x\n", desc->lrca);
 		return -EINVAL;
 	}
 
@@ -724,8 +725,7 @@ int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
 			continue;
 
 		if (!desc[i]->privilege_access) {
-			gvt_err("vgpu%d: unexpected GGTT elsp submission\n",
-					vgpu->id);
+			gvt_vgpu_err("unexpected GGTT elsp submission\n");
 			return -EINVAL;
 		}
 
@@ -735,15 +735,13 @@ int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
 	}
 
 	if (!valid_desc_bitmap) {
-		gvt_err("vgpu%d: no valid desc in a elsp submission\n",
-				vgpu->id);
+		gvt_vgpu_err("no valid desc in a elsp submission\n");
 		return -EINVAL;
 	}
 
 	if (!test_bit(0, (void *)&valid_desc_bitmap) &&
 			test_bit(1, (void *)&valid_desc_bitmap)) {
-		gvt_err("vgpu%d: weird elsp submission, desc 0 is not valid\n",
-				vgpu->id);
+		gvt_vgpu_err("weird elsp submission, desc 0 is not valid\n");
 		return -EINVAL;
 	}
 
@@ -752,8 +750,7 @@ int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
 		ret = submit_context(vgpu, ring_id, &valid_desc[i],
 				emulate_schedule_in);
 		if (ret) {
-			gvt_err("vgpu%d: fail to schedule workload\n",
-					vgpu->id);
+			gvt_vgpu_err("fail to schedule workload\n");
 			return ret;
 		}
 		emulate_schedule_in = false;
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 6a5ff23ded90..da7312715824 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -49,8 +49,8 @@ bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size)
 {
 	if ((!vgpu_gmadr_is_valid(vgpu, addr)) || (size
 			&& !vgpu_gmadr_is_valid(vgpu, addr + size - 1))) {
-		gvt_err("vgpu%d: invalid range gmadr 0x%llx size 0x%x\n",
-				vgpu->id, addr, size);
+		gvt_vgpu_err("invalid range gmadr 0x%llx size 0x%x\n",
+				addr, size);
 		return false;
 	}
 	return true;
@@ -430,7 +430,7 @@ static int gtt_entry_p2m(struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *p,
 
 	mfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, gfn);
 	if (mfn == INTEL_GVT_INVALID_ADDR) {
-		gvt_err("fail to translate gfn: 0x%lx\n", gfn);
+		gvt_vgpu_err("fail to translate gfn: 0x%lx\n", gfn);
 		return -ENXIO;
 	}
 
@@ -611,7 +611,7 @@ static inline int init_shadow_page(struct intel_vgpu *vgpu,
 
 	daddr = dma_map_page(kdev, p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
 	if (dma_mapping_error(kdev, daddr)) {
-		gvt_err("fail to map dma addr\n");
+		gvt_vgpu_err("fail to map dma addr\n");
 		return -EINVAL;
 	}
 
@@ -735,7 +735,7 @@ retry:
 		if (reclaim_one_mm(vgpu->gvt))
 			goto retry;
 
-		gvt_err("fail to allocate ppgtt shadow page\n");
+		gvt_vgpu_err("fail to allocate ppgtt shadow page\n");
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -750,14 +750,14 @@ retry:
 	 */
 	ret = init_shadow_page(vgpu, &spt->shadow_page, type);
 	if (ret) {
-		gvt_err("fail to initialize shadow page for spt\n");
+		gvt_vgpu_err("fail to initialize shadow page for spt\n");
 		goto err;
 	}
 
 	ret = intel_vgpu_init_guest_page(vgpu, &spt->guest_page,
 			gfn, ppgtt_write_protection_handler, NULL);
 	if (ret) {
-		gvt_err("fail to initialize guest page for spt\n");
+		gvt_vgpu_err("fail to initialize guest page for spt\n");
 		goto err;
 	}
 
@@ -776,8 +776,7 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_find_shadow_page(
 	if (p)
 		return shadow_page_to_ppgtt_spt(p);
 
-	gvt_err("vgpu%d: fail to find ppgtt shadow page: 0x%lx\n",
-			vgpu->id, mfn);
+	gvt_vgpu_err("fail to find ppgtt shadow page: 0x%lx\n", mfn);
 	return NULL;
 }
 
@@ -827,8 +826,8 @@ static int ppgtt_invalidate_shadow_page_by_shadow_entry(struct intel_vgpu *vgpu,
 	}
 	s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e));
 	if (!s) {
-		gvt_err("vgpu%d: fail to find shadow page: mfn: 0x%lx\n",
-				vgpu->id, ops->get_pfn(e));
+		gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n",
+				ops->get_pfn(e));
 		return -ENXIO;
 	}
 	return ppgtt_invalidate_shadow_page(s);
@@ -836,6 +835,7 @@ static int ppgtt_invalidate_shadow_page_by_shadow_entry(struct intel_vgpu *vgpu,
 
 static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
 {
+	struct intel_vgpu *vgpu = spt->vgpu;
 	struct intel_gvt_gtt_entry e;
 	unsigned long index;
 	int ret;
@@ -854,7 +854,7 @@ static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
 
 	for_each_present_shadow_entry(spt, &e, index) {
 		if (!gtt_type_is_pt(get_next_pt_type(e.type))) {
-			gvt_err("GVT doesn't support pse bit for now\n");
+			gvt_vgpu_err("GVT doesn't support pse bit for now\n");
 			return -EINVAL;
 		}
 		ret = ppgtt_invalidate_shadow_page_by_shadow_entry(
@@ -868,8 +868,8 @@ release:
 	ppgtt_free_shadow_page(spt);
 	return 0;
 fail:
-	gvt_err("vgpu%d: fail: shadow page %p shadow entry 0x%llx type %d\n",
-			spt->vgpu->id, spt, e.val64, e.type);
+	gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n",
+			spt, e.val64, e.type);
 	return ret;
 }
 
@@ -914,8 +914,8 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_shadow_page_by_guest_entry(
 	}
 	return s;
 fail:
-	gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n",
-			vgpu->id, s, we->val64, we->type);
+	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
+			s, we->val64, we->type);
 	return ERR_PTR(ret);
 }
 
@@ -953,7 +953,7 @@ static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
 
 	for_each_present_guest_entry(spt, &ge, i) {
 		if (!gtt_type_is_pt(get_next_pt_type(ge.type))) {
-			gvt_err("GVT doesn't support pse bit now\n");
+			gvt_vgpu_err("GVT doesn't support pse bit now\n");
 			ret = -EINVAL;
 			goto fail;
 		}
@@ -969,8 +969,8 @@ static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
 	}
 	return 0;
 fail:
-	gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n",
-			vgpu->id, spt, ge.val64, ge.type);
+	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
+			spt, ge.val64, ge.type);
 	return ret;
 }
 
@@ -999,7 +999,7 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt,
 		struct intel_vgpu_ppgtt_spt *s =
 			ppgtt_find_shadow_page(vgpu, ops->get_pfn(&e));
 		if (!s) {
-			gvt_err("fail to find guest page\n");
+			gvt_vgpu_err("fail to find guest page\n");
 			ret = -ENXIO;
 			goto fail;
 		}
@@ -1011,8 +1011,8 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt,
 	ppgtt_set_shadow_entry(spt, &e, index);
 	return 0;
 fail:
-	gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n",
-			vgpu->id, spt, e.val64, e.type);
+	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
+			spt, e.val64, e.type);
 	return ret;
 }
 
@@ -1046,8 +1046,8 @@ static int ppgtt_handle_guest_entry_add(struct intel_vgpu_guest_page *gpt,
 	}
 	return 0;
 fail:
-	gvt_err("vgpu%d: fail: spt %p guest entry 0x%llx type %d\n", vgpu->id,
-			spt, we->val64, we->type);
+	gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n",
+		spt, we->val64, we->type);
 	return ret;
 }
 
@@ -1250,8 +1250,8 @@ static int ppgtt_handle_guest_write_page_table(
 	}
 	return 0;
 fail:
-	gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d.\n",
-			vgpu->id, spt, we->val64, we->type);
+	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n",
+			spt, we->val64, we->type);
 	return ret;
 }
 
@@ -1493,7 +1493,7 @@ static int shadow_mm(struct intel_vgpu_mm *mm)
 
 		spt = ppgtt_populate_shadow_page_by_guest_entry(vgpu, &ge);
 		if (IS_ERR(spt)) {
-			gvt_err("fail to populate guest root pointer\n");
+			gvt_vgpu_err("fail to populate guest root pointer\n");
 			ret = PTR_ERR(spt);
 			goto fail;
 		}
@@ -1566,7 +1566,7 @@ struct intel_vgpu_mm *intel_vgpu_create_mm(struct intel_vgpu *vgpu,
 
 	ret = gtt->mm_alloc_page_table(mm);
 	if (ret) {
-		gvt_err("fail to allocate page table for mm\n");
+		gvt_vgpu_err("fail to allocate page table for mm\n");
 		goto fail;
 	}
 
@@ -1584,7 +1584,7 @@ struct intel_vgpu_mm *intel_vgpu_create_mm(struct intel_vgpu *vgpu,
 	}
 	return mm;
 fail:
-	gvt_err("fail to create mm\n");
+	gvt_vgpu_err("fail to create mm\n");
 	if (mm)
 		intel_gvt_mm_unreference(mm);
 	return ERR_PTR(ret);
@@ -1760,7 +1760,7 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
 			mm->page_table_level, gma, gpa);
 	return gpa;
 err:
-	gvt_err("invalid mm type: %d gma %lx\n", mm->type, gma);
+	gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma);
 	return INTEL_GVT_INVALID_ADDR;
 }
 
@@ -1836,8 +1836,7 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
 	if (ops->test_present(&e)) {
 		ret = gtt_entry_p2m(vgpu, &e, &m);
 		if (ret) {
-			gvt_err("vgpu%d: fail to translate guest gtt entry\n",
-					vgpu->id);
+			gvt_vgpu_err("fail to translate guest gtt entry\n");
 			return ret;
 		}
 	} else {
@@ -1893,14 +1892,14 @@ static int alloc_scratch_pages(struct intel_vgpu *vgpu,
 
 	scratch_pt = (void *)get_zeroed_page(GFP_KERNEL);
 	if (!scratch_pt) {
-		gvt_err("fail to allocate scratch page\n");
+		gvt_vgpu_err("fail to allocate scratch page\n");
 		return -ENOMEM;
 	}
 
 	daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0,
 			4096, PCI_DMA_BIDIRECTIONAL);
 	if (dma_mapping_error(dev, daddr)) {
-		gvt_err("fail to dmamap scratch_pt\n");
+		gvt_vgpu_err("fail to dmamap scratch_pt\n");
 		__free_page(virt_to_page(scratch_pt));
 		return -ENOMEM;
 	}
@@ -2003,7 +2002,7 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
 	ggtt_mm = intel_vgpu_create_mm(vgpu, INTEL_GVT_MM_GGTT,
 			NULL, 1, 0);
 	if (IS_ERR(ggtt_mm)) {
-		gvt_err("fail to create mm for ggtt.\n");
+		gvt_vgpu_err("fail to create mm for ggtt.\n");
 		return PTR_ERR(ggtt_mm);
 	}
 
@@ -2076,7 +2075,6 @@ static int setup_spt_oos(struct intel_gvt *gvt)
 	for (i = 0; i < preallocated_oos_pages; i++) {
 		oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL);
 		if (!oos_page) {
-			gvt_err("fail to pre-allocate oos page\n");
 			ret = -ENOMEM;
 			goto fail;
 		}
@@ -2166,7 +2164,7 @@ int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu,
 		mm = intel_vgpu_create_mm(vgpu, INTEL_GVT_MM_PPGTT,
 				pdp, page_table_level, 0);
 		if (IS_ERR(mm)) {
-			gvt_err("fail to create mm\n");
+			gvt_vgpu_err("fail to create mm\n");
 			return PTR_ERR(mm);
 		}
 	}
@@ -2196,7 +2194,7 @@ int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu,
 
 	mm = intel_vgpu_find_ppgtt_mm(vgpu, page_table_level, pdp);
 	if (!mm) {
-		gvt_err("fail to find ppgtt instance.\n");
+		gvt_vgpu_err("fail to find ppgtt instance.\n");
 		return -EINVAL;
 	}
 	intel_gvt_mm_unreference(mm);
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index de975f40aebf..eaff45d417e8 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -181,11 +181,9 @@ static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu,
 					GVT_FAILSAFE_UNSUPPORTED_GUEST);
 
 		if (!vgpu->mmio.disable_warn_untrack) {
-			gvt_err("vgpu%d: found oob fence register access\n",
-					vgpu->id);
-			gvt_err("vgpu%d: total fence %d, access fence %d\n",
-					vgpu->id, vgpu_fence_sz(vgpu),
-					fence_num);
+			gvt_vgpu_err("found oob fence register access\n");
+			gvt_vgpu_err("total fence %d, access fence %d\n",
+					vgpu_fence_sz(vgpu), fence_num);
 		}
 		memset(p_data, 0, bytes);
 		return -EINVAL;
@@ -249,7 +247,7 @@ static int mul_force_wake_write(struct intel_vgpu *vgpu,
 			break;
 		default:
 			/*should not hit here*/
-			gvt_err("invalid forcewake offset 0x%x\n", offset);
+			gvt_vgpu_err("invalid forcewake offset 0x%x\n", offset);
 			return -EINVAL;
 		}
 	} else {
@@ -530,7 +528,7 @@ static int check_fdi_rx_train_status(struct intel_vgpu *vgpu,
 		fdi_tx_train_bits = FDI_LINK_TRAIN_PATTERN_2;
 		fdi_iir_check_bits = FDI_RX_SYMBOL_LOCK;
 	} else {
-		gvt_err("Invalid train pattern %d\n", train_pattern);
+		gvt_vgpu_err("Invalid train pattern %d\n", train_pattern);
 		return -EINVAL;
 	}
 
@@ -588,7 +586,7 @@ static int update_fdi_rx_iir_status(struct intel_vgpu *vgpu,
 	else if (FDI_RX_IMR_TO_PIPE(offset) != INVALID_INDEX)
 		index = FDI_RX_IMR_TO_PIPE(offset);
 	else {
-		gvt_err("Unsupport registers %x\n", offset);
+		gvt_vgpu_err("Unsupport registers %x\n", offset);
 		return -EINVAL;
 	}
 
@@ -818,7 +816,7 @@ static int dp_aux_ch_ctl_mmio_write(struct intel_vgpu *vgpu,
 	u32 data;
 
 	if (!dpy_is_valid_port(port_index)) {
-		gvt_err("GVT(%d): Unsupported DP port access!\n", vgpu->id);
+		gvt_vgpu_err("Unsupported DP port access!\n");
 		return 0;
 	}
 
@@ -1016,8 +1014,7 @@ static void write_virtual_sbi_register(struct intel_vgpu *vgpu,
 
 	if (i == num) {
 		if (num == SBI_REG_MAX) {
-			gvt_err("vgpu%d: SBI caching meets maximum limits\n",
-					vgpu->id);
+			gvt_vgpu_err("SBI caching meets maximum limits\n");
 			return;
 		}
 		display->sbi.number++;
@@ -1097,7 +1094,7 @@ static int pvinfo_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
 		break;
 	}
 	if (invalid_read)
-		gvt_err("invalid pvinfo read: [%x:%x] = %x\n",
+		gvt_vgpu_err("invalid pvinfo read: [%x:%x] = %x\n",
 				offset, bytes, *(u32 *)p_data);
 	vgpu->pv_notified = true;
 	return 0;
@@ -1125,7 +1122,7 @@ static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification)
 	case 1:	/* Remove this in guest driver. */
 		break;
 	default:
-		gvt_err("Invalid PV notification %d\n", notification);
+		gvt_vgpu_err("Invalid PV notification %d\n", notification);
 	}
 	return ret;
 }
@@ -1181,7 +1178,7 @@ static int pvinfo_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
 		enter_failsafe_mode(vgpu, GVT_FAILSAFE_INSUFFICIENT_RESOURCE);
 		break;
 	default:
-		gvt_err("invalid pvinfo write offset %x bytes %x data %x\n",
+		gvt_vgpu_err("invalid pvinfo write offset %x bytes %x data %x\n",
 				offset, bytes, data);
 		break;
 	}
@@ -1415,7 +1412,8 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
 	if (execlist->elsp_dwords.index == 3) {
 		ret = intel_vgpu_submit_execlist(vgpu, ring_id);
 		if(ret)
-			gvt_err("fail submit workload on ring %d\n", ring_id);
+			gvt_vgpu_err("fail submit workload on ring %d\n",
+				ring_id);
 	}
 
 	++execlist->elsp_dwords.index;
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 84d801638ede..cd218b07c6f6 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -426,7 +426,7 @@ static void kvmgt_protect_table_del(struct kvmgt_guest_info *info,
 
 static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
 {
-	struct intel_vgpu *vgpu;
+	struct intel_vgpu *vgpu = NULL;
 	struct intel_vgpu_type *type;
 	struct device *pdev;
 	void *gvt;
@@ -437,7 +437,7 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
 
 	type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
 	if (!type) {
-		gvt_err("failed to find type %s to create\n",
+		gvt_vgpu_err("failed to find type %s to create\n",
 						kobject_name(kobj));
 		ret = -EINVAL;
 		goto out;
@@ -446,7 +446,7 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
 	vgpu = intel_gvt_ops->vgpu_create(gvt, type);
 	if (IS_ERR_OR_NULL(vgpu)) {
 		ret = vgpu == NULL ? -EFAULT : PTR_ERR(vgpu);
-		gvt_err("failed to create intel vgpu: %d\n", ret);
+		gvt_vgpu_err("failed to create intel vgpu: %d\n", ret);
 		goto out;
 	}
 
@@ -526,7 +526,8 @@ static int intel_vgpu_open(struct mdev_device *mdev)
 	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events,
 				&vgpu->vdev.iommu_notifier);
 	if (ret != 0) {
-		gvt_err("vfio_register_notifier for iommu failed: %d\n", ret);
+		gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
+			ret);
 		goto out;
 	}
 
@@ -534,7 +535,8 @@ static int intel_vgpu_open(struct mdev_device *mdev)
 	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events,
 				&vgpu->vdev.group_notifier);
 	if (ret != 0) {
-		gvt_err("vfio_register_notifier for group failed: %d\n", ret);
+		gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
+			ret);
 		goto undo_iommu;
 	}
 
@@ -635,7 +637,7 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
 
 
 	if (index >= VFIO_PCI_NUM_REGIONS) {
-		gvt_err("invalid index: %u\n", index);
+		gvt_vgpu_err("invalid index: %u\n", index);
 		return -EINVAL;
 	}
 
@@ -669,7 +671,7 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
 	case VFIO_PCI_VGA_REGION_INDEX:
 	case VFIO_PCI_ROM_REGION_INDEX:
 	default:
-		gvt_err("unsupported region: %u\n", index);
+		gvt_vgpu_err("unsupported region: %u\n", index);
 	}
 
 	return ret == 0 ? count : ret;
@@ -861,7 +863,7 @@ static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
 
 		trigger = eventfd_ctx_fdget(fd);
 		if (IS_ERR(trigger)) {
-			gvt_err("eventfd_ctx_fdget failed\n");
+			gvt_vgpu_err("eventfd_ctx_fdget failed\n");
 			return PTR_ERR(trigger);
 		}
 		vgpu->vdev.msi_trigger = trigger;
@@ -1120,7 +1122,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
 			ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
 						VFIO_PCI_NUM_IRQS, &data_size);
 			if (ret) {
-				gvt_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
+				gvt_vgpu_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
 				return -EINVAL;
 			}
 			if (data_size) {
@@ -1310,7 +1312,7 @@ static int kvmgt_guest_init(struct mdev_device *mdev)
 
 	kvm = vgpu->vdev.kvm;
 	if (!kvm || kvm->mm != current->mm) {
-		gvt_err("KVM is required to use Intel vGPU\n");
+		gvt_vgpu_err("KVM is required to use Intel vGPU\n");
 		return -ESRCH;
 	}
 
@@ -1337,8 +1339,10 @@ static int kvmgt_guest_init(struct mdev_device *mdev)
 
 static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
 {
+	struct intel_vgpu *vgpu = info->vgpu;
+
 	if (!info) {
-		gvt_err("kvmgt_guest_info invalid\n");
+		gvt_vgpu_err("kvmgt_guest_info invalid\n");
 		return false;
 	}
 
@@ -1383,12 +1387,14 @@ static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
 	unsigned long iova, pfn;
 	struct kvmgt_guest_info *info;
 	struct device *dev;
+	struct intel_vgpu *vgpu;
 	int rc;
 
 	if (!handle_valid(handle))
 		return INTEL_GVT_INVALID_ADDR;
 
 	info = (struct kvmgt_guest_info *)handle;
+	vgpu = info->vgpu;
 	iova = gvt_cache_find(info->vgpu, gfn);
 	if (iova != INTEL_GVT_INVALID_ADDR)
 		return iova;
@@ -1397,13 +1403,14 @@ static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
 	dev = mdev_dev(info->vgpu->vdev.mdev);
 	rc = vfio_pin_pages(dev, &gfn, 1, IOMMU_READ | IOMMU_WRITE, &pfn);
 	if (rc != 1) {
-		gvt_err("vfio_pin_pages failed for gfn 0x%lx: %d\n", gfn, rc);
+		gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n",
+			gfn, rc);
 		return INTEL_GVT_INVALID_ADDR;
 	}
 	/* transfer to host iova for GFX to use DMA */
 	rc = gvt_dma_map_iova(info->vgpu, pfn, &iova);
 	if (rc) {
-		gvt_err("gvt_dma_map_iova failed for gfn: 0x%lx\n", gfn);
+		gvt_vgpu_err("gvt_dma_map_iova failed for gfn: 0x%lx\n", gfn);
 		vfio_unpin_pages(dev, &gfn, 1);
 		return INTEL_GVT_INVALID_ADDR;
 	}
diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c
index 60b698cb8365..1ba3bdb09341 100644
--- a/drivers/gpu/drm/i915/gvt/mmio.c
+++ b/drivers/gpu/drm/i915/gvt/mmio.c
@@ -142,10 +142,10 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa,
 			ret = intel_gvt_hypervisor_read_gpa(vgpu, pa,
 					p_data, bytes);
 			if (ret) {
-				gvt_err("vgpu%d: guest page read error %d, "
+				gvt_vgpu_err("guest page read error %d, "
 					"gfn 0x%lx, pa 0x%llx, var 0x%x, len %d\n",
-					vgpu->id, ret,
-					gp->gfn, pa, *(u32 *)p_data, bytes);
+					ret, gp->gfn, pa, *(u32 *)p_data,
+					bytes);
 			}
 			mutex_unlock(&gvt->lock);
 			return ret;
@@ -200,14 +200,13 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa,
 		ret = intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes);
 
 		if (!vgpu->mmio.disable_warn_untrack) {
-			gvt_err("vgpu%d: read untracked MMIO %x(%dB) val %x\n",
-				vgpu->id, offset, bytes, *(u32 *)p_data);
+			gvt_vgpu_err("read untracked MMIO %x(%dB) val %x\n",
+				offset, bytes, *(u32 *)p_data);
 
 			if (offset == 0x206c) {
-				gvt_err("------------------------------------------\n");
-				gvt_err("vgpu%d: likely triggers a gfx reset\n",
-					vgpu->id);
-				gvt_err("------------------------------------------\n");
+				gvt_vgpu_err("------------------------------------------\n");
+				gvt_vgpu_err("likely triggers a gfx reset\n");
+				gvt_vgpu_err("------------------------------------------\n");
 				vgpu->mmio.disable_warn_untrack = true;
 			}
 		}
@@ -220,8 +219,8 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa,
 	mutex_unlock(&gvt->lock);
 	return 0;
 err:
-	gvt_err("vgpu%d: fail to emulate MMIO read %08x len %d\n",
-			vgpu->id, offset, bytes);
+	gvt_vgpu_err("fail to emulate MMIO read %08x len %d\n",
+			offset, bytes);
 	mutex_unlock(&gvt->lock);
 	return ret;
 }
@@ -259,10 +258,11 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa,
 		if (gp) {
 			ret = gp->handler(gp, pa, p_data, bytes);
 			if (ret) {
-				gvt_err("vgpu%d: guest page write error %d, "
-					"gfn 0x%lx, pa 0x%llx, var 0x%x, len %d\n",
-					vgpu->id, ret,
-					gp->gfn, pa, *(u32 *)p_data, bytes);
+				gvt_err("guest page write error %d, "
+					"gfn 0x%lx, pa 0x%llx, "
+					"var 0x%x, len %d\n",
+					ret, gp->gfn, pa,
+					*(u32 *)p_data, bytes);
 			}
 			mutex_unlock(&gvt->lock);
 			return ret;
@@ -329,8 +329,8 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa,
 
 			/* all register bits are RO. */
 			if (ro_mask == ~(u64)0) {
-				gvt_err("vgpu%d: try to write RO reg %x\n",
-						vgpu->id, offset);
+				gvt_vgpu_err("try to write RO reg %x\n",
+					offset);
 				ret = 0;
 				goto out;
 			}
@@ -360,8 +360,8 @@ out:
 	mutex_unlock(&gvt->lock);
 	return 0;
 err:
-	gvt_err("vgpu%d: fail to emulate MMIO write %08x len %d\n",
-			vgpu->id, offset, bytes);
+	gvt_vgpu_err("fail to emulate MMIO write %08x len %d\n", offset,
+		     bytes);
 	mutex_unlock(&gvt->lock);
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c
index 5d1caf9daba9..311799136d7f 100644
--- a/drivers/gpu/drm/i915/gvt/opregion.c
+++ b/drivers/gpu/drm/i915/gvt/opregion.c
@@ -67,14 +67,15 @@ static int map_vgpu_opregion(struct intel_vgpu *vgpu, bool map)
 		mfn = intel_gvt_hypervisor_virt_to_mfn(vgpu_opregion(vgpu)->va
 			+ i * PAGE_SIZE);
 		if (mfn == INTEL_GVT_INVALID_ADDR) {
-			gvt_err("fail to get MFN from VA\n");
+			gvt_vgpu_err("fail to get MFN from VA\n");
 			return -EINVAL;
 		}
 		ret = intel_gvt_hypervisor_map_gfn_to_mfn(vgpu,
 				vgpu_opregion(vgpu)->gfn[i],
 				mfn, 1, map);
 		if (ret) {
-			gvt_err("fail to map GFN to MFN, errno: %d\n", ret);
+			gvt_vgpu_err("fail to map GFN to MFN, errno: %d\n",
+				ret);
 			return ret;
 		}
 	}
@@ -287,7 +288,7 @@ int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci)
 	parm = vgpu_opregion(vgpu)->va + INTEL_GVT_OPREGION_PARM;
 
 	if (!(swsci & SWSCI_SCI_SELECT)) {
-		gvt_err("vgpu%d: requesting SMI service\n", vgpu->id);
+		gvt_vgpu_err("requesting SMI service\n");
 		return 0;
 	}
 	/* ignore non 0->1 trasitions */
@@ -300,9 +301,8 @@ int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci)
 	func = GVT_OPREGION_FUNC(*scic);
 	subfunc = GVT_OPREGION_SUBFUNC(*scic);
 	if (!querying_capabilities(*scic)) {
-		gvt_err("vgpu%d: requesting runtime service: func \"%s\","
+		gvt_vgpu_err("requesting runtime service: func \"%s\","
 				" subfunc \"%s\"\n",
-				vgpu->id,
 				opregion_func_name(func),
 				opregion_subfunc_name(subfunc));
 		/*
diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c
index 73f052a4f424..95ee091ce085 100644
--- a/drivers/gpu/drm/i915/gvt/render.c
+++ b/drivers/gpu/drm/i915/gvt/render.c
@@ -167,7 +167,7 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id)
 	I915_WRITE_FW(reg, 0x1);
 
 	if (wait_for_atomic((I915_READ_FW(reg) == 0), 50))
-		gvt_err("timeout in invalidate ring (%d) tlb\n", ring_id);
+		gvt_vgpu_err("timeout in invalidate ring (%d) tlb\n", ring_id);
 	else
 		vgpu_vreg(vgpu, regs[ring_id]) = 0;
 
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index d3a56c949025..d29e4352d529 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -84,7 +84,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 				(u32)((workload->ctx_desc.lrca + i) <<
 				GTT_PAGE_SHIFT));
 		if (context_gpa == INTEL_GVT_INVALID_ADDR) {
-			gvt_err("Invalid guest context descriptor\n");
+			gvt_vgpu_err("Invalid guest context descriptor\n");
 			return -EINVAL;
 		}
 
@@ -176,6 +176,7 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 	struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
 	struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
 	struct drm_i915_gem_request *rq;
+	struct intel_vgpu *vgpu = workload->vgpu;
 	int ret;
 
 	gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n",
@@ -189,7 +190,7 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 
 	rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx);
 	if (IS_ERR(rq)) {
-		gvt_err("fail to allocate gem request\n");
+		gvt_vgpu_err("fail to allocate gem request\n");
 		ret = PTR_ERR(rq);
 		goto out;
 	}
@@ -322,7 +323,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
 				(u32)((workload->ctx_desc.lrca + i) <<
 					GTT_PAGE_SHIFT));
 		if (context_gpa == INTEL_GVT_INVALID_ADDR) {
-			gvt_err("invalid guest context descriptor\n");
+			gvt_vgpu_err("invalid guest context descriptor\n");
 			return;
 		}
 
@@ -417,6 +418,7 @@ static int workload_thread(void *priv)
 	int ring_id = p->ring_id;
 	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
 	struct intel_vgpu_workload *workload = NULL;
+	struct intel_vgpu *vgpu = NULL;
 	int ret;
 	bool need_force_wake = IS_SKYLAKE(gvt->dev_priv);
 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
@@ -459,7 +461,8 @@ static int workload_thread(void *priv)
 		mutex_unlock(&gvt->lock);
 
 		if (ret) {
-			gvt_err("fail to dispatch workload, skip\n");
+			vgpu = workload->vgpu;
+			gvt_vgpu_err("fail to dispatch workload, skip\n");
 			goto complete;
 		}
 
-- 
cgit v1.2.3


From 3f765a341798ebd4e0ece7cce34399a8fd4a7f9f Mon Sep 17 00:00:00 2001
From: Yulei Zhang <yulei.zhang@intel.com>
Date: Mon, 13 Mar 2017 23:21:27 +0800
Subject: drm/i915/gvt: correct the ggtt valid bit check in pipe control
 command

GGTT valid bit in pipe control command move to DWORD1 after SNB, so
change the valid check code correspondingly.

v2:
per Zhenyu's comment, replace the bit check with MACRO define
PIPE_CONTROL_GLOBAL_GTT_IVB

Signed-off-by: Yulei Zhang <yulei.zhang@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/cmd_parser.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 2ca0506d8d8c..2b92cc8a7d1a 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -1030,7 +1030,7 @@ static int cmd_handler_pipe_control(struct parser_exec_state *s)
 			ret = cmd_reg_handler(s, 0x2358, 1, "pipe_ctrl");
 		else if (post_sync == 1) {
 			/* check ggtt*/
-			if ((cmd_val(s, 2) & (1 << 2))) {
+			if ((cmd_val(s, 1) & PIPE_CONTROL_GLOBAL_GTT_IVB)) {
 				gma = cmd_val(s, 2) & GENMASK(31, 3);
 				if (gmadr_bytes == 8)
 					gma |= (cmd_gma_hi(s, 3)) << 32;
-- 
cgit v1.2.3


From 3dce2aca02929f180ab66171b333fa48fe485a03 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 8 Mar 2017 22:08:08 +0000
Subject: drm/i915/gvt: Remove bogus retry around i915_wait_request

commit 8f1117abb408 ("drm/i915/gvt: handle workload lifecycle properly")
includes some nonsense to retry a indefinite wait - i915_wait_request()
does not return until the request is completed when used from an
uninterruptible context.

Fixes: 8f1117abb408 ("drm/i915/gvt: handle workload lifecycle properly"
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chuanxiao Dong <chuanxiao.dong@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: Zhi Wang <zhi.a.wang@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/scheduler.c | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index d29e4352d529..dd8f8cc69a76 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -468,19 +468,7 @@ static int workload_thread(void *priv)
 
 		gvt_dbg_sched("ring id %d wait workload %p\n",
 				workload->ring_id, workload);
-retry:
-		i915_wait_request(workload->req,
-					 0, MAX_SCHEDULE_TIMEOUT);
-		/* I915 has replay mechanism and a request will be replayed
-		 * if there is i915 reset. So the seqno will be updated anyway.
-		 * If the seqno is not updated yet after waiting, which means
-		 * the replay may still be in progress and we can wait again.
-		 */
-		if (!i915_gem_request_completed(workload->req)) {
-			gvt_dbg_sched("workload %p not completed, wait again\n",
-					workload);
-			goto retry;
-		}
+		i915_wait_request(workload->req, 0, MAX_SCHEDULE_TIMEOUT);
 
 complete:
 		gvt_dbg_sched("will complete workload %p, status: %d\n",
-- 
cgit v1.2.3


From cf2135ca3d50d6468e9216fef3d0d33c31af635b Mon Sep 17 00:00:00 2001
From: Chuanxiao Dong <chuanxiao.dong@intel.com>
Date: Thu, 9 Mar 2017 23:07:12 +0800
Subject: drm/i915/gvt: add enable_execlists check before enable gvt

The GVT-g needs execlists to be enabled otherwise gvt should be
disabled. Add a check for enable_execlists before enabling gvt.

v2: use DRM_INFO in response to the user action

Signed-off-by: Chuanxiao Dong <chuanxiao.dong@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_gvt.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c
index d23c0fcff751..8c04eca84351 100644
--- a/drivers/gpu/drm/i915/intel_gvt.c
+++ b/drivers/gpu/drm/i915/intel_gvt.c
@@ -77,6 +77,11 @@ int intel_gvt_init(struct drm_i915_private *dev_priv)
 		goto bail;
 	}
 
+	if (!i915.enable_execlists) {
+		DRM_INFO("GPU guest virtualisation [GVT-g] disabled due to disabled execlist submission [i915.enable_execlists module parameter]\n");
+		goto bail;
+	}
+
 	/*
 	 * We're not in host or fail to find a MPT module, disable GVT-g
 	 */
-- 
cgit v1.2.3


From 5180edc2421117766fcb9c2d2dc6bfaeefdeb709 Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@intel.com>
Date: Thu, 16 Mar 2017 09:45:09 +0800
Subject: drm/i915/kvmgt: fix suspicious rcu dereference usage

The srcu read lock must be held while accessing kvm memslots.
This patch fix below warning for function kvmgt_rw_gpa().

[  165.345093] [ ERR: suspicious RCU usage.  ]
[  165.416538] Call Trace:
[  165.418989]  dump_stack+0x85/0xc2
[  165.422310]  lockdep_rcu_suspicious+0xd7/0x110
[  165.426769]  kvm_read_guest_page+0x195/0x1b0 [kvm]
[  165.431574]  kvm_read_guest+0x50/0x90 [kvm]
[  165.440492]  kvmgt_rw_gpa+0x43/0xa0 [kvmgt]
[  165.444683]  kvmgt_read_gpa+0x11/0x20 [kvmgt]
[  165.449061]  gtt_get_entry64+0x4d/0xc0 [i915]
[  165.453438]  ppgtt_populate_shadow_page_by_guest_entry+0x380/0xdc0 [i915]
[  165.460254]  shadow_mm+0xd1/0x460 [i915]
[  165.472488]  intel_vgpu_create_mm+0x1ab/0x210 [i915]
[  165.477472]  intel_vgpu_g2v_create_ppgtt_mm+0x5f/0xc0 [i915]
[  165.483154]  pvinfo_mmio_write+0x19b/0x1d0 [i915]
[  165.499068]  intel_vgpu_emulate_mmio_write+0x3f9/0x600 [i915]
[  165.504827]  intel_vgpu_rw+0x114/0x150 [kvmgt]
[  165.509281]  intel_vgpu_write+0x16f/0x1a0 [kvmgt]
[  165.513993]  vfio_mdev_write+0x20/0x30 [vfio_mdev]
[  165.518793]  vfio_device_fops_write+0x24/0x30 [vfio]
[  165.523770]  __vfs_write+0x28/0x120
[  165.540529]  vfs_write+0xce/0x1f0

v2: fix Cc format for stable

Signed-off-by: Changbin Du <changbin.du@intel.com>
Cc: <stable@vger.kernel.org> # v4.10+
Reviewed-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Jike Song <jike.song@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/kvmgt.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index cd218b07c6f6..1ea3eb270de8 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -1424,7 +1424,7 @@ static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
 {
 	struct kvmgt_guest_info *info;
 	struct kvm *kvm;
-	int ret;
+	int idx, ret;
 	bool kthread = current->mm == NULL;
 
 	if (!handle_valid(handle))
@@ -1436,8 +1436,10 @@ static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
 	if (kthread)
 		use_mm(kvm->mm);
 
+	idx = srcu_read_lock(&kvm->srcu);
 	ret = write ? kvm_write_guest(kvm, gpa, buf, len) :
 		      kvm_read_guest(kvm, gpa, buf, len);
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	if (kthread)
 		unuse_mm(kvm->mm);
-- 
cgit v1.2.3


From 17f1b1a6d4e5f41df161eb2a90af22c003a243fc Mon Sep 17 00:00:00 2001
From: Tina Zhang <tina.zhang@intel.com>
Date: Wed, 15 Mar 2017 23:16:01 -0400
Subject: drm/i915/gvt: scan shadow indirect context image when valid

The shadow indirect context image should be only scanned when valid.
So far, Only RCS ring has the shadow indirect context image. This patch
limits the scan logic only for RCS ring.

v2. refine description of the subject
v3. fix alignment. (Zhenyu)

Signed-off-by: Tina Zhang <tina.zhang@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/scheduler.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index dd8f8cc69a76..907e6bc794f6 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -203,9 +203,12 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 	if (ret)
 		goto out;
 
-	ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx);
-	if (ret)
-		goto out;
+	if ((workload->ring_id == RCS) &&
+	    (workload->wa_ctx.indirect_ctx.size != 0)) {
+		ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx);
+		if (ret)
+			goto out;
+	}
 
 	ret = populate_shadow_context(workload);
 	if (ret)
-- 
cgit v1.2.3


From 3cd23b828b37bd5ccf4b40132f12dbf20d7afdb6 Mon Sep 17 00:00:00 2001
From: Chuanxiao Dong <chuanxiao.dong@intel.com>
Date: Thu, 16 Mar 2017 09:47:58 +0800
Subject: drm/i915/gvt: GVT pin/unpin shadow context

When handling guest request, GVT needs to populate/update shadow_ctx
with guest context. This behavior needs to make sure the shadow_ctx
is pinned. The current implementation is relying on i195 allocate request
to pin but this way cannot guarantee the i915 not to unpin the shadow_ctx
when GVT update the guest context from shadow_ctx. So GVT should pin/unpin
the shadow_ctx by itself.

Signed-off-by: Chuanxiao Dong <chuanxiao.dong@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/scheduler.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 907e6bc794f6..39a83eb7aecc 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -175,6 +175,7 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 	int ring_id = workload->ring_id;
 	struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
 	struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
+	struct intel_engine_cs *engine = dev_priv->engine[ring_id];
 	struct drm_i915_gem_request *rq;
 	struct intel_vgpu *vgpu = workload->vgpu;
 	int ret;
@@ -188,6 +189,21 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
 
+	/* pin shadow context by gvt even the shadow context will be pinned
+	 * when i915 alloc request. That is because gvt will update the guest
+	 * context from shadow context when workload is completed, and at that
+	 * moment, i915 may already unpined the shadow context to make the
+	 * shadow_ctx pages invalid. So gvt need to pin itself. After update
+	 * the guest context, gvt can unpin the shadow_ctx safely.
+	 */
+	ret = engine->context_pin(engine, shadow_ctx);
+	if (ret) {
+		gvt_vgpu_err("fail to pin shadow context\n");
+		workload->status = ret;
+		mutex_unlock(&dev_priv->drm.struct_mutex);
+		return ret;
+	}
+
 	rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx);
 	if (IS_ERR(rq)) {
 		gvt_vgpu_err("fail to allocate gem request\n");
@@ -231,6 +247,9 @@ out:
 
 	if (!IS_ERR_OR_NULL(rq))
 		i915_add_request_no_flush(rq);
+	else
+		engine->context_unpin(engine, shadow_ctx);
+
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return ret;
 }
@@ -380,6 +399,10 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
 	 * For the workload w/o request, directly complete the workload.
 	 */
 	if (workload->req) {
+		struct drm_i915_private *dev_priv =
+			workload->vgpu->gvt->dev_priv;
+		struct intel_engine_cs *engine =
+			dev_priv->engine[workload->ring_id];
 		wait_event(workload->shadow_ctx_status_wq,
 			   !atomic_read(&workload->shadow_ctx_active));
 
@@ -392,6 +415,10 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
 					 INTEL_GVT_EVENT_MAX)
 				intel_vgpu_trigger_virtual_event(vgpu, event);
 		}
+		mutex_lock(&dev_priv->drm.struct_mutex);
+		/* unpin shadow ctx as the shadow_ctx update is done */
+		engine->context_unpin(engine, workload->vgpu->shadow_ctx);
+		mutex_unlock(&dev_priv->drm.struct_mutex);
 	}
 
 	gvt_dbg_sched("ring id %d complete workload %p status %d\n",
-- 
cgit v1.2.3


From 2958b9013fcbabeeba221161d0712f5259f1e15d Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Fri, 17 Mar 2017 12:11:20 +0800
Subject: drm/i915/gvt: Fix gvt scheduler interval time

Fix to correctly assign 1ms for gvt scheduler interval time,
as previous code using HZ is pretty broken. And use no delay
for start gvt scheduler function.

Fixes: 4b63960ebd3f ("drm/i915/gvt: vGPU schedule policy framework")
Cc: Zhi Wang <zhi.a.wang@intel.com>
Cc: stable@vger.kernel.org # v4.10+
Acked-by: Chuanxiao Dong <chuanxiao.dong@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/sched_policy.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c
index 06c9584ac5f0..34b9acdf3479 100644
--- a/drivers/gpu/drm/i915/gvt/sched_policy.c
+++ b/drivers/gpu/drm/i915/gvt/sched_policy.c
@@ -101,7 +101,7 @@ struct tbs_sched_data {
 	struct list_head runq_head;
 };
 
-#define GVT_DEFAULT_TIME_SLICE (1 * HZ / 1000)
+#define GVT_DEFAULT_TIME_SLICE (msecs_to_jiffies(1))
 
 static void tbs_sched_func(struct work_struct *work)
 {
@@ -223,7 +223,7 @@ static void tbs_sched_start_schedule(struct intel_vgpu *vgpu)
 		return;
 
 	list_add_tail(&vgpu_data->list, &sched_data->runq_head);
-	schedule_delayed_work(&sched_data->work, sched_data->period);
+	schedule_delayed_work(&sched_data->work, 0);
 }
 
 static void tbs_sched_stop_schedule(struct intel_vgpu *vgpu)
-- 
cgit v1.2.3


From e7ede72a6d40cb3a30c087142d79381ca8a31dab Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 15 Mar 2017 22:53:37 +0100
Subject: perf symbols: Fix symbols__fixup_end heuristic for corner cases

The current symbols__fixup_end() heuristic for the last entry in the rb
tree is suboptimal as it leads to not being able to recognize the symbol
in the call graph in a couple of corner cases, for example:

 i) If the symbol has a start address (f.e. exposed via kallsyms)
    that is at a page boundary, then the roundup(curr->start, 4096)
    for the last entry will result in curr->start == curr->end with
    a symbol length of zero.

ii) If the symbol has a start address that is shortly before a page
    boundary, then also here, curr->end - curr->start will just be
    very few bytes, where it's unrealistic that we could perform a
    match against.

Instead, change the heuristic to roundup(curr->start, 4096) + 4096, so
that we can catch such corner cases and have a better chance to find
that specific symbol. It's still just best effort as the real end of the
symbol is unknown to us (and could even be at a larger offset than the
current range), but better than the current situation.

Alexei reported that he recently run into case i) with a JITed eBPF
program (these are all page aligned) as the last symbol which wasn't
properly shown in the call graph (while other eBPF program symbols in
the rb tree were displayed correctly). Since this is a generic issue,
lets try to improve the heuristic a bit.

Reported-and-Tested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Fixes: 2e538c4a1847 ("perf tools: Improve kernel/modules symbol lookup")
Link: http://lkml.kernel.org/r/bb5c80d27743be6f12afc68405f1956a330e1bc9.1489614365.git.daniel@iogearbox.net
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/symbol.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 70e389bc4af7..9b4d8ba22fed 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -202,7 +202,7 @@ void symbols__fixup_end(struct rb_root *symbols)
 
 	/* Last entry */
 	if (curr->end == curr->start)
-		curr->end = roundup(curr->start, 4096);
+		curr->end = roundup(curr->start, 4096) + 4096;
 }
 
 void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
-- 
cgit v1.2.3


From cf8c73afb3abf0f8905efbaddd4ce11a0deec9da Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Fri, 17 Mar 2017 10:22:51 +0800
Subject: drm/amd/amdgpu: add POLARIS12 PCI ID

Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index f7adbace428a..b76cd699eb0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -421,6 +421,7 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x6985, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
 	{0x1002, 0x6986, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
 	{0x1002, 0x6987, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+	{0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
 	{0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
 
 	{0, 0, 0}
-- 
cgit v1.2.3


From 38a33101dd5fb8f07244e7e6dd04747a6cd2e3fb Mon Sep 17 00:00:00 2001
From: Kinglong Mee <kinglongmee@gmail.com>
Date: Thu, 9 Mar 2017 11:36:36 +0800
Subject: NFS: fix the fault nrequests decreasing for nfs_inode COPY

The nfs_commit_file for NFSv4.2's COPY operation goes through
the commit path for normal WRITE, but without increase nrequests,
so, the nrequests decreased in nfs_commit_release_pages is fault.
After that, the nrequests will be wrong.

[ 5670.299881] ------------[ cut here ]------------
[ 5670.300295] WARNING: CPU: 0 PID: 27656 at fs/nfs/inode.c:127 nfs_clear_inode+0x66/0x90 [nfs]
[ 5670.300558] Modules linked in: nfsv4(E) nfs(E) fscache(E) tun bridge stp llc fuse ip_set nfnetlink vmw_vsock_vmci_transport vsock snd_seq_midi snd_seq_midi_event ppdev f2fs coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_ens1371 intel_rapl_perf gameport snd_ac97_codec vmw_balloon ac97_bus snd_seq snd_pcm joydev snd_rawmidi snd_timer snd_seq_device snd soundcore nfit parport_pc parport acpi_cpufreq tpm_tis tpm_tis_core tpm i2c_piix4 vmw_vmci shpchp nfsd auth_rpcgss nfs_acl lockd grace sunrpc xfs libcrc32c vmwgfx drm_kms_helper ttm drm e1000 crc32c_intel mptspi scsi_transport_spi serio_raw mptscsih mptbase ata_generic pata_acpi fjes [last unloaded: fscache]
[ 5670.302925] CPU: 0 PID: 27656 Comm: umount.nfs4 Tainted: G        W   E   4.11.0-rc1+ #519
[ 5670.303292] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015
[ 5670.304094] Call Trace:
[ 5670.304510]  dump_stack+0x63/0x86
[ 5670.304917]  __warn+0xcb/0xf0
[ 5670.305276]  warn_slowpath_null+0x1d/0x20
[ 5670.305661]  nfs_clear_inode+0x66/0x90 [nfs]
[ 5670.306093]  nfs4_evict_inode+0x61/0x70 [nfsv4]
[ 5670.306480]  evict+0xbb/0x1c0
[ 5670.306888]  dispose_list+0x4d/0x70
[ 5670.307233]  evict_inodes+0x178/0x1a0
[ 5670.307579]  generic_shutdown_super+0x44/0xf0
[ 5670.307985]  nfs_kill_super+0x21/0x40 [nfs]
[ 5670.308325]  deactivate_locked_super+0x43/0x70
[ 5670.308698]  deactivate_super+0x5a/0x60
[ 5670.309036]  cleanup_mnt+0x3f/0x90
[ 5670.309407]  __cleanup_mnt+0x12/0x20
[ 5670.309837]  task_work_run+0x80/0xa0
[ 5670.310162]  exit_to_usermode_loop+0x89/0x90
[ 5670.310497]  syscall_return_slowpath+0xaa/0xb0
[ 5670.310875]  entry_SYSCALL_64_fastpath+0xa7/0xa9
[ 5670.311197] RIP: 0033:0x7f1bb3617fe7
[ 5670.311545] RSP: 002b:00007ffecbabb828 EFLAGS: 00000206 ORIG_RAX: 00000000000000a6
[ 5670.311906] RAX: 0000000000000000 RBX: 0000000001dca1f0 RCX: 00007f1bb3617fe7
[ 5670.312239] RDX: 000000000000000c RSI: 0000000000000001 RDI: 0000000001dc83c0
[ 5670.312653] RBP: 0000000001dc83c0 R08: 0000000000000001 R09: 0000000000000000
[ 5670.312998] R10: 0000000000000755 R11: 0000000000000206 R12: 00007ffecbabc66a
[ 5670.313335] R13: 0000000001dc83a0 R14: 0000000000000000 R15: 0000000000000000
[ 5670.313758] ---[ end trace bf4bfe7764e4eb40 ]---

Cc: linux-kernel@vger.kernel.org
Fixes: 67911c8f18 ("NFS: Add nfs_commit_file()")
Signed-off-by: Kinglong Mee <kinglongmee@gmail.com>
Cc: stable@vger.kernel.org # 4.7+
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/write.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e75b056f46f4..abb2c8a3be42 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1784,7 +1784,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
 			(long long)req_offset(req));
 		if (status < 0) {
 			nfs_context_set_write_error(req->wb_context, status);
-			nfs_inode_remove_request(req);
+			if (req->wb_page)
+				nfs_inode_remove_request(req);
 			dprintk_cont(", error = %d\n", status);
 			goto next;
 		}
@@ -1793,7 +1794,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
 		 * returned by the server against all stored verfs. */
 		if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) {
 			/* We have a match */
-			nfs_inode_remove_request(req);
+			if (req->wb_page)
+				nfs_inode_remove_request(req);
 			dprintk_cont(" OK\n");
 			goto next;
 		}
-- 
cgit v1.2.3


From eed50879d64ab1b9f76445dbab822e43a098b309 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sat, 11 Mar 2017 15:52:47 -0500
Subject: xprtrdma: Squelch kbuild sparse complaint

New complaint from kbuild for 4.9.y:

net/sunrpc/xprtrdma/verbs.c:489:19: sparse: incompatible types in
    comparison expression (different type sizes)

verbs.c:
489	max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES);

I can't reproduce this running sparse here. Likewise, "make W=1
net/sunrpc/xprtrdma/verbs.o" never indicated any issue.

A little poking suggests that because the range of its values is
small, gcc can make the actual width of RPCRDMA_MAX_SEND_SGES
smaller than the width of an unsigned integer.

Fixes: 16f906d66cd7 ("xprtrdma: Reduce required number of send SGEs")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: stable@kernel.org
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtrdma/verbs.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 81cd31acf690..3b332b395045 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -503,7 +503,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 	struct ib_cq *sendcq, *recvcq;
 	int rc;
 
-	max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES);
+	max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge,
+			RPCRDMA_MAX_SEND_SGES);
 	if (max_sge < RPCRDMA_MIN_SEND_SGES) {
 		pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
 		return -ENOMEM;
-- 
cgit v1.2.3


From 05fae7bbc237bc7de0ee9c3dcf85b2572a80e3b5 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Fri, 10 Mar 2017 10:48:13 +0800
Subject: nfs: make nfs4_cb_sv_ops static

Fixes the following sparse warning:

fs/nfs/callback.c:235:21: warning: symbol 'nfs4_cb_sv_ops' was not
declared. Should it be static?

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/callback.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 484bebc20bca..5c8a096d763e 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -231,12 +231,12 @@ static struct svc_serv_ops nfs41_cb_sv_ops = {
 	.svo_module		= THIS_MODULE,
 };
 
-struct svc_serv_ops *nfs4_cb_sv_ops[] = {
+static struct svc_serv_ops *nfs4_cb_sv_ops[] = {
 	[0] = &nfs40_cb_sv_ops,
 	[1] = &nfs41_cb_sv_ops,
 };
 #else
-struct svc_serv_ops *nfs4_cb_sv_ops[] = {
+static struct svc_serv_ops *nfs4_cb_sv_ops[] = {
 	[0] = &nfs40_cb_sv_ops,
 	[1] = NULL,
 };
-- 
cgit v1.2.3


From 63513232f8cd219dcaa5eafae028740ed3067d83 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Mon, 13 Mar 2017 10:36:19 -0400
Subject: NFS prevent double free in async nfs4_exchange_id

Since rpc_task is async, the release function should be called which
will free the impl_id, scope, and owner.

Trond pointed at 2 more problems:
-- use of client pointer after free in the nfs4_exchangeid_release() function
-- cl_count mismatch if rpc_run_task() isn't run

Fixes: 8d89bd70bc9 ("NFS setup async exchange_id")
Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Cc: stable@vger.kernel.org # 4.9
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs4proc.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c1f5369cd339..c780d98035cc 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7425,11 +7425,11 @@ static void nfs4_exchange_id_release(void *data)
 	struct nfs41_exchange_id_data *cdata =
 					(struct nfs41_exchange_id_data *)data;
 
-	nfs_put_client(cdata->args.client);
 	if (cdata->xprt) {
 		xprt_put(cdata->xprt);
 		rpc_clnt_xprt_switch_put(cdata->args.client->cl_rpcclient);
 	}
+	nfs_put_client(cdata->args.client);
 	kfree(cdata->res.impl_id);
 	kfree(cdata->res.server_scope);
 	kfree(cdata->res.server_owner);
@@ -7536,10 +7536,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
 	task_setup_data.callback_data = calldata;
 
 	task = rpc_run_task(&task_setup_data);
-	if (IS_ERR(task)) {
-	status = PTR_ERR(task);
-		goto out_impl_id;
-	}
+	if (IS_ERR(task))
+		return PTR_ERR(task);
 
 	if (!xprt) {
 		status = rpc_wait_for_completion_task(task);
@@ -7567,6 +7565,7 @@ out_server_owner:
 	kfree(calldata->res.server_owner);
 out_calldata:
 	kfree(calldata);
+	nfs_put_client(clp);
 	goto out;
 }
 
-- 
cgit v1.2.3


From 033853325fe3bdc70819a8b97915bd3bca41d3af Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Wed, 8 Mar 2017 14:39:15 -0500
Subject: NFSv4.1 respect server's max size in CREATE_SESSION

Currently client doesn't respect max sizes server returns in CREATE_SESSION.
nfs4_session_set_rwsize() gets called and server->rsize, server->wsize are 0
so they never get set to the sizes returned by the server.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs4client.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 5ae9d64ea08b..8346ccbf2d52 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -1023,9 +1023,9 @@ static void nfs4_session_set_rwsize(struct nfs_server *server)
 	server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead;
 	server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead;
 
-	if (server->rsize > server_resp_sz)
+	if (!server->rsize || server->rsize > server_resp_sz)
 		server->rsize = server_resp_sz;
-	if (server->wsize > server_rqst_sz)
+	if (!server->wsize || server->wsize > server_rqst_sz)
 		server->wsize = server_rqst_sz;
 #endif /* CONFIG_NFS_V4_1 */
 }
-- 
cgit v1.2.3


From a33e4b036d4612f62220f37a9fa29d273b6fd0ca Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@monkey.org>
Date: Thu, 9 Mar 2017 12:56:48 -0500
Subject: pNFS: return status from nfs4_pnfs_ds_connect

The nfs4_pnfs_ds_connect path can call rpc_create which can fail or it
can wait on another context to reach the same failure.

This checks that the rpc_create succeeded and returns the error to the
caller.

When an error is returned, both the files and flexfiles layouts will return
NULL from _prepare_ds(). The flexfiles layout will also return the layout
with the error NFS4ERR_NXIO.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/client.c                           | 25 ++++++++++++++++++++++++-
 fs/nfs/filelayout/filelayoutdev.c         |  7 ++++++-
 fs/nfs/flexfilelayout/flexfilelayoutdev.c |  3 ++-
 fs/nfs/internal.h                         |  2 ++
 fs/nfs/pnfs.h                             |  2 +-
 fs/nfs/pnfs_nfs.c                         | 15 +++++++++++++--
 6 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 91a8d610ba0f..390ada8741bc 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -325,10 +325,33 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
 	return NULL;
 }
 
-static bool nfs_client_init_is_complete(const struct nfs_client *clp)
+/*
+ * Return true if @clp is done initializing, false if still working on it.
+ *
+ * Use nfs_client_init_status to check if it was successful.
+ */
+bool nfs_client_init_is_complete(const struct nfs_client *clp)
 {
 	return clp->cl_cons_state <= NFS_CS_READY;
 }
+EXPORT_SYMBOL_GPL(nfs_client_init_is_complete);
+
+/*
+ * Return 0 if @clp was successfully initialized, -errno otherwise.
+ *
+ * This must be called *after* nfs_client_init_is_complete() returns true,
+ * otherwise it will pop WARN_ON_ONCE and return -EINVAL
+ */
+int nfs_client_init_status(const struct nfs_client *clp)
+{
+	/* called without checking nfs_client_init_is_complete */
+	if (clp->cl_cons_state > NFS_CS_READY) {
+		WARN_ON_ONCE(1);
+		return -EINVAL;
+	}
+	return clp->cl_cons_state;
+}
+EXPORT_SYMBOL_GPL(nfs_client_init_status);
 
 int nfs_wait_client_init_complete(const struct nfs_client *clp)
 {
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index f956ca20a8a3..188120626179 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -266,6 +266,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
 	struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
 	struct nfs4_pnfs_ds *ret = ds;
 	struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
+	int status;
 
 	if (ds == NULL) {
 		printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
@@ -277,9 +278,13 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
 	if (ds->ds_clp)
 		goto out_test_devid;
 
-	nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
+	status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
 			     dataserver_retrans, 4,
 			     s->nfs_client->cl_minorversion);
+	if (status) {
+		ret = NULL;
+		goto out;
+	}
 
 out_test_devid:
 	if (ret->ds_clp == NULL ||
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index e5a6f248697b..544e7725e679 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -384,6 +384,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
 	struct inode *ino = lseg->pls_layout->plh_inode;
 	struct nfs_server *s = NFS_SERVER(ino);
 	unsigned int max_payload;
+	int status;
 
 	if (!ff_layout_mirror_valid(lseg, mirror, true)) {
 		pr_err_ratelimited("NFS: %s: No data server for offset index %d\n",
@@ -404,7 +405,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
 	/* FIXME: For now we assume the server sent only one version of NFS
 	 * to use for the DS.
 	 */
-	nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
+	status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
 			     dataserver_retrans,
 			     mirror->mirror_ds->ds_versions[0].version,
 			     mirror->mirror_ds->ds_versions[0].minor_version);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 09ca5095c04e..7b38fedb7e03 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -186,6 +186,8 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *,
 					   struct nfs_fh *,
 					   struct nfs_fattr *,
 					   rpc_authflavor_t);
+extern bool nfs_client_init_is_complete(const struct nfs_client *clp);
+extern int nfs_client_init_status(const struct nfs_client *clp);
 extern int nfs_wait_client_init_complete(const struct nfs_client *clp);
 extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
 extern struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 63f77b49a586..590e1e35781f 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -367,7 +367,7 @@ void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds);
 struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs,
 				      gfp_t gfp_flags);
 void nfs4_pnfs_v3_ds_connect_unload(void);
-void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
+int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
 			  struct nfs4_deviceid_node *devid, unsigned int timeo,
 			  unsigned int retrans, u32 version, u32 minor_version);
 struct nfs4_pnfs_ds_addr *nfs4_decode_mp_ds_addr(struct net *net,
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 9414b492439f..a7691b927af6 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -745,9 +745,9 @@ out:
 /*
  * Create an rpc connection to the nfs4_pnfs_ds data server.
  * Currently only supports IPv4 and IPv6 addresses.
- * If connection fails, make devid unavailable.
+ * If connection fails, make devid unavailable and return a -errno.
  */
-void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
+int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
 			  struct nfs4_deviceid_node *devid, unsigned int timeo,
 			  unsigned int retrans, u32 version, u32 minor_version)
 {
@@ -772,6 +772,17 @@ void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
 	} else {
 		nfs4_wait_ds_connect(ds);
 	}
+
+	/*
+	 * At this point the ds->ds_clp should be ready, but it might have
+	 * hit an error.
+	 */
+	if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) {
+		WARN_ON_ONCE(1);
+		return -EINVAL;
+	}
+
+	return nfs_client_init_status(ds->ds_clp);
 }
 EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect);
 
-- 
cgit v1.2.3


From da066f3f039eba3e72e97b2ccad0dd8b45ba84bd Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@monkey.org>
Date: Thu, 9 Mar 2017 12:56:49 -0500
Subject: pNFS/flexfiles: never nfs4_mark_deviceid_unavailable

The flexfiles layout should never mark a device unavailable.

Move nfs4_mark_deviceid_unavailable out of nfs4_pnfs_ds_connect and call
directly from files layout where it's still needed.

The flexfiles driver still handles marked devices in error paths, but will
now print a rate limited warning.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/filelayout/filelayoutdev.c         |  1 +
 fs/nfs/flexfilelayout/flexfilelayout.h    | 14 +++++++++++++-
 fs/nfs/flexfilelayout/flexfilelayoutdev.c |  2 +-
 fs/nfs/pnfs_nfs.c                         | 24 ++++++++++++++++--------
 4 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index 188120626179..d913e818858f 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -282,6 +282,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
 			     dataserver_retrans, 4,
 			     s->nfs_client->cl_minorversion);
 	if (status) {
+		nfs4_mark_deviceid_unavailable(devid);
 		ret = NULL;
 		goto out;
 	}
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index f4f39b0ab09b..98b34c9b0564 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -175,7 +175,19 @@ ff_layout_no_read_on_rw(struct pnfs_layout_segment *lseg)
 static inline bool
 ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node)
 {
-	return nfs4_test_deviceid_unavailable(node);
+	/*
+	 * Flexfiles should never mark a DS unavailable, but if it does
+	 * print a (ratelimited) warning as this can affect performance.
+	 */
+	if (nfs4_test_deviceid_unavailable(node)) {
+		u32 *p = (u32 *)node->deviceid.data;
+
+		pr_warn_ratelimited("NFS: flexfiles layout referencing an "
+				"unavailable device [%x%x%x%x]\n",
+				p[0], p[1], p[2], p[3]);
+		return true;
+	}
+	return false;
 }
 
 static inline int
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 544e7725e679..85fde93dff77 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -421,11 +421,11 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
 			mirror->mirror_ds->ds_versions[0].wsize = max_payload;
 		goto out;
 	}
+out_fail:
 	ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
 				 mirror, lseg->pls_range.offset,
 				 lseg->pls_range.length, NFS4ERR_NXIO,
 				 OP_ILLEGAL, GFP_NOIO);
-out_fail:
 	if (fail_return || !ff_layout_has_available_ds(lseg))
 		pnfs_error_mark_layout_for_return(ino, lseg);
 	ds = NULL;
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index a7691b927af6..7250b95549ec 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -751,9 +751,11 @@ int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
 			  struct nfs4_deviceid_node *devid, unsigned int timeo,
 			  unsigned int retrans, u32 version, u32 minor_version)
 {
-	if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
-		int err = 0;
+	int err;
 
+again:
+	err = 0;
+	if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
 		if (version == 3) {
 			err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo,
 						       retrans);
@@ -766,23 +768,29 @@ int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
 			err = -EPROTONOSUPPORT;
 		}
 
-		if (err)
-			nfs4_mark_deviceid_unavailable(devid);
 		nfs4_clear_ds_conn_bit(ds);
 	} else {
 		nfs4_wait_ds_connect(ds);
+
+		/* what was waited on didn't connect AND didn't mark unavail */
+		if (!ds->ds_clp && !nfs4_test_deviceid_unavailable(devid))
+			goto again;
 	}
 
 	/*
 	 * At this point the ds->ds_clp should be ready, but it might have
 	 * hit an error.
 	 */
-	if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) {
-		WARN_ON_ONCE(1);
-		return -EINVAL;
+	if (!err) {
+		if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) {
+			WARN_ON_ONCE(ds->ds_clp ||
+				!nfs4_test_deviceid_unavailable(devid));
+			return -EINVAL;
+		}
+		err = nfs_client_init_status(ds->ds_clp);
 	}
 
-	return nfs_client_init_status(ds->ds_clp);
+	return err;
 }
 EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect);
 
-- 
cgit v1.2.3


From 49d4a334727057af57048ded99697d17b016d91b Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Mon, 6 Mar 2017 18:20:56 -0800
Subject: Btrfs: fix regression in lock_delalloc_pages

The bug is a regression after commit
(da2c7009f6ca "btrfs: teach __process_pages_contig about PAGE_LOCK operation")
and commit
(76c0021db8fd "Btrfs: use helper to simplify lock/unlock pages").

So if the dirty pages which are under writeback got truncated partially
before we lock the dirty pages, we couldn't find all pages mapping to the
delalloc range, and the bug didn't return an error so it kept going on and
found that the delalloc range got truncated and got to unlock the dirty
pages, and then the ASSERT could caught the error, and showed

-----------------------------------------------------------------------------
assertion failed: page_ops & PAGE_LOCK, file: fs/btrfs/extent_io.c, line: 1716
-----------------------------------------------------------------------------

This fixes the bug by returning the proper -EAGAIN.

Cc: David Sterba <dsterba@suse.com>
Reported-by: Dave Jones <davej@codemonkey.org.uk>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 28e81922a21c..8df797432740 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1714,7 +1714,8 @@ static int __process_pages_contig(struct address_space *mapping,
 			 * can we find nothing at @index.
 			 */
 			ASSERT(page_ops & PAGE_LOCK);
-			return ret;
+			err = -EAGAIN;
+			goto out;
 		}
 
 		for (i = 0; i < ret; i++) {
-- 
cgit v1.2.3


From e1699d2d7bf6e6cce3e1baff19f9dd4595a58664 Mon Sep 17 00:00:00 2001
From: Zygo Blaxell <ce3g8jdj@umail.furryterror.org>
Date: Fri, 10 Mar 2017 16:45:44 -0500
Subject: btrfs: add missing memset while reading compressed inline extents

This is a story about 4 distinct (and very old) btrfs bugs.

Commit c8b978188c ("Btrfs: Add zlib compression support") added
three data corruption bugs for inline extents (bugs #1-3).

Commit 93c82d5750 ("Btrfs: zero page past end of inline file items")
fixed bug #1:  uncompressed inline extents followed by a hole and more
extents could get non-zero data in the hole as they were read.  The fix
was to add a memset in btrfs_get_extent to zero out the hole.

Commit 166ae5a418 ("btrfs: fix inline compressed read err corruption")
fixed bug #2:  compressed inline extents which contained non-zero bytes
might be replaced with zero bytes in some cases.  This patch removed an
unhelpful memset from uncompress_inline, but the case where memset is
required was missed.

There is also a memset in the decompression code, but this only covers
decompressed data that is shorter than the ram_bytes from the extent
ref record.  This memset doesn't cover the region between the end of the
decompressed data and the end of the page.  It has also moved around a
few times over the years, so there's no single patch to refer to.

This patch fixes bug #3:  compressed inline extents followed by a hole
and more extents could get non-zero data in the hole as they were read
(i.e. bug #3 is the same as bug #1, but s/uncompressed/compressed/).
The fix is the same:  zero out the hole in the compressed case too,
by putting a memset back in uncompress_inline, but this time with
correct parameters.

The last and oldest bug, bug #0, is the cause of the offending inline
extent/hole/extent pattern.  Bug #0 is a subtle and mostly-harmless quirk
of behavior somewhere in the btrfs write code.  In a few special cases,
an inline extent and hole are allowed to persist where they normally
would be combined with later extents in the file.

A fast reproducer for bug #0 is presented below.  A few offending extents
are also created in the wild during large rsync transfers with the -S
flag.  A Linux kernel build (git checkout; make allyesconfig; make -j8)
will produce a handful of offending files as well.  Once an offending
file is created, it can present different content to userspace each
time it is read.

Bug #0 is at least 4 and possibly 8 years old.  I verified every vX.Y
kernel back to v3.5 has this behavior.  There are fossil records of this
bug's effects in commits all the way back to v2.6.32.  I have no reason
to believe bug #0 wasn't present at the beginning of btrfs compression
support in v2.6.29, but I can't easily test kernels that old to be sure.

It is not clear whether bug #0 is worth fixing.  A fix would likely
require injecting extra reads into currently write-only paths, and most
of the exceptional cases caused by bug #0 are already handled now.

Whether we like them or not, bug #0's inline extents followed by holes
are part of the btrfs de-facto disk format now, and we need to be able
to read them without data corruption or an infoleak.  So enough about
bug #0, let's get back to bug #3 (this patch).

An example of on-disk structure leading to data corruption found in
the wild:

        item 61 key (606890 INODE_ITEM 0) itemoff 9662 itemsize 160
                inode generation 50 transid 50 size 47424 nbytes 49141
                block group 0 mode 100644 links 1 uid 0 gid 0
                rdev 0 flags 0x0(none)
        item 62 key (606890 INODE_REF 603050) itemoff 9642 itemsize 20
                inode ref index 3 namelen 10 name: DB_File.so
        item 63 key (606890 EXTENT_DATA 0) itemoff 8280 itemsize 1362
                inline extent data size 1341 ram 4085 compress(zlib)
        item 64 key (606890 EXTENT_DATA 4096) itemoff 8227 itemsize 53
                extent data disk byte 5367308288 nr 20480
                extent data offset 0 nr 45056 ram 45056
                extent compression(zlib)

Different data appears in userspace during each read of the 11 bytes
between 4085 and 4096.  The extent in item 63 is not long enough to
fill the first page of the file, so a memset is required to fill the
space between item 63 (ending at 4085) and item 64 (beginning at 4096)
with zero.

Here is a reproducer from Liu Bo, which demonstrates another method
of creating the same inline extent and hole pattern:

Using 'page_poison=on' kernel command line (or enable
CONFIG_PAGE_POISONING) run the following:

	# touch foo
	# chattr +c foo
	# xfs_io -f -c "pwrite -W 0 1000" foo
	# xfs_io -f -c "falloc 4 8188" foo
	# od -x foo
	# echo 3 >/proc/sys/vm/drop_caches
	# od -x foo

This produce the following on my box:

Correct output:  file contains 1000 data bytes followed
by zeros:

	0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd
	*
	0001740 cdcd cdcd cdcd cdcd 0000 0000 0000 0000
	0001760 0000 0000 0000 0000 0000 0000 0000 0000
	*
	0020000

Actual output:  the data after the first 1000 bytes
will be different each run:

	0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd
	*
	0001740 cdcd cdcd cdcd cdcd 6c63 7400 635f 006d
	0001760 5f74 6f43 7400 435f 0053 5f74 7363 7400
	0002000 435f 0056 5f74 6164 7400 645f 0062 5f74
	(...)

Signed-off-by: Zygo Blaxell <ce3g8jdj@umail.furryterror.org>
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: Chris Mason <clm@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>
---
 fs/btrfs/inode.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b2bc07aad1ae..e57191072aa3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6709,6 +6709,20 @@ static noinline int uncompress_inline(struct btrfs_path *path,
 	max_size = min_t(unsigned long, PAGE_SIZE, max_size);
 	ret = btrfs_decompress(compress_type, tmp, page,
 			       extent_offset, inline_size, max_size);
+
+	/*
+	 * decompression code contains a memset to fill in any space between the end
+	 * of the uncompressed data and the end of max_size in case the decompressed
+	 * data ends up shorter than ram_bytes.  That doesn't cover the hole between
+	 * the end of an inline extent and the beginning of the next block, so we
+	 * cover that region here.
+	 */
+
+	if (max_size + pg_offset < PAGE_SIZE) {
+		char *map = kmap(page);
+		memset(map + pg_offset + max_size, 0, PAGE_SIZE - max_size - pg_offset);
+		kunmap(page);
+	}
 	kfree(tmp);
 	return ret;
 }
-- 
cgit v1.2.3


From e7348396c6d51b57c95c6646c390cd078e038e19 Mon Sep 17 00:00:00 2001
From: Masaki Ota <masaki.ota@jp.alps.com>
Date: Fri, 17 Mar 2017 14:10:57 -0700
Subject: Input: ALPS - fix V8+ protocol handling (73 03 28)

Devices identified as E7="73 03 28" use slightly modified version of V8
protocol, with lower count per electrode, different offsets, and different
feature bits in OTP data.

Fixes: aeaa881f9b17 ("Input: ALPS - set DualPoint flag for 74 03 28 devices")
Signed-off-by: Masaki Ota <masaki.ota@jp.alps.com>
Acked-by: Pali Rohar <pali.rohar@gmail.com>
Tested-by: Paul Donohue <linux-kernel@PaulSD.com>
Tested-by: Nick Fletcher <nick.m.fletcher@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/mouse/alps.c | 66 +++++++++++++++++++++++++++++++++++-----------
 drivers/input/mouse/alps.h | 11 ++++++++
 2 files changed, 61 insertions(+), 16 deletions(-)

diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index 72b28ebfe360..262d9b620fcf 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -2462,14 +2462,34 @@ static int alps_update_device_area_ss4_v2(unsigned char otp[][4],
 	int num_y_electrode;
 	int x_pitch, y_pitch, x_phys, y_phys;
 
-	num_x_electrode = SS4_NUMSENSOR_XOFFSET + (otp[1][0] & 0x0F);
-	num_y_electrode = SS4_NUMSENSOR_YOFFSET + ((otp[1][0] >> 4) & 0x0F);
+	if (IS_SS4PLUS_DEV(priv->dev_id)) {
+		num_x_electrode =
+			SS4PLUS_NUMSENSOR_XOFFSET + (otp[0][2] & 0x0F);
+		num_y_electrode =
+			SS4PLUS_NUMSENSOR_YOFFSET + ((otp[0][2] >> 4) & 0x0F);
 
-	priv->x_max = (num_x_electrode - 1) * SS4_COUNT_PER_ELECTRODE;
-	priv->y_max = (num_y_electrode - 1) * SS4_COUNT_PER_ELECTRODE;
+		priv->x_max =
+			(num_x_electrode - 1) * SS4PLUS_COUNT_PER_ELECTRODE;
+		priv->y_max =
+			(num_y_electrode - 1) * SS4PLUS_COUNT_PER_ELECTRODE;
 
-	x_pitch = ((otp[1][2] >> 2) & 0x07) + SS4_MIN_PITCH_MM;
-	y_pitch = ((otp[1][2] >> 5) & 0x07) + SS4_MIN_PITCH_MM;
+		x_pitch = (otp[0][1] & 0x0F) + SS4PLUS_MIN_PITCH_MM;
+		y_pitch = ((otp[0][1] >> 4) & 0x0F) + SS4PLUS_MIN_PITCH_MM;
+
+	} else {
+		num_x_electrode =
+			SS4_NUMSENSOR_XOFFSET + (otp[1][0] & 0x0F);
+		num_y_electrode =
+			SS4_NUMSENSOR_YOFFSET + ((otp[1][0] >> 4) & 0x0F);
+
+		priv->x_max =
+			(num_x_electrode - 1) * SS4_COUNT_PER_ELECTRODE;
+		priv->y_max =
+			(num_y_electrode - 1) * SS4_COUNT_PER_ELECTRODE;
+
+		x_pitch = ((otp[1][2] >> 2) & 0x07) + SS4_MIN_PITCH_MM;
+		y_pitch = ((otp[1][2] >> 5) & 0x07) + SS4_MIN_PITCH_MM;
+	}
 
 	x_phys = x_pitch * (num_x_electrode - 1); /* In 0.1 mm units */
 	y_phys = y_pitch * (num_y_electrode - 1); /* In 0.1 mm units */
@@ -2485,7 +2505,10 @@ static int alps_update_btn_info_ss4_v2(unsigned char otp[][4],
 {
 	unsigned char is_btnless;
 
-	is_btnless = (otp[1][1] >> 3) & 0x01;
+	if (IS_SS4PLUS_DEV(priv->dev_id))
+		is_btnless = (otp[1][0] >> 1) & 0x01;
+	else
+		is_btnless = (otp[1][1] >> 3) & 0x01;
 
 	if (is_btnless)
 		priv->flags |= ALPS_BUTTONPAD;
@@ -2493,6 +2516,21 @@ static int alps_update_btn_info_ss4_v2(unsigned char otp[][4],
 	return 0;
 }
 
+static int alps_update_dual_info_ss4_v2(unsigned char otp[][4],
+				       struct alps_data *priv)
+{
+	bool is_dual = false;
+
+	if (IS_SS4PLUS_DEV(priv->dev_id))
+		is_dual = (otp[0][0] >> 4) & 0x01;
+
+	if (is_dual)
+		priv->flags |= ALPS_DUALPOINT |
+					ALPS_DUALPOINT_WITH_PRESSURE;
+
+	return 0;
+}
+
 static int alps_set_defaults_ss4_v2(struct psmouse *psmouse,
 				    struct alps_data *priv)
 {
@@ -2508,6 +2546,8 @@ static int alps_set_defaults_ss4_v2(struct psmouse *psmouse,
 
 	alps_update_btn_info_ss4_v2(otp, priv);
 
+	alps_update_dual_info_ss4_v2(otp, priv);
+
 	return 0;
 }
 
@@ -2753,10 +2793,6 @@ static int alps_set_protocol(struct psmouse *psmouse,
 		if (alps_set_defaults_ss4_v2(psmouse, priv))
 			return -EIO;
 
-		if (priv->fw_ver[1] == 0x1)
-			priv->flags |= ALPS_DUALPOINT |
-					ALPS_DUALPOINT_WITH_PRESSURE;
-
 		break;
 	}
 
@@ -2827,10 +2863,7 @@ static int alps_identify(struct psmouse *psmouse, struct alps_data *priv)
 			   ec[2] >= 0x90 && ec[2] <= 0x9d) {
 			protocol = &alps_v3_protocol_data;
 		} else if (e7[0] == 0x73 && e7[1] == 0x03 &&
-			   e7[2] == 0x14 && ec[1] == 0x02) {
-			protocol = &alps_v8_protocol_data;
-		} else if (e7[0] == 0x73 && e7[1] == 0x03 &&
-			   e7[2] == 0x28 && ec[1] == 0x01) {
+			   (e7[2] == 0x14 || e7[2] == 0x28)) {
 			protocol = &alps_v8_protocol_data;
 		} else {
 			psmouse_dbg(psmouse,
@@ -2840,7 +2873,8 @@ static int alps_identify(struct psmouse *psmouse, struct alps_data *priv)
 	}
 
 	if (priv) {
-		/* Save the Firmware version */
+		/* Save Device ID and Firmware version */
+		memcpy(priv->dev_id, e7, 3);
 		memcpy(priv->fw_ver, ec, 3);
 		error = alps_set_protocol(psmouse, priv, protocol);
 		if (error)
diff --git a/drivers/input/mouse/alps.h b/drivers/input/mouse/alps.h
index 6d279aa27cb9..4334f2805d93 100644
--- a/drivers/input/mouse/alps.h
+++ b/drivers/input/mouse/alps.h
@@ -54,6 +54,16 @@ enum SS4_PACKET_ID {
 
 #define SS4_MASK_NORMAL_BUTTONS		0x07
 
+#define SS4PLUS_COUNT_PER_ELECTRODE	128
+#define SS4PLUS_NUMSENSOR_XOFFSET	16
+#define SS4PLUS_NUMSENSOR_YOFFSET	5
+#define SS4PLUS_MIN_PITCH_MM		37
+
+#define IS_SS4PLUS_DEV(_b)	(((_b[0]) == 0x73) &&	\
+				 ((_b[1]) == 0x03) &&	\
+				 ((_b[2]) == 0x28)		\
+				)
+
 #define SS4_IS_IDLE_V2(_b)	(((_b[0]) == 0x18) &&		\
 				 ((_b[1]) == 0x10) &&		\
 				 ((_b[2]) == 0x00) &&		\
@@ -283,6 +293,7 @@ struct alps_data {
 	int addr_command;
 	u16 proto_version;
 	u8 byte0, mask0;
+	u8 dev_id[3];
 	u8 fw_ver[3];
 	int flags;
 	int x_max;
-- 
cgit v1.2.3


From 47e6fb4212d09f325c0847d05985dd3d71553095 Mon Sep 17 00:00:00 2001
From: Masaki Ota <masaki.ota@jp.alps.com>
Date: Fri, 17 Mar 2017 14:19:40 -0700
Subject: Input: ALPS - fix trackstick button handling on V8 devices

Alps stick devices always have physical buttons, so we should not check
ALPS_BUTTONPAD flag to decide whether we should report them.

Fixes: 4777ac220c43 ("Input: ALPS - add touchstick support for SS5 hardware")
Signed-off-by: Masaki Ota <masaki.ota@jp.alps.com>
Acked-by: Pali Rohar <pali.rohar@gmail.com>
Tested-by: Paul Donohue <linux-kernel@PaulSD.com>
Tested-by: Nick Fletcher <nick.m.fletcher@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/mouse/alps.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index 262d9b620fcf..f210e19ddba6 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -1282,10 +1282,8 @@ static int alps_decode_ss4_v2(struct alps_fields *f,
 	/* handle buttons */
 	if (pkt_id == SS4_PACKET_ID_STICK) {
 		f->ts_left = !!(SS4_BTN_V2(p) & 0x01);
-		if (!(priv->flags & ALPS_BUTTONPAD)) {
-			f->ts_right = !!(SS4_BTN_V2(p) & 0x02);
-			f->ts_middle = !!(SS4_BTN_V2(p) & 0x04);
-		}
+		f->ts_right = !!(SS4_BTN_V2(p) & 0x02);
+		f->ts_middle = !!(SS4_BTN_V2(p) & 0x04);
 	} else {
 		f->left = !!(SS4_BTN_V2(p) & 0x01);
 		if (!(priv->flags & ALPS_BUTTONPAD)) {
-- 
cgit v1.2.3


From 7de32556dfc62b9e1203730cc26b71292da8a244 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sat, 18 Mar 2017 00:57:39 +0100
Subject: cpufreq: intel_pstate: One set of global limits in active mode

In the active mode intel_pstate currently uses two sets of global
limits, each associated with one of the possible scaling_governor
settings in that mode: "powersave" or "performance".

The driver switches over from one of those sets to the other
depending on the scaling_governor setting for the last CPU whose
per-policy cpufreq interface in sysfs was last used to change
parameters exposed in there.  That obviously leads to no end of
issues when the scaling_governor settings differ between CPUs.

The most recent issue was introduced by commit a240c4aa5d0f (cpufreq:
intel_pstate: Do not reinit performance limits in ->setpolicy)
that eliminated the reinitialization of "performance" limits in
intel_pstate_set_policy() preventing the max limit from being set
to anything below 100, among other things.

Namely, an undesirable side effect of commit a240c4aa5d0f is that
now, after setting scaling_governor to "performance" in the active
mode, the per-policy limits for the CPU in question go to the highest
level and stay there even when it is switched back to "powersave"
later.

As it turns out, some distributions set scaling_governor to
"performance" temporarily for all CPUs to speed-up system
initialization, so that change causes them to misbehave later.

To fix that, get rid of the performance/powersave global limits
split and use just one set of global limits for everything.

From the user's persepctive, after this modification, when
scaling_governor is switched from "performance" to "powersave"
or the other way around on one CPU, the limits settings (ie. the
global max/min_perf_pct and per-policy scaling_max/min_freq for
any CPUs) will not change.  Still, switching from "performance"
to "powersave" or the other way around changes the way in which
P-states are selected and in particular "performance" causes the
driver to always request the highest P-state it is allowed to ask
for for the given CPU.

Fixes: a240c4aa5d0f (cpufreq: intel_pstate: Do not reinit performance limits in ->setpolicy)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 142 +++++++++++++----------------------------
 1 file changed, 46 insertions(+), 96 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 08e134ffba68..7b07803e7042 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -364,9 +364,7 @@ static bool driver_registered __read_mostly;
 static bool acpi_ppc;
 #endif
 
-static struct perf_limits performance_limits;
-static struct perf_limits powersave_limits;
-static struct perf_limits *limits;
+static struct perf_limits global;
 
 static void intel_pstate_init_limits(struct perf_limits *limits)
 {
@@ -377,14 +375,6 @@ static void intel_pstate_init_limits(struct perf_limits *limits)
 	limits->max_sysfs_pct = 100;
 }
 
-static void intel_pstate_set_performance_limits(struct perf_limits *limits)
-{
-	intel_pstate_init_limits(limits);
-	limits->min_perf_pct = 100;
-	limits->min_perf = int_ext_tofp(1);
-	limits->min_sysfs_pct = 100;
-}
-
 static DEFINE_MUTEX(intel_pstate_driver_lock);
 static DEFINE_MUTEX(intel_pstate_limits_lock);
 
@@ -507,7 +497,7 @@ static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
 	 * correct max turbo frequency based on the turbo state.
 	 * Also need to convert to MHz as _PSS freq is in MHz.
 	 */
-	if (!limits->turbo_disabled)
+	if (!global.turbo_disabled)
 		cpu->acpi_perf_data.states[0].core_frequency =
 					policy->cpuinfo.max_freq / 1000;
 	cpu->valid_pss_table = true;
@@ -626,7 +616,7 @@ static inline void update_turbo_state(void)
 
 	cpu = all_cpu_data[0];
 	rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
-	limits->turbo_disabled =
+	global.turbo_disabled =
 		(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
 		 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
 }
@@ -851,7 +841,7 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
 static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
 {
 	int min, hw_min, max, hw_max, cpu;
-	struct perf_limits *perf_limits = limits;
+	struct perf_limits *perf_limits = &global;
 	u64 value, cap;
 
 	for_each_cpu(cpu, policy->cpus) {
@@ -863,19 +853,22 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
 
 		rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
 		hw_min = HWP_LOWEST_PERF(cap);
-		if (limits->no_turbo)
+		if (global.no_turbo)
 			hw_max = HWP_GUARANTEED_PERF(cap);
 		else
 			hw_max = HWP_HIGHEST_PERF(cap);
 
-		min = fp_ext_toint(hw_max * perf_limits->min_perf);
+		max = fp_ext_toint(hw_max * perf_limits->max_perf);
+		if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
+			min = max;
+		else
+			min = fp_ext_toint(hw_max * perf_limits->min_perf);
 
 		rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
 
 		value &= ~HWP_MIN_PERF(~0L);
 		value |= HWP_MIN_PERF(min);
 
-		max = fp_ext_toint(hw_max * perf_limits->max_perf);
 		value &= ~HWP_MAX_PERF(~0L);
 		value |= HWP_MAX_PERF(max);
 
@@ -968,20 +961,11 @@ static int intel_pstate_resume(struct cpufreq_policy *policy)
 }
 
 static void intel_pstate_update_policies(void)
-	__releases(&intel_pstate_limits_lock)
-	__acquires(&intel_pstate_limits_lock)
 {
-	struct perf_limits *saved_limits = limits;
 	int cpu;
 
-	mutex_unlock(&intel_pstate_limits_lock);
-
 	for_each_possible_cpu(cpu)
 		cpufreq_update_policy(cpu);
-
-	mutex_lock(&intel_pstate_limits_lock);
-
-	limits = saved_limits;
 }
 
 /************************** debugfs begin ************************/
@@ -1060,7 +1044,7 @@ static void intel_pstate_debug_hide_params(void)
 	static ssize_t show_##file_name					\
 	(struct kobject *kobj, struct attribute *attr, char *buf)	\
 	{								\
-		return sprintf(buf, "%u\n", limits->object);		\
+		return sprintf(buf, "%u\n", global.object);		\
 	}
 
 static ssize_t intel_pstate_show_status(char *buf);
@@ -1151,10 +1135,10 @@ static ssize_t show_no_turbo(struct kobject *kobj,
 	}
 
 	update_turbo_state();
-	if (limits->turbo_disabled)
-		ret = sprintf(buf, "%u\n", limits->turbo_disabled);
+	if (global.turbo_disabled)
+		ret = sprintf(buf, "%u\n", global.turbo_disabled);
 	else
-		ret = sprintf(buf, "%u\n", limits->no_turbo);
+		ret = sprintf(buf, "%u\n", global.no_turbo);
 
 	mutex_unlock(&intel_pstate_driver_lock);
 
@@ -1181,19 +1165,19 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
 	mutex_lock(&intel_pstate_limits_lock);
 
 	update_turbo_state();
-	if (limits->turbo_disabled) {
+	if (global.turbo_disabled) {
 		pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
 		mutex_unlock(&intel_pstate_limits_lock);
 		mutex_unlock(&intel_pstate_driver_lock);
 		return -EPERM;
 	}
 
-	limits->no_turbo = clamp_t(int, input, 0, 1);
-
-	intel_pstate_update_policies();
+	global.no_turbo = clamp_t(int, input, 0, 1);
 
 	mutex_unlock(&intel_pstate_limits_lock);
 
+	intel_pstate_update_policies();
+
 	mutex_unlock(&intel_pstate_driver_lock);
 
 	return count;
@@ -1218,19 +1202,16 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
 
 	mutex_lock(&intel_pstate_limits_lock);
 
-	limits->max_sysfs_pct = clamp_t(int, input, 0 , 100);
-	limits->max_perf_pct = min(limits->max_policy_pct,
-				   limits->max_sysfs_pct);
-	limits->max_perf_pct = max(limits->min_policy_pct,
-				   limits->max_perf_pct);
-	limits->max_perf_pct = max(limits->min_perf_pct,
-				   limits->max_perf_pct);
-	limits->max_perf = percent_ext_fp(limits->max_perf_pct);
-
-	intel_pstate_update_policies();
+	global.max_sysfs_pct = clamp_t(int, input, 0 , 100);
+	global.max_perf_pct = min(global.max_policy_pct, global.max_sysfs_pct);
+	global.max_perf_pct = max(global.min_policy_pct, global.max_perf_pct);
+	global.max_perf_pct = max(global.min_perf_pct, global.max_perf_pct);
+	global.max_perf = percent_ext_fp(global.max_perf_pct);
 
 	mutex_unlock(&intel_pstate_limits_lock);
 
+	intel_pstate_update_policies();
+
 	mutex_unlock(&intel_pstate_driver_lock);
 
 	return count;
@@ -1255,19 +1236,16 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
 
 	mutex_lock(&intel_pstate_limits_lock);
 
-	limits->min_sysfs_pct = clamp_t(int, input, 0 , 100);
-	limits->min_perf_pct = max(limits->min_policy_pct,
-				   limits->min_sysfs_pct);
-	limits->min_perf_pct = min(limits->max_policy_pct,
-				   limits->min_perf_pct);
-	limits->min_perf_pct = min(limits->max_perf_pct,
-				   limits->min_perf_pct);
-	limits->min_perf = percent_ext_fp(limits->min_perf_pct);
-
-	intel_pstate_update_policies();
+	global.min_sysfs_pct = clamp_t(int, input, 0 , 100);
+	global.min_perf_pct = max(global.min_policy_pct, global.min_sysfs_pct);
+	global.min_perf_pct = min(global.max_policy_pct, global.min_perf_pct);
+	global.min_perf_pct = min(global.max_perf_pct, global.min_perf_pct);
+	global.min_perf = percent_ext_fp(global.min_perf_pct);
 
 	mutex_unlock(&intel_pstate_limits_lock);
 
+	intel_pstate_update_policies();
+
 	mutex_unlock(&intel_pstate_driver_lock);
 
 	return count;
@@ -1387,7 +1365,7 @@ static u64 atom_get_val(struct cpudata *cpudata, int pstate)
 	u32 vid;
 
 	val = (u64)pstate << 8;
-	if (limits->no_turbo && !limits->turbo_disabled)
+	if (global.no_turbo && !global.turbo_disabled)
 		val |= (u64)1 << 32;
 
 	vid_fp = cpudata->vid.min + mul_fp(
@@ -1557,7 +1535,7 @@ static u64 core_get_val(struct cpudata *cpudata, int pstate)
 	u64 val;
 
 	val = (u64)pstate << 8;
-	if (limits->no_turbo && !limits->turbo_disabled)
+	if (global.no_turbo && !global.turbo_disabled)
 		val |= (u64)1 << 32;
 
 	return val;
@@ -1683,9 +1661,9 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
 	int max_perf = cpu->pstate.turbo_pstate;
 	int max_perf_adj;
 	int min_perf;
-	struct perf_limits *perf_limits = limits;
+	struct perf_limits *perf_limits = &global;
 
-	if (limits->no_turbo || limits->turbo_disabled)
+	if (global.no_turbo || global.turbo_disabled)
 		max_perf = cpu->pstate.max_pstate;
 
 	if (per_cpu_limits)
@@ -1820,7 +1798,7 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
 
 	sample->busy_scaled = busy_frac * 100;
 
-	target = limits->no_turbo || limits->turbo_disabled ?
+	target = global.no_turbo || global.turbo_disabled ?
 			cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
 	target += target >> 2;
 	target = mul_fp(target, busy_frac);
@@ -2116,7 +2094,7 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 {
 	struct cpudata *cpu;
-	struct perf_limits *perf_limits = NULL;
+	struct perf_limits *perf_limits = &global;
 
 	if (!policy->cpuinfo.max_freq)
 		return -ENODEV;
@@ -2139,21 +2117,6 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 
 	mutex_lock(&intel_pstate_limits_lock);
 
-	if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
-		pr_debug("set performance\n");
-		if (!perf_limits) {
-			limits = &performance_limits;
-			perf_limits = limits;
-		}
-	} else {
-		pr_debug("set powersave\n");
-		if (!perf_limits) {
-			limits = &powersave_limits;
-			perf_limits = limits;
-		}
-
-	}
-
 	intel_pstate_update_perf_limits(policy, perf_limits);
 
 	if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
@@ -2177,16 +2140,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
 {
 	struct cpudata *cpu = all_cpu_data[policy->cpu];
-	struct perf_limits *perf_limits;
-
-	if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
-		perf_limits = &performance_limits;
-	else
-		perf_limits = &powersave_limits;
 
 	update_turbo_state();
-	policy->cpuinfo.max_freq = perf_limits->turbo_disabled ||
-					perf_limits->no_turbo ?
+	policy->cpuinfo.max_freq = global.turbo_disabled || global.no_turbo ?
 					cpu->pstate.max_freq :
 					cpu->pstate.turbo_freq;
 
@@ -2201,9 +2157,9 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
 		unsigned int max_freq, min_freq;
 
 		max_freq = policy->cpuinfo.max_freq *
-					perf_limits->max_sysfs_pct / 100;
+					global.max_sysfs_pct / 100;
 		min_freq = policy->cpuinfo.max_freq *
-					perf_limits->min_sysfs_pct / 100;
+					global.min_sysfs_pct / 100;
 		cpufreq_verify_within_limits(policy, min_freq, max_freq);
 	}
 
@@ -2255,7 +2211,7 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
 	/* cpuinfo and default policy values */
 	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
 	update_turbo_state();
-	policy->cpuinfo.max_freq = limits->turbo_disabled ?
+	policy->cpuinfo.max_freq = global.turbo_disabled ?
 			cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
 	policy->cpuinfo.max_freq *= cpu->pstate.scaling;
 
@@ -2275,7 +2231,7 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
 		return ret;
 
 	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
-	if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100)
+	if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE))
 		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
 	else
 		policy->policy = CPUFREQ_POLICY_POWERSAVE;
@@ -2301,7 +2257,7 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
 	struct cpudata *cpu = all_cpu_data[policy->cpu];
 
 	update_turbo_state();
-	policy->cpuinfo.max_freq = limits->turbo_disabled ?
+	policy->cpuinfo.max_freq = global.turbo_disabled ?
 			cpu->pstate.max_freq : cpu->pstate.turbo_freq;
 
 	cpufreq_verify_within_cpu_limits(policy);
@@ -2317,7 +2273,7 @@ static unsigned int intel_cpufreq_turbo_update(struct cpudata *cpu,
 
 	update_turbo_state();
 
-	max_freq = limits->no_turbo || limits->turbo_disabled ?
+	max_freq = global.no_turbo || global.turbo_disabled ?
 			cpu->pstate.max_freq : cpu->pstate.turbo_freq;
 	policy->cpuinfo.max_freq = max_freq;
 	if (policy->max > max_freq)
@@ -2425,13 +2381,7 @@ static int intel_pstate_register_driver(void)
 {
 	int ret;
 
-	intel_pstate_init_limits(&powersave_limits);
-	intel_pstate_set_performance_limits(&performance_limits);
-	if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE) &&
-	    intel_pstate_driver == &intel_pstate)
-		limits = &performance_limits;
-	else
-		limits = &powersave_limits;
+	intel_pstate_init_limits(&global);
 
 	ret = cpufreq_register_driver(intel_pstate_driver);
 	if (ret) {
-- 
cgit v1.2.3


From 436ecf5519d892397af133a79ccd38a17c25fa51 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Fri, 17 Mar 2017 17:21:28 +0100
Subject: USB: serial: qcserial: add Dell DW5811e
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is a Dell branded Sierra Wireless EM7455.

Cc: <stable@vger.kernel.org>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/qcserial.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c
index 696458db7e3c..38b3f0d8cd58 100644
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c
@@ -169,6 +169,8 @@ static const struct usb_device_id id_table[] = {
 	{DEVICE_SWI(0x413c, 0x81a9)},	/* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */
 	{DEVICE_SWI(0x413c, 0x81b1)},	/* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */
 	{DEVICE_SWI(0x413c, 0x81b3)},	/* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */
+	{DEVICE_SWI(0x413c, 0x81b5)},	/* Dell Wireless 5811e QDL */
+	{DEVICE_SWI(0x413c, 0x81b6)},	/* Dell Wireless 5811e QDL */
 
 	/* Huawei devices */
 	{DEVICE_HWI(0x03f0, 0x581d)},	/* HP lt4112 LTE/HSPA+ Gobi 4G Modem (Huawei me906e) */
-- 
cgit v1.2.3


From 74e3f6e63da6c8e8246fba1689e040bc926b4a1a Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Tue, 14 Mar 2017 15:24:51 +0530
Subject: parisc: perf: Fix potential NULL pointer dereference

Fix potential NULL pointer dereference and clean up
coding style errors (code indent, trailing whitespaces).

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/perf.c | 94 ++++++++++++++++++++++++-----------------------
 1 file changed, 49 insertions(+), 45 deletions(-)

diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c
index e282a5131d77..6017a5af2e6e 100644
--- a/arch/parisc/kernel/perf.c
+++ b/arch/parisc/kernel/perf.c
@@ -39,7 +39,7 @@
  *  the PDC INTRIGUE calls.  This is done to eliminate bugs introduced
  *  in various PDC revisions.  The code is much more maintainable
  *  and reliable this way vs having to debug on every version of PDC
- *  on every box. 
+ *  on every box.
  */
 
 #include <linux/capability.h>
@@ -195,8 +195,8 @@ static int perf_config(uint32_t *image_ptr);
 static int perf_release(struct inode *inode, struct file *file);
 static int perf_open(struct inode *inode, struct file *file);
 static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos);
-static ssize_t perf_write(struct file *file, const char __user *buf, size_t count, 
-	loff_t *ppos);
+static ssize_t perf_write(struct file *file, const char __user *buf,
+	size_t count, loff_t *ppos);
 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 static void perf_start_counters(void);
 static int perf_stop_counters(uint32_t *raddr);
@@ -222,7 +222,7 @@ extern void perf_intrigue_disable_perf_counters (void);
 /*
  * configure:
  *
- * Configure the cpu with a given data image.  First turn off the counters, 
+ * Configure the cpu with a given data image.  First turn off the counters,
  * then download the image, then turn the counters back on.
  */
 static int perf_config(uint32_t *image_ptr)
@@ -234,7 +234,7 @@ static int perf_config(uint32_t *image_ptr)
 	error = perf_stop_counters(raddr);
 	if (error != 0) {
 		printk("perf_config: perf_stop_counters = %ld\n", error);
-		return -EINVAL; 
+		return -EINVAL;
 	}
 
 printk("Preparing to write image\n");
@@ -242,7 +242,7 @@ printk("Preparing to write image\n");
 	error = perf_write_image((uint64_t *)image_ptr);
 	if (error != 0) {
 		printk("perf_config: DOWNLOAD = %ld\n", error);
-		return -EINVAL; 
+		return -EINVAL;
 	}
 
 printk("Preparing to start counters\n");
@@ -254,7 +254,7 @@ printk("Preparing to start counters\n");
 }
 
 /*
- * Open the device and initialize all of its memory.  The device is only 
+ * Open the device and initialize all of its memory.  The device is only
  * opened once, but can be "queried" by multiple processes that know its
  * file descriptor.
  */
@@ -298,19 +298,19 @@ static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t
  * called on the processor that the download should happen
  * on.
  */
-static ssize_t perf_write(struct file *file, const char __user *buf, size_t count, 
-	loff_t *ppos)
+static ssize_t perf_write(struct file *file, const char __user *buf,
+	size_t count, loff_t *ppos)
 {
 	size_t image_size;
 	uint32_t image_type;
 	uint32_t interface_type;
 	uint32_t test;
 
-	if (perf_processor_interface == ONYX_INTF) 
+	if (perf_processor_interface == ONYX_INTF)
 		image_size = PCXU_IMAGE_SIZE;
-	else if (perf_processor_interface == CUDA_INTF) 
+	else if (perf_processor_interface == CUDA_INTF)
 		image_size = PCXW_IMAGE_SIZE;
-	else 
+	else
 		return -EFAULT;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -330,22 +330,22 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun
 
 	/* First check the machine type is correct for
 	   the requested image */
-        if (((perf_processor_interface == CUDA_INTF) &&
-		       (interface_type != CUDA_INTF)) ||
-	    ((perf_processor_interface == ONYX_INTF) &&
-	               (interface_type != ONYX_INTF))) 
+	if (((perf_processor_interface == CUDA_INTF) &&
+			(interface_type != CUDA_INTF)) ||
+		((perf_processor_interface == ONYX_INTF) &&
+			(interface_type != ONYX_INTF)))
 		return -EINVAL;
 
 	/* Next check to make sure the requested image
 	   is valid */
-	if (((interface_type == CUDA_INTF) && 
+	if (((interface_type == CUDA_INTF) &&
 		       (test >= MAX_CUDA_IMAGES)) ||
-	    ((interface_type == ONYX_INTF) && 
-		       (test >= MAX_ONYX_IMAGES))) 
+	    ((interface_type == ONYX_INTF) &&
+		       (test >= MAX_ONYX_IMAGES)))
 		return -EINVAL;
 
 	/* Copy the image into the processor */
-	if (interface_type == CUDA_INTF) 
+	if (interface_type == CUDA_INTF)
 		return perf_config(cuda_images[test]);
 	else
 		return perf_config(onyx_images[test]);
@@ -359,7 +359,7 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun
 static void perf_patch_images(void)
 {
 #if 0 /* FIXME!! */
-/* 
+/*
  * NOTE:  this routine is VERY specific to the current TLB image.
  * If the image is changed, this routine might also need to be changed.
  */
@@ -367,9 +367,9 @@ static void perf_patch_images(void)
 	extern void $i_dtlb_miss_2_0();
 	extern void PA2_0_iva();
 
-	/* 
+	/*
 	 * We can only use the lower 32-bits, the upper 32-bits should be 0
-	 * anyway given this is in the kernel 
+	 * anyway given this is in the kernel
 	 */
 	uint32_t itlb_addr  = (uint32_t)&($i_itlb_miss_2_0);
 	uint32_t dtlb_addr  = (uint32_t)&($i_dtlb_miss_2_0);
@@ -377,21 +377,21 @@ static void perf_patch_images(void)
 
 	if (perf_processor_interface == ONYX_INTF) {
 		/* clear last 2 bytes */
-		onyx_images[TLBMISS][15] &= 0xffffff00;  
+		onyx_images[TLBMISS][15] &= 0xffffff00;
 		/* set 2 bytes */
 		onyx_images[TLBMISS][15] |= (0x000000ff&((dtlb_addr) >> 24));
 		onyx_images[TLBMISS][16] = (dtlb_addr << 8)&0xffffff00;
 		onyx_images[TLBMISS][17] = itlb_addr;
 
 		/* clear last 2 bytes */
-		onyx_images[TLBHANDMISS][15] &= 0xffffff00;  
+		onyx_images[TLBHANDMISS][15] &= 0xffffff00;
 		/* set 2 bytes */
 		onyx_images[TLBHANDMISS][15] |= (0x000000ff&((dtlb_addr) >> 24));
 		onyx_images[TLBHANDMISS][16] = (dtlb_addr << 8)&0xffffff00;
 		onyx_images[TLBHANDMISS][17] = itlb_addr;
 
 		/* clear last 2 bytes */
-		onyx_images[BIG_CPI][15] &= 0xffffff00;  
+		onyx_images[BIG_CPI][15] &= 0xffffff00;
 		/* set 2 bytes */
 		onyx_images[BIG_CPI][15] |= (0x000000ff&((dtlb_addr) >> 24));
 		onyx_images[BIG_CPI][16] = (dtlb_addr << 8)&0xffffff00;
@@ -404,24 +404,24 @@ static void perf_patch_images(void)
 
 	} else if (perf_processor_interface == CUDA_INTF) {
 		/* Cuda interface */
-		cuda_images[TLBMISS][16] =  
+		cuda_images[TLBMISS][16] =
 			(cuda_images[TLBMISS][16]&0xffff0000) |
 			((dtlb_addr >> 8)&0x0000ffff);
-		cuda_images[TLBMISS][17] = 
+		cuda_images[TLBMISS][17] =
 			((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
 		cuda_images[TLBMISS][18] = (itlb_addr << 16)&0xffff0000;
 
-		cuda_images[TLBHANDMISS][16] = 
+		cuda_images[TLBHANDMISS][16] =
 			(cuda_images[TLBHANDMISS][16]&0xffff0000) |
 			((dtlb_addr >> 8)&0x0000ffff);
-		cuda_images[TLBHANDMISS][17] = 
+		cuda_images[TLBHANDMISS][17] =
 			((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
 		cuda_images[TLBHANDMISS][18] = (itlb_addr << 16)&0xffff0000;
 
-		cuda_images[BIG_CPI][16] = 
+		cuda_images[BIG_CPI][16] =
 			(cuda_images[BIG_CPI][16]&0xffff0000) |
 			((dtlb_addr >> 8)&0x0000ffff);
-		cuda_images[BIG_CPI][17] = 
+		cuda_images[BIG_CPI][17] =
 			((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
 		cuda_images[BIG_CPI][18] = (itlb_addr << 16)&0xffff0000;
 	} else {
@@ -433,7 +433,7 @@ static void perf_patch_images(void)
 
 /*
  * ioctl routine
- * All routines effect the processor that they are executed on.  Thus you 
+ * All routines effect the processor that they are executed on.  Thus you
  * must be running on the processor that you wish to change.
  */
 
@@ -459,7 +459,7 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 			}
 
 			/* copy out the Counters */
-			if (copy_to_user((void __user *)arg, raddr, 
+			if (copy_to_user((void __user *)arg, raddr,
 					sizeof (raddr)) != 0) {
 				error =  -EFAULT;
 				break;
@@ -487,7 +487,7 @@ static const struct file_operations perf_fops = {
 	.open = perf_open,
 	.release = perf_release
 };
-	
+
 static struct miscdevice perf_dev = {
 	MISC_DYNAMIC_MINOR,
 	PA_PERF_DEV,
@@ -595,7 +595,7 @@ static int perf_stop_counters(uint32_t *raddr)
 		/* OR sticky2 (bit 1496) to counter2 bit 32 */
 		tmp64 |= (userbuf[23] >> 8) & 0x0000000080000000;
 		raddr[2] = (uint32_t)tmp64;
-		
+
 		/* Counter3 is bits 1497 to 1528 */
 		tmp64 =  (userbuf[23] >> 7) & 0x00000000ffffffff;
 		/* OR sticky3 (bit 1529) to counter3 bit 32 */
@@ -617,7 +617,7 @@ static int perf_stop_counters(uint32_t *raddr)
 		userbuf[22] = 0;
 		userbuf[23] = 0;
 
-		/* 
+		/*
 		 * Write back the zeroed bytes + the image given
 		 * the read was destructive.
 		 */
@@ -625,13 +625,13 @@ static int perf_stop_counters(uint32_t *raddr)
 	} else {
 
 		/*
-		 * Read RDR-15 which contains the counters and sticky bits 
+		 * Read RDR-15 which contains the counters and sticky bits
 		 */
 		if (!perf_rdr_read_ubuf(15, userbuf)) {
 			return -13;
 		}
 
-		/* 
+		/*
 		 * Clear out the counters
 		 */
 		perf_rdr_clear(15);
@@ -644,7 +644,7 @@ static int perf_stop_counters(uint32_t *raddr)
 		raddr[2] = (uint32_t)((userbuf[1] >> 32) & 0x00000000ffffffffUL);
 		raddr[3] = (uint32_t)(userbuf[1] & 0x00000000ffffffffUL);
 	}
- 
+
 	return 0;
 }
 
@@ -682,7 +682,7 @@ static int perf_rdr_read_ubuf(uint32_t	rdr_num, uint64_t *buffer)
 	i = tentry->num_words;
 	while (i--) {
 		buffer[i] = 0;
-	}	
+	}
 
 	/* Check for bits an even number of 64 */
 	if ((xbits = width & 0x03f) != 0) {
@@ -808,18 +808,22 @@ static int perf_write_image(uint64_t *memaddr)
 	}
 
 	runway = ioremap_nocache(cpu_device->hpa.start, 4096);
+	if (!runway) {
+		pr_err("perf_write_image: ioremap failed!\n");
+		return -ENOMEM;
+	}
 
 	/* Merge intrigue bits into Runway STATUS 0 */
 	tmp64 = __raw_readq(runway + RUNWAY_STATUS) & 0xffecfffffffffffful;
-	__raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul), 
+	__raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul),
 		     runway + RUNWAY_STATUS);
-	
+
 	/* Write RUNWAY DEBUG registers */
 	for (i = 0; i < 8; i++) {
 		__raw_writeq(*memaddr++, runway + RUNWAY_DEBUG);
 	}
 
-	return 0; 
+	return 0;
 }
 
 /*
@@ -843,7 +847,7 @@ printk("perf_rdr_write\n");
 			perf_rdr_shift_out_U(rdr_num, buffer[i]);
 		} else {
 			perf_rdr_shift_out_W(rdr_num, buffer[i]);
-		}	
+		}
 	}
 printk("perf_rdr_write done\n");
 }
-- 
cgit v1.2.3


From 73580dac7618e4bcd21679f553cf3c97323fec46 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sat, 18 Mar 2017 17:13:27 +0100
Subject: parisc: Fix system shutdown halt

On those parisc machines which don't provide a software power off
function, the system currently kills the init process at the end of a
shutdown and unexpectedly restarts insteads of halting.
Fix it by adding a loop which will not return.

Signed-off-by: Helge Deller <deller@gmx.de>
Cc: stable@vger.kernel.org # 4.9+
---
 arch/parisc/kernel/process.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 06f7ca7fe70b..b76f503eee4a 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -142,6 +142,8 @@ void machine_power_off(void)
 
 	printk(KERN_EMERG "System shut down completed.\n"
 	       "Please power this system off now.");
+
+	for (;;);
 }
 
 void (*pm_power_off)(void) = machine_power_off;
-- 
cgit v1.2.3


From 9c28ca4ff8bad7486182291a55b4f67a70af718d Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Wed, 8 Mar 2017 00:09:59 -0800
Subject: target: Drop pointless tfo->check_stop_free check

All in-tree fabric drivers provide a tfo->check_stop_free(),
so there is no need to do the extra check within existing
transport_cmd_check_stop_to_fabric() code.

Just to be sure, add a check in target_fabric_tf_ops_check()
to notify any out-of-tree drivers that might be missing it.

Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_configfs.c  | 4 ++++
 drivers/target/target_core_transport.c | 3 +--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index 54b36c9835be..38b5025e4c7a 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -421,6 +421,10 @@ static int target_fabric_tf_ops_check(const struct target_core_fabric_ops *tfo)
 		pr_err("Missing tfo->aborted_task()\n");
 		return -EINVAL;
 	}
+	if (!tfo->check_stop_free) {
+		pr_err("Missing tfo->check_stop_free()\n");
+		return -EINVAL;
+	}
 	/*
 	 * We at least require tfo->fabric_make_wwn(), tfo->fabric_drop_wwn()
 	 * tfo->fabric_make_tpg() and tfo->fabric_drop_tpg() in
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 434d9d693989..b1a3cdb29468 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -636,8 +636,7 @@ static int transport_cmd_check_stop_to_fabric(struct se_cmd *cmd)
 	 * Fabric modules are expected to return '1' here if the se_cmd being
 	 * passed is released at this point, or zero if not being released.
 	 */
-	return cmd->se_tfo->check_stop_free ? cmd->se_tfo->check_stop_free(cmd)
-		: 0;
+	return cmd->se_tfo->check_stop_free(cmd);
 }
 
 static void transport_lun_remove_cmd(struct se_cmd *cmd)
-- 
cgit v1.2.3


From 3abaa2bfdb1e6bb33d38a2e82cf3bb82ec0197bf Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Wed, 1 Mar 2017 23:14:39 -0600
Subject: tcmu: allow hw_max_sectors greater than 128

tcmu hard codes the hw_max_sectors to 128 which is a litle small.
Userspace uses the max_sectors to report the optimal IO size and
some initiators perform better with larger IOs (open-iscsi seems
to do better with 256 to 512 depending on the test).

(Fix do not display hw max sectors twice - MNC)

Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 54 +++++++++++++++++++++++++--------------
 1 file changed, 35 insertions(+), 19 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index c3adefe95e50..24e8580f07b8 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -960,7 +960,8 @@ static int tcmu_configure_device(struct se_device *dev)
 	if (dev->dev_attrib.hw_block_size == 0)
 		dev->dev_attrib.hw_block_size = 512;
 	/* Other attributes can be configured in userspace */
-	dev->dev_attrib.hw_max_sectors = 128;
+	if (!dev->dev_attrib.hw_max_sectors)
+		dev->dev_attrib.hw_max_sectors = 128;
 	dev->dev_attrib.hw_queue_depth = 128;
 
 	ret = tcmu_netlink_event(TCMU_CMD_ADDED_DEVICE, udev->uio_info.name,
@@ -1031,16 +1032,42 @@ static void tcmu_free_device(struct se_device *dev)
 }
 
 enum {
-	Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_err,
+	Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_hw_max_sectors,
+	Opt_err,
 };
 
 static match_table_t tokens = {
 	{Opt_dev_config, "dev_config=%s"},
 	{Opt_dev_size, "dev_size=%u"},
 	{Opt_hw_block_size, "hw_block_size=%u"},
+	{Opt_hw_max_sectors, "hw_max_sectors=%u"},
 	{Opt_err, NULL}
 };
 
+static int tcmu_set_dev_attrib(substring_t *arg, u32 *dev_attrib)
+{
+	unsigned long tmp_ul;
+	char *arg_p;
+	int ret;
+
+	arg_p = match_strdup(arg);
+	if (!arg_p)
+		return -ENOMEM;
+
+	ret = kstrtoul(arg_p, 0, &tmp_ul);
+	kfree(arg_p);
+	if (ret < 0) {
+		pr_err("kstrtoul() failed for dev attrib\n");
+		return ret;
+	}
+	if (!tmp_ul) {
+		pr_err("dev attrib must be nonzero\n");
+		return -EINVAL;
+	}
+	*dev_attrib = tmp_ul;
+	return 0;
+}
+
 static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
 		const char *page, ssize_t count)
 {
@@ -1048,7 +1075,6 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
 	char *orig, *ptr, *opts, *arg_p;
 	substring_t args[MAX_OPT_ARGS];
 	int ret = 0, token;
-	unsigned long tmp_ul;
 
 	opts = kstrdup(page, GFP_KERNEL);
 	if (!opts)
@@ -1082,22 +1108,12 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
 				pr_err("kstrtoul() failed for dev_size=\n");
 			break;
 		case Opt_hw_block_size:
-			arg_p = match_strdup(&args[0]);
-			if (!arg_p) {
-				ret = -ENOMEM;
-				break;
-			}
-			ret = kstrtoul(arg_p, 0, &tmp_ul);
-			kfree(arg_p);
-			if (ret < 0) {
-				pr_err("kstrtoul() failed for hw_block_size=\n");
-				break;
-			}
-			if (!tmp_ul) {
-				pr_err("hw_block_size must be nonzero\n");
-				break;
-			}
-			dev->dev_attrib.hw_block_size = tmp_ul;
+			ret = tcmu_set_dev_attrib(&args[0],
+					&(dev->dev_attrib.hw_block_size));
+			break;
+		case Opt_hw_max_sectors:
+			ret = tcmu_set_dev_attrib(&args[0],
+					&(dev->dev_attrib.hw_max_sectors));
 			break;
 		default:
 			break;
-- 
cgit v1.2.3


From 2579325ca0acc598fdf41ba12b2871d3467f28df Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Wed, 1 Mar 2017 23:14:40 -0600
Subject: tcmu: return on first Opt parse failure

We only were returing failure if the last opt to be parsed failed.
This has a return failure when we first detect a failure.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 24e8580f07b8..4339ab2133b3 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1118,6 +1118,9 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
 		default:
 			break;
 		}
+
+		if (ret)
+			break;
 	}
 
 	kfree(orig);
-- 
cgit v1.2.3


From 530c6891b1220cba780b6c18f4691d85a3435080 Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Wed, 1 Mar 2017 23:13:24 -0600
Subject: target: allow ALUA setup for some passthrough backends

This patch allows passthrough backends to use the core/base LIO
ALUA setup and state checks, but still handle the execution of
commands.

This will allow the target_core_user module to execute STPG and RTPG
in userspace, and not have to duplicate the ALUA state checks, path
information (needed so we can check if command is executable on
specific paths) and setup (rtslib sets/updates the configfs ALUA
interface like it does for iblock or file).

For STPG, the target_core_user userspace daemon, tcmu-runner will
still execute the STPG, and to update the core/base LIO state it
will use the existing configfs interface. For RTPG, tcmu-runner
will loop over configfs and/or cache the state.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_alua.c    | 9 +++++----
 drivers/target/target_core_pscsi.c   | 3 ++-
 drivers/target/target_core_tpg.c     | 3 ++-
 include/target/target_core_backend.h | 7 ++++++-
 4 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c
index f5e330099bfc..a41bbb8087cf 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -691,7 +691,7 @@ target_alua_state_check(struct se_cmd *cmd)
 
 	if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)
 		return 0;
-	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
+	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA)
 		return 0;
 
 	/*
@@ -1973,7 +1973,7 @@ ssize_t core_alua_store_tg_pt_gp_info(
 	unsigned char buf[TG_PT_GROUP_NAME_BUF];
 	int move = 0;
 
-	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH ||
+	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA ||
 	    (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE))
 		return -ENODEV;
 
@@ -2230,7 +2230,7 @@ ssize_t core_alua_store_offline_bit(
 	unsigned long tmp;
 	int ret;
 
-	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH ||
+	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA ||
 	    (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE))
 		return -ENODEV;
 
@@ -2316,7 +2316,8 @@ ssize_t core_alua_store_secondary_write_metadata(
 
 int core_setup_alua(struct se_device *dev)
 {
-	if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) &&
+	if (!(dev->transport->transport_flags &
+	     TRANSPORT_FLAG_PASSTHROUGH_ALUA) &&
 	    !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) {
 		struct t10_alua_lu_gp_member *lu_gp_mem;
 
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 44d92f23a3f0..94cda7991e80 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -1080,7 +1080,8 @@ static void pscsi_req_done(struct request *req, int uptodate)
 static const struct target_backend_ops pscsi_ops = {
 	.name			= "pscsi",
 	.owner			= THIS_MODULE,
-	.transport_flags	= TRANSPORT_FLAG_PASSTHROUGH,
+	.transport_flags	= TRANSPORT_FLAG_PASSTHROUGH |
+				  TRANSPORT_FLAG_PASSTHROUGH_ALUA,
 	.attach_hba		= pscsi_attach_hba,
 	.detach_hba		= pscsi_detach_hba,
 	.pmode_enable_hba	= pscsi_pmode_enable_hba,
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index c0dbfa016575..6fb191914f45 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -602,7 +602,8 @@ int core_tpg_add_lun(
 	if (ret)
 		goto out_kill_ref;
 
-	if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) &&
+	if (!(dev->transport->transport_flags &
+	     TRANSPORT_FLAG_PASSTHROUGH_ALUA) &&
 	    !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE))
 		target_attach_tg_pt_gp(lun, dev->t10_alua.default_tg_pt_gp);
 
diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index b54b98dc2d4a..1b0f447ce850 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -4,7 +4,12 @@
 #include <linux/types.h>
 #include <target/target_core_base.h>
 
-#define TRANSPORT_FLAG_PASSTHROUGH		1
+#define TRANSPORT_FLAG_PASSTHROUGH		0x1
+/*
+ * ALUA commands, state checks and setup operations are handled by the
+ * backend module.
+ */
+#define TRANSPORT_FLAG_PASSTHROUGH_ALUA		0x2
 
 struct request_queue;
 struct scatterlist;
-- 
cgit v1.2.3


From 0a4145729871ef29afe8b0c57560a1f5bd736416 Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Wed, 1 Mar 2017 23:13:25 -0600
Subject: target: fail ALUA transitions for pscsi

We do not setup the LU group for pscsi devices, so if you write
a state to alua_access_state that will cause a transition you will
get a NULL pointer dereference.

This patch will fail attempts to try and transition the path
for backend devices that set the TRANSPORT_FLAG_PASSTHROUGH_ALUA
flag.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_alua.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c
index a41bbb8087cf..5b5a1e250a65 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -1149,6 +1149,9 @@ int core_alua_do_port_transition(
 	struct t10_alua_tg_pt_gp *tg_pt_gp;
 	int primary, valid_states, rc = 0;
 
+	if (l_dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA)
+		return -ENODEV;
+
 	valid_states = l_tg_pt_gp->tg_pt_gp_alua_supported_states;
 	if (core_alua_check_transition(new_state, valid_states, &primary) != 0)
 		return -EINVAL;
-- 
cgit v1.2.3


From 207ee84133c00a8a2a5bdec94df4a5b37d78881c Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Wed, 1 Mar 2017 23:13:26 -0600
Subject: target: Use system workqueue for ALUA transitions

If tcmu-runner is processing a STPG and needs to change the kernel's
ALUA state then we cannot use the same work queue for task management
requests and ALUA transitions, because we could deadlock. The problem
occurs when a STPG times out before tcmu-runner is able to
call into target_tg_pt_gp_alua_access_state_store->
core_alua_do_port_transition -> core_alua_do_transition_tg_pt ->
queue_work. In this case, the tmr is on the work queue waiting for
the STPG to complete, but the STPG transition is now queued behind
the waiting tmr.

Note:
This bug will also be fixed by this patch:
http://www.spinics.net/lists/target-devel/msg14560.html
which switches the tmr code to use the system workqueues.

For both, I am not sure if we need a dedicated workqueue since
it is not a performance path and I do not think we need WQ_MEM_RECLAIM
to make forward progress to free up memory like the block layer does.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_alua.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c
index 5b5a1e250a65..58bf5e6350ac 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -1121,13 +1121,11 @@ static int core_alua_do_transition_tg_pt(
 		unsigned long transition_tmo;
 
 		transition_tmo = tg_pt_gp->tg_pt_gp_implicit_trans_secs * HZ;
-		queue_delayed_work(tg_pt_gp->tg_pt_gp_dev->tmr_wq,
-				   &tg_pt_gp->tg_pt_gp_transition_work,
-				   transition_tmo);
+		schedule_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work,
+				      transition_tmo);
 	} else {
 		tg_pt_gp->tg_pt_gp_transition_complete = &wait;
-		queue_delayed_work(tg_pt_gp->tg_pt_gp_dev->tmr_wq,
-				   &tg_pt_gp->tg_pt_gp_transition_work, 0);
+		schedule_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work, 0);
 		wait_for_completion(&wait);
 		tg_pt_gp->tg_pt_gp_transition_complete = NULL;
 	}
-- 
cgit v1.2.3


From d7175373f2745ed4abe5b388d5aabd06304f801e Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Thu, 2 Mar 2017 04:59:48 -0600
Subject: target: fix ALUA transition timeout handling

The implicit transition time tells initiators the min time
to wait before timing out a transition. We currently schedule
the transition to occur in tg_pt_gp_implicit_trans_secs
seconds so there is no room for delays. If
core_alua_do_transition_tg_pt_work->core_alua_update_tpg_primary_metadata
needs to write out info to a remote file, then the initiator can
easily time out the operation.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_alua.c | 23 ++++++++---------------
 include/target/target_core_base.h |  2 +-
 2 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c
index 58bf5e6350ac..594807cd92cb 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -1013,7 +1013,7 @@ static void core_alua_queue_state_change_ua(struct t10_alua_tg_pt_gp *tg_pt_gp)
 static void core_alua_do_transition_tg_pt_work(struct work_struct *work)
 {
 	struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(work,
-		struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work.work);
+		struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work);
 	struct se_device *dev = tg_pt_gp->tg_pt_gp_dev;
 	bool explicit = (tg_pt_gp->tg_pt_gp_alua_access_status ==
 			 ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG);
@@ -1076,13 +1076,12 @@ static int core_alua_do_transition_tg_pt(
 	/*
 	 * Flush any pending transitions
 	 */
-	if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs &&
-	    atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) ==
+	if (!explicit && atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) ==
 	    ALUA_ACCESS_STATE_TRANSITION) {
 		/* Just in case */
 		tg_pt_gp->tg_pt_gp_alua_pending_state = new_state;
 		tg_pt_gp->tg_pt_gp_transition_complete = &wait;
-		flush_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work);
+		flush_work(&tg_pt_gp->tg_pt_gp_transition_work);
 		wait_for_completion(&wait);
 		tg_pt_gp->tg_pt_gp_transition_complete = NULL;
 		return 0;
@@ -1117,15 +1116,9 @@ static int core_alua_do_transition_tg_pt(
 	atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt);
 	spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
 
-	if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs) {
-		unsigned long transition_tmo;
-
-		transition_tmo = tg_pt_gp->tg_pt_gp_implicit_trans_secs * HZ;
-		schedule_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work,
-				      transition_tmo);
-	} else {
+	schedule_work(&tg_pt_gp->tg_pt_gp_transition_work);
+	if (explicit) {
 		tg_pt_gp->tg_pt_gp_transition_complete = &wait;
-		schedule_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work, 0);
 		wait_for_completion(&wait);
 		tg_pt_gp->tg_pt_gp_transition_complete = NULL;
 	}
@@ -1696,8 +1689,8 @@ struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp(struct se_device *dev,
 	mutex_init(&tg_pt_gp->tg_pt_gp_md_mutex);
 	spin_lock_init(&tg_pt_gp->tg_pt_gp_lock);
 	atomic_set(&tg_pt_gp->tg_pt_gp_ref_cnt, 0);
-	INIT_DELAYED_WORK(&tg_pt_gp->tg_pt_gp_transition_work,
-			  core_alua_do_transition_tg_pt_work);
+	INIT_WORK(&tg_pt_gp->tg_pt_gp_transition_work,
+		  core_alua_do_transition_tg_pt_work);
 	tg_pt_gp->tg_pt_gp_dev = dev;
 	atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
 		ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED);
@@ -1805,7 +1798,7 @@ void core_alua_free_tg_pt_gp(
 	dev->t10_alua.alua_tg_pt_gps_counter--;
 	spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
 
-	flush_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work);
+	flush_work(&tg_pt_gp->tg_pt_gp_transition_work);
 
 	/*
 	 * Allow a struct t10_alua_tg_pt_gp_member * referenced by
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 37c274e61acc..4b784b6e21c0 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -299,7 +299,7 @@ struct t10_alua_tg_pt_gp {
 	struct list_head tg_pt_gp_lun_list;
 	struct se_lun *tg_pt_gp_alua_lun;
 	struct se_node_acl *tg_pt_gp_alua_nacl;
-	struct delayed_work tg_pt_gp_transition_work;
+	struct work_struct tg_pt_gp_transition_work;
 	struct completion *tg_pt_gp_transition_complete;
 };
 
-- 
cgit v1.2.3


From 1ca4d4fa3bfcbe8964f81e5818a9b90436466eb0 Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Thu, 2 Mar 2017 04:59:49 -0600
Subject: target: allow userspace to set state to transitioning

Userspace target_core_user handlers like tcmu-runner may want to set the
ALUA state to transitioning while it does implicit transitions. This
patch allows that state when set from configfs.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_alua.c | 37 ++++++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c
index 594807cd92cb..252d4e4b7b33 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -43,7 +43,7 @@
 #include "target_core_ua.h"
 
 static sense_reason_t core_alua_check_transition(int state, int valid,
-						 int *primary);
+						 int *primary, int explicit);
 static int core_alua_set_tg_pt_secondary_state(
 		struct se_lun *lun, int explicit, int offline);
 
@@ -335,8 +335,8 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd)
 		 * the state is a primary or secondary target port asymmetric
 		 * access state.
 		 */
-		rc = core_alua_check_transition(alua_access_state,
-						valid_states, &primary);
+		rc = core_alua_check_transition(alua_access_state, valid_states,
+						&primary, 1);
 		if (rc) {
 			/*
 			 * If the SET TARGET PORT GROUPS attempts to establish
@@ -762,7 +762,7 @@ target_alua_state_check(struct se_cmd *cmd)
  * Check implicit and explicit ALUA state change request.
  */
 static sense_reason_t
-core_alua_check_transition(int state, int valid, int *primary)
+core_alua_check_transition(int state, int valid, int *primary, int explicit)
 {
 	/*
 	 * OPTIMIZED, NON-OPTIMIZED, STANDBY and UNAVAILABLE are
@@ -804,11 +804,14 @@ core_alua_check_transition(int state, int valid, int *primary)
 		*primary = 0;
 		break;
 	case ALUA_ACCESS_STATE_TRANSITION:
-		/*
-		 * Transitioning is set internally, and
-		 * cannot be selected manually.
-		 */
-		goto not_supported;
+		if (!(valid & ALUA_T_SUP) || explicit)
+			/*
+			 * Transitioning is set internally and by tcmu daemon,
+			 * and cannot be selected through a STPG.
+			 */
+			goto not_supported;
+		*primary = 0;
+		break;
 	default:
 		pr_err("Unknown ALUA access state: 0x%02x\n", state);
 		return TCM_INVALID_PARAMETER_LIST;
@@ -1070,7 +1073,7 @@ static int core_alua_do_transition_tg_pt(
 	if (atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == new_state)
 		return 0;
 
-	if (new_state == ALUA_ACCESS_STATE_TRANSITION)
+	if (explicit && new_state == ALUA_ACCESS_STATE_TRANSITION)
 		return -EAGAIN;
 
 	/*
@@ -1091,10 +1094,6 @@ static int core_alua_do_transition_tg_pt(
 	 * Save the old primary ALUA access state, and set the current state
 	 * to ALUA_ACCESS_STATE_TRANSITION.
 	 */
-	tg_pt_gp->tg_pt_gp_alua_previous_state =
-		atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state);
-	tg_pt_gp->tg_pt_gp_alua_pending_state = new_state;
-
 	atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
 			ALUA_ACCESS_STATE_TRANSITION);
 	tg_pt_gp->tg_pt_gp_alua_access_status = (explicit) ?
@@ -1103,6 +1102,13 @@ static int core_alua_do_transition_tg_pt(
 
 	core_alua_queue_state_change_ua(tg_pt_gp);
 
+	if (new_state == ALUA_ACCESS_STATE_TRANSITION)
+		return 0;
+
+	tg_pt_gp->tg_pt_gp_alua_previous_state =
+		atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state);
+	tg_pt_gp->tg_pt_gp_alua_pending_state = new_state;
+
 	/*
 	 * Check for the optional ALUA primary state transition delay
 	 */
@@ -1144,7 +1150,8 @@ int core_alua_do_port_transition(
 		return -ENODEV;
 
 	valid_states = l_tg_pt_gp->tg_pt_gp_alua_supported_states;
-	if (core_alua_check_transition(new_state, valid_states, &primary) != 0)
+	if (core_alua_check_transition(new_state, valid_states, &primary,
+				       explicit) != 0)
 		return -EINVAL;
 
 	local_lu_gp_mem = l_dev->dev_alua_lu_gp_mem;
-- 
cgit v1.2.3


From 760bf578edf8122f2503a3a6a3f4b0de3b6ce0bb Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Thu, 2 Mar 2017 04:59:50 -0600
Subject: target: fix race during implicit transition work flushes

This fixes the following races:

1. core_alua_do_transition_tg_pt could have read
tg_pt_gp_alua_access_state and gone into this if chunk:

if (!explicit &&
        atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) ==
           ALUA_ACCESS_STATE_TRANSITION) {

and then core_alua_do_transition_tg_pt_work could update the
state. core_alua_do_transition_tg_pt would then only set
tg_pt_gp_alua_pending_state and the tg_pt_gp_alua_access_state would
not get updated with the second calls state.

2. core_alua_do_transition_tg_pt could be setting
tg_pt_gp_transition_complete while the tg_pt_gp_transition_work
is already completing. core_alua_do_transition_tg_pt then waits on the
completion that will never be called.

To handle these issues, we just call flush_work which will return when
core_alua_do_transition_tg_pt_work has completed so there is no need
to do the complete/wait. And, if core_alua_do_transition_tg_pt_work
was running, instead of trying to sneak in the state change, we just
schedule up another core_alua_do_transition_tg_pt_work call.

Note that this does not handle a possible race where there are multiple
threads call core_alua_do_transition_tg_pt at the same time. I think
we need a mutex in target_tg_pt_gp_alua_access_state_store.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_alua.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c
index 252d4e4b7b33..fd7c16a7ca6e 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -1079,16 +1079,8 @@ static int core_alua_do_transition_tg_pt(
 	/*
 	 * Flush any pending transitions
 	 */
-	if (!explicit && atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) ==
-	    ALUA_ACCESS_STATE_TRANSITION) {
-		/* Just in case */
-		tg_pt_gp->tg_pt_gp_alua_pending_state = new_state;
-		tg_pt_gp->tg_pt_gp_transition_complete = &wait;
+	if (!explicit)
 		flush_work(&tg_pt_gp->tg_pt_gp_transition_work);
-		wait_for_completion(&wait);
-		tg_pt_gp->tg_pt_gp_transition_complete = NULL;
-		return 0;
-	}
 
 	/*
 	 * Save the old primary ALUA access state, and set the current state
-- 
cgit v1.2.3


From 972c7f167974fa41ea8a2eed4b857cc59f59c42c Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Thu, 9 Mar 2017 02:42:08 -0600
Subject: tcmu: add helper to check if dev was configured

This adds a helper to check if the dev was configured. It
will be used in the next patch to prevent updates to some
config settings after the device has been setup.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 4339ab2133b3..892b311e7874 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -998,6 +998,11 @@ static void tcmu_dev_call_rcu(struct rcu_head *p)
 	kfree(udev);
 }
 
+static bool tcmu_dev_configured(struct tcmu_dev *udev)
+{
+	return udev->uio_info.uio_dev ? true : false;
+}
+
 static void tcmu_free_device(struct se_device *dev)
 {
 	struct tcmu_dev *udev = TCMU_DEV(dev);
@@ -1019,8 +1024,7 @@ static void tcmu_free_device(struct se_device *dev)
 	spin_unlock_irq(&udev->commands_lock);
 	WARN_ON(!all_expired);
 
-	/* Device was configured */
-	if (udev->uio_info.uio_dev) {
+	if (tcmu_dev_configured(udev)) {
 		tcmu_netlink_event(TCMU_CMD_REMOVED_DEVICE, udev->uio_info.name,
 				   udev->uio_info.uio_dev->minor);
 
-- 
cgit v1.2.3


From af980e46a26ac8805685bb70c8572dbc47abb126 Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Thu, 9 Mar 2017 02:42:09 -0600
Subject: tcmu: make cmd timeout configurable

A single daemon could implement multiple types of devices
using multuple types of real devices that may not support
restarting from crashes and/or handling tcmu timeouts. This
makes the cmd timeout configurable, so handlers that do not
support it can turn if off for now.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 41 +++++++++++++++++++++++++++++++++------
 1 file changed, 35 insertions(+), 6 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 892b311e7874..10cc15f0b1fa 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -112,6 +112,7 @@ struct tcmu_dev {
 	spinlock_t commands_lock;
 
 	struct timer_list timeout;
+	unsigned int cmd_time_out;
 
 	char dev_config[TCMU_CONFIG_LEN];
 };
@@ -172,7 +173,9 @@ static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd)
 
 	tcmu_cmd->se_cmd = se_cmd;
 	tcmu_cmd->tcmu_dev = udev;
-	tcmu_cmd->deadline = jiffies + msecs_to_jiffies(TCMU_TIME_OUT);
+	if (udev->cmd_time_out)
+		tcmu_cmd->deadline = jiffies +
+					msecs_to_jiffies(udev->cmd_time_out);
 
 	idr_preload(GFP_KERNEL);
 	spin_lock_irq(&udev->commands_lock);
@@ -451,7 +454,11 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 
 		pr_debug("sleeping for ring space\n");
 		spin_unlock_irq(&udev->cmdr_lock);
-		ret = schedule_timeout(msecs_to_jiffies(TCMU_TIME_OUT));
+		if (udev->cmd_time_out)
+			ret = schedule_timeout(
+					msecs_to_jiffies(udev->cmd_time_out));
+		else
+			ret = schedule_timeout(msecs_to_jiffies(TCMU_TIME_OUT));
 		finish_wait(&udev->wait_cmdr, &__wait);
 		if (!ret) {
 			pr_warn("tcmu: command timed out\n");
@@ -526,8 +533,9 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 	/* TODO: only if FLUSH and FUA? */
 	uio_event_notify(&udev->uio_info);
 
-	mod_timer(&udev->timeout,
-		round_jiffies_up(jiffies + msecs_to_jiffies(TCMU_TIME_OUT)));
+	if (udev->cmd_time_out)
+		mod_timer(&udev->timeout, round_jiffies_up(jiffies +
+			  msecs_to_jiffies(udev->cmd_time_out)));
 
 	return TCM_NO_SENSE;
 }
@@ -742,6 +750,7 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
 	}
 
 	udev->hba = hba;
+	udev->cmd_time_out = TCMU_TIME_OUT;
 
 	init_waitqueue_head(&udev->wait_cmdr);
 	spin_lock_init(&udev->cmdr_lock);
@@ -1037,7 +1046,7 @@ static void tcmu_free_device(struct se_device *dev)
 
 enum {
 	Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_hw_max_sectors,
-	Opt_err,
+	Opt_cmd_time_out, Opt_err,
 };
 
 static match_table_t tokens = {
@@ -1045,6 +1054,7 @@ static match_table_t tokens = {
 	{Opt_dev_size, "dev_size=%u"},
 	{Opt_hw_block_size, "hw_block_size=%u"},
 	{Opt_hw_max_sectors, "hw_max_sectors=%u"},
+	{Opt_cmd_time_out, "cmd_time_out=%u"},
 	{Opt_err, NULL}
 };
 
@@ -1111,6 +1121,23 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
 			if (ret < 0)
 				pr_err("kstrtoul() failed for dev_size=\n");
 			break;
+		case Opt_cmd_time_out:
+			if (tcmu_dev_configured(udev)) {
+				pr_err("Can not update cmd_time_out after device has been configured.\n");
+				ret = -EINVAL;
+				break;
+			}
+			arg_p = match_strdup(&args[0]);
+			if (!arg_p) {
+				ret = -ENOMEM;
+				break;
+			}
+			ret = kstrtouint(arg_p, 0, &udev->cmd_time_out);
+			kfree(arg_p);
+			if (ret < 0)
+				pr_err("kstrtouint() failed for cmd_time_out=\n");
+			udev->cmd_time_out *= MSEC_PER_SEC;
+			break;
 		case Opt_hw_block_size:
 			ret = tcmu_set_dev_attrib(&args[0],
 					&(dev->dev_attrib.hw_block_size));
@@ -1138,7 +1165,9 @@ static ssize_t tcmu_show_configfs_dev_params(struct se_device *dev, char *b)
 
 	bl = sprintf(b + bl, "Config: %s ",
 		     udev->dev_config[0] ? udev->dev_config : "NULL");
-	bl += sprintf(b + bl, "Size: %zu\n", udev->dev_size);
+	bl += sprintf(b + bl, "Size: %zu ", udev->dev_size);
+	bl += sprintf(b + bl, "Cmd Time Out: %lu\n",
+		      udev->cmd_time_out / MSEC_PER_SEC);
 
 	return bl;
 }
-- 
cgit v1.2.3


From 7d7a743543905a8297dce53b36e793e5307da5d7 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Sat, 18 Mar 2017 15:04:13 -0700
Subject: tcmu: Convert cmd_time_out into backend device attribute

Instead of putting cmd_time_out under ../target/core/user_0/foo/control,
which has historically been used by parameters needed for initial
backend device configuration, go ahead and move cmd_time_out into
a backend device attribute.

In order to do this, tcmu_module_init() has been updated to create
a local struct configfs_attribute **tcmu_attrs, that is based upon
the existing passthrough_attrib_attrs along with the new cmd_time_out
attribute.  Once **tcm_attrs has been setup, go ahead and point
it at tcmu_ops->tb_dev_attrib_attrs so it's picked up by target-core.

Also following MNC's previous change, ->cmd_time_out is stored in
milliseconds but exposed via configfs in seconds.  Also, note this
patch restricts the modification of ->cmd_time_out to before +
after the TCMU device has been configured, but not while it has
active fabric exports.

Cc: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 94 ++++++++++++++++++++++++++++-----------
 1 file changed, 68 insertions(+), 26 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 10cc15f0b1fa..c6874c38a10b 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -28,6 +28,7 @@
 #include <linux/stringify.h>
 #include <linux/bitops.h>
 #include <linux/highmem.h>
+#include <linux/configfs.h>
 #include <net/genetlink.h>
 #include <scsi/scsi_common.h>
 #include <scsi/scsi_proto.h>
@@ -1046,7 +1047,7 @@ static void tcmu_free_device(struct se_device *dev)
 
 enum {
 	Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_hw_max_sectors,
-	Opt_cmd_time_out, Opt_err,
+	Opt_err,
 };
 
 static match_table_t tokens = {
@@ -1054,7 +1055,6 @@ static match_table_t tokens = {
 	{Opt_dev_size, "dev_size=%u"},
 	{Opt_hw_block_size, "hw_block_size=%u"},
 	{Opt_hw_max_sectors, "hw_max_sectors=%u"},
-	{Opt_cmd_time_out, "cmd_time_out=%u"},
 	{Opt_err, NULL}
 };
 
@@ -1121,23 +1121,6 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
 			if (ret < 0)
 				pr_err("kstrtoul() failed for dev_size=\n");
 			break;
-		case Opt_cmd_time_out:
-			if (tcmu_dev_configured(udev)) {
-				pr_err("Can not update cmd_time_out after device has been configured.\n");
-				ret = -EINVAL;
-				break;
-			}
-			arg_p = match_strdup(&args[0]);
-			if (!arg_p) {
-				ret = -ENOMEM;
-				break;
-			}
-			ret = kstrtouint(arg_p, 0, &udev->cmd_time_out);
-			kfree(arg_p);
-			if (ret < 0)
-				pr_err("kstrtouint() failed for cmd_time_out=\n");
-			udev->cmd_time_out *= MSEC_PER_SEC;
-			break;
 		case Opt_hw_block_size:
 			ret = tcmu_set_dev_attrib(&args[0],
 					&(dev->dev_attrib.hw_block_size));
@@ -1165,9 +1148,7 @@ static ssize_t tcmu_show_configfs_dev_params(struct se_device *dev, char *b)
 
 	bl = sprintf(b + bl, "Config: %s ",
 		     udev->dev_config[0] ? udev->dev_config : "NULL");
-	bl += sprintf(b + bl, "Size: %zu ", udev->dev_size);
-	bl += sprintf(b + bl, "Cmd Time Out: %lu\n",
-		      udev->cmd_time_out / MSEC_PER_SEC);
+	bl += sprintf(b + bl, "Size: %zu\n", udev->dev_size);
 
 	return bl;
 }
@@ -1186,7 +1167,48 @@ tcmu_parse_cdb(struct se_cmd *cmd)
 	return passthrough_parse_cdb(cmd, tcmu_queue_cmd);
 }
 
-static const struct target_backend_ops tcmu_ops = {
+static ssize_t tcmu_cmd_time_out_show(struct config_item *item, char *page)
+{
+	struct se_dev_attrib *da = container_of(to_config_group(item),
+					struct se_dev_attrib, da_group);
+	struct tcmu_dev *udev = container_of(da->da_dev,
+					struct tcmu_dev, se_dev);
+
+	return snprintf(page, PAGE_SIZE, "%lu\n", udev->cmd_time_out / MSEC_PER_SEC);
+}
+
+static ssize_t tcmu_cmd_time_out_store(struct config_item *item, const char *page,
+				       size_t count)
+{
+	struct se_dev_attrib *da = container_of(to_config_group(item),
+					struct se_dev_attrib, da_group);
+	struct tcmu_dev *udev = container_of(da->da_dev,
+					struct tcmu_dev, se_dev);
+	u32 val;
+	int ret;
+
+	if (da->da_dev->export_count) {
+		pr_err("Unable to set tcmu cmd_time_out while exports exist\n");
+		return -EINVAL;
+	}
+
+	ret = kstrtou32(page, 0, &val);
+	if (ret < 0)
+		return ret;
+
+	if (!val) {
+		pr_err("Illegal value for cmd_time_out\n");
+		return -EINVAL;
+	}
+
+	udev->cmd_time_out = val * MSEC_PER_SEC;
+	return count;
+}
+CONFIGFS_ATTR(tcmu_, cmd_time_out);
+
+static struct configfs_attribute **tcmu_attrs;
+
+static struct target_backend_ops tcmu_ops = {
 	.name			= "user",
 	.owner			= THIS_MODULE,
 	.transport_flags	= TRANSPORT_FLAG_PASSTHROUGH,
@@ -1200,12 +1222,12 @@ static const struct target_backend_ops tcmu_ops = {
 	.show_configfs_dev_params = tcmu_show_configfs_dev_params,
 	.get_device_type	= sbc_get_device_type,
 	.get_blocks		= tcmu_get_blocks,
-	.tb_dev_attrib_attrs	= passthrough_attrib_attrs,
+	.tb_dev_attrib_attrs	= NULL,
 };
 
 static int __init tcmu_module_init(void)
 {
-	int ret;
+	int ret, i, len = 0;
 
 	BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0);
 
@@ -1227,12 +1249,31 @@ static int __init tcmu_module_init(void)
 		goto out_unreg_device;
 	}
 
+	for (i = 0; passthrough_attrib_attrs[i] != NULL; i++) {
+		len += sizeof(struct configfs_attribute *);
+	}
+	len += sizeof(struct configfs_attribute *) * 2;
+
+	tcmu_attrs = kzalloc(len, GFP_KERNEL);
+	if (!tcmu_attrs) {
+		ret = -ENOMEM;
+		goto out_unreg_genl;
+	}
+
+	for (i = 0; passthrough_attrib_attrs[i] != NULL; i++) {
+		tcmu_attrs[i] = passthrough_attrib_attrs[i];
+	}
+	tcmu_attrs[i] = &tcmu_attr_cmd_time_out;
+	tcmu_ops.tb_dev_attrib_attrs = tcmu_attrs;
+
 	ret = transport_backend_register(&tcmu_ops);
 	if (ret)
-		goto out_unreg_genl;
+		goto out_attrs;
 
 	return 0;
 
+out_attrs:
+	kfree(tcmu_attrs);
 out_unreg_genl:
 	genl_unregister_family(&tcmu_genl_family);
 out_unreg_device:
@@ -1246,6 +1287,7 @@ out_free_cache:
 static void __exit tcmu_module_exit(void)
 {
 	target_backend_unregister(&tcmu_ops);
+	kfree(tcmu_attrs);
 	genl_unregister_family(&tcmu_genl_family);
 	root_device_unregister(tcmu_root_device);
 	kmem_cache_destroy(tcmu_cmd_cache);
-- 
cgit v1.2.3


From c4a9b538ab2a109c5f9798bea1f8f4bf93aadfb9 Mon Sep 17 00:00:00 2001
From: Joe Carnuccio <joe.carnuccio@cavium.com>
Date: Wed, 15 Mar 2017 09:48:43 -0700
Subject: qla2xxx: Allow vref count to timeout on vport delete.

Cc: <stable@vger.kernel.org>
Signed-off-by: Joe Carnuccio <joe.carnuccio@cavium.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/scsi/qla2xxx/qla_attr.c |  4 +---
 drivers/scsi/qla2xxx/qla_def.h  |  6 +++++-
 drivers/scsi/qla2xxx/qla_init.c |  1 +
 drivers/scsi/qla2xxx/qla_mid.c  | 14 ++++++++------
 drivers/scsi/qla2xxx/qla_os.c   |  1 +
 5 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index f610103994af..435ff7fd6384 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -2154,8 +2154,6 @@ qla24xx_vport_delete(struct fc_vport *fc_vport)
 		    "Timer for the VP[%d] has stopped\n", vha->vp_idx);
 	}
 
-	BUG_ON(atomic_read(&vha->vref_count));
-
 	qla2x00_free_fcports(vha);
 
 	mutex_lock(&ha->vport_lock);
@@ -2166,7 +2164,7 @@ qla24xx_vport_delete(struct fc_vport *fc_vport)
 	dma_free_coherent(&ha->pdev->dev, vha->gnl.size, vha->gnl.l,
 	    vha->gnl.ldma);
 
-	if (vha->qpair->vp_idx == vha->vp_idx) {
+	if (vha->qpair && vha->qpair->vp_idx == vha->vp_idx) {
 		if (qla2xxx_delete_qpair(vha, vha->qpair) != QLA_SUCCESS)
 			ql_log(ql_log_warn, vha, 0x7087,
 			    "Queue Pair delete failed.\n");
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 625d438e3cce..8662ef4192db 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -4076,6 +4076,7 @@ typedef struct scsi_qla_host {
 	/* Count of active session/fcport */
 	int fcport_count;
 	wait_queue_head_t fcport_waitQ;
+	wait_queue_head_t vref_waitq;
 } scsi_qla_host_t;
 
 struct qla27xx_image_status {
@@ -4131,14 +4132,17 @@ struct qla2_sgx {
 	mb();						\
 	if (__vha->flags.delete_progress) {		\
 		atomic_dec(&__vha->vref_count);		\
+		wake_up(&__vha->vref_waitq);		\
 		__bail = 1;				\
 	} else {					\
 		__bail = 0;				\
 	}						\
 } while (0)
 
-#define QLA_VHA_MARK_NOT_BUSY(__vha)			\
+#define QLA_VHA_MARK_NOT_BUSY(__vha) do {		\
 	atomic_dec(&__vha->vref_count);			\
+	wake_up(&__vha->vref_waitq);			\
+} while (0)						\
 
 #define QLA_QPAIR_MARK_BUSY(__qpair, __bail) do {	\
 	atomic_inc(&__qpair->ref_count);		\
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 32fb9007f137..9f3740c68cc8 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -5148,6 +5148,7 @@ qla2x00_update_fcports(scsi_qla_host_t *base_vha)
 			}
 		}
 		atomic_dec(&vha->vref_count);
+		wake_up(&vha->vref_waitq);
 	}
 	spin_unlock_irqrestore(&ha->vport_slock, flags);
 }
diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c
index c6d6f0d912ff..09a490c98763 100644
--- a/drivers/scsi/qla2xxx/qla_mid.c
+++ b/drivers/scsi/qla2xxx/qla_mid.c
@@ -74,13 +74,14 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha)
 	 * ensures no active vp_list traversal while the vport is removed
 	 * from the queue)
 	 */
-	spin_lock_irqsave(&ha->vport_slock, flags);
-	while (atomic_read(&vha->vref_count)) {
-		spin_unlock_irqrestore(&ha->vport_slock, flags);
-
-		msleep(500);
+	wait_event_timeout(vha->vref_waitq, atomic_read(&vha->vref_count),
+	    10*HZ);
 
-		spin_lock_irqsave(&ha->vport_slock, flags);
+	spin_lock_irqsave(&ha->vport_slock, flags);
+	if (atomic_read(&vha->vref_count)) {
+		ql_dbg(ql_dbg_vport, vha, 0xfffa,
+		    "vha->vref_count=%u timeout\n", vha->vref_count.counter);
+		vha->vref_count = (atomic_t)ATOMIC_INIT(0);
 	}
 	list_del(&vha->list);
 	qlt_update_vp_map(vha, RESET_VP_IDX);
@@ -269,6 +270,7 @@ qla2x00_alert_all_vps(struct rsp_que *rsp, uint16_t *mb)
 
 			spin_lock_irqsave(&ha->vport_slock, flags);
 			atomic_dec(&vha->vref_count);
+			wake_up(&vha->vref_waitq);
 		}
 		i++;
 	}
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 1fed235a1b4a..54d4e802bde0 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -4268,6 +4268,7 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht,
 	spin_lock_init(&vha->work_lock);
 	spin_lock_init(&vha->cmd_list_lock);
 	init_waitqueue_head(&vha->fcport_waitQ);
+	init_waitqueue_head(&vha->vref_waitq);
 
 	vha->gnl.size = sizeof(struct get_name_list_extended) *
 			(ha->max_loop_id + 1);
-- 
cgit v1.2.3


From ae940f2c472a62904dc18234de5cf3ed28f195ee Mon Sep 17 00:00:00 2001
From: Quinn Tran <quinn.tran@cavium.com>
Date: Wed, 15 Mar 2017 09:48:44 -0700
Subject: qla2xxx: Fix memory leak for abts processing

Cc: <stable@vger.kernel.org>
Signed-off-by: Quinn Tran <quinn.tran@cavium.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/scsi/qla2xxx/qla_target.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 45f5077684f0..ecf97c5993e8 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -6642,6 +6642,8 @@ qlt_handle_abts_recv_work(struct work_struct *work)
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 	qlt_response_pkt_all_vps(vha, (response_t *)&op->atio);
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
+	kfree(op);
 }
 
 void
-- 
cgit v1.2.3


From 8b666809e10cda9814af3e8be339d35b83909056 Mon Sep 17 00:00:00 2001
From: Quinn Tran <quinn.tran@cavium.com>
Date: Wed, 15 Mar 2017 09:48:45 -0700
Subject: qla2xxx: Fix request queue corruption.

When FW notify driver or driver detects low FW resource,
driver tries to send out Busy SCSI Status to tell Initiator
side to back off. During the send process, the lock was not held.

Cc: <stable@vger.kernel.org>
Signed-off-by: Quinn Tran <quinn.tran@qlogic.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/scsi/qla2xxx/qla_target.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index ecf97c5993e8..a463bcc57902 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -5079,16 +5079,22 @@ qlt_send_busy(struct scsi_qla_host *vha,
 
 static int
 qlt_chk_qfull_thresh_hold(struct scsi_qla_host *vha,
-	struct atio_from_isp *atio)
+	struct atio_from_isp *atio, bool ha_locked)
 {
 	struct qla_hw_data *ha = vha->hw;
 	uint16_t status;
+	unsigned long flags;
 
 	if (ha->tgt.num_pend_cmds < Q_FULL_THRESH_HOLD(ha))
 		return 0;
 
+	if (!ha_locked)
+		spin_lock_irqsave(&ha->hardware_lock, flags);
 	status = temp_sam_status;
 	qlt_send_busy(vha, atio, status);
+	if (!ha_locked)
+		spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
 	return 1;
 }
 
@@ -5133,7 +5139,7 @@ static void qlt_24xx_atio_pkt(struct scsi_qla_host *vha,
 
 
 		if (likely(atio->u.isp24.fcp_cmnd.task_mgmt_flags == 0)) {
-			rc = qlt_chk_qfull_thresh_hold(vha, atio);
+			rc = qlt_chk_qfull_thresh_hold(vha, atio, ha_locked);
 			if (rc != 0) {
 				tgt->atio_irq_cmd_count--;
 				return;
@@ -5256,7 +5262,7 @@ static void qlt_response_pkt(struct scsi_qla_host *vha, response_t *pkt)
 			break;
 		}
 
-		rc = qlt_chk_qfull_thresh_hold(vha, atio);
+		rc = qlt_chk_qfull_thresh_hold(vha, atio, true);
 		if (rc != 0) {
 			tgt->irq_cmd_count--;
 			return;
-- 
cgit v1.2.3


From 8f6fc8d4e7ae2347d6261d11a7eb2b247d2954d8 Mon Sep 17 00:00:00 2001
From: Quinn Tran <quinn.tran@cavium.com>
Date: Wed, 15 Mar 2017 09:48:46 -0700
Subject: qla2xxx: Fix inadequate lock protection for ABTS.

Normally, ABTS is sent to Target Core as Task MGMT command.
In the case of error, qla2xxx needs to send response, hardware_lock
is required to prevent request queue corruption.

Cc: <stable@vger.kernel.org>
Signed-off-by: Quinn Tran <quinn.tran@cavium.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/scsi/qla2xxx/qla_target.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index a463bcc57902..a78c3e9bcb57 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -130,6 +130,9 @@ static void qlt_send_term_imm_notif(struct scsi_qla_host *vha,
 static struct fc_port *qlt_create_sess(struct scsi_qla_host *vha,
 	fc_port_t *fcport, bool local);
 void qlt_unreg_sess(struct fc_port *sess);
+static void qlt_24xx_handle_abts(struct scsi_qla_host *,
+	struct abts_recv_from_24xx *);
+
 /*
  * Global Variables
  */
@@ -389,6 +392,8 @@ static bool qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha,
 			(struct abts_recv_from_24xx *)atio;
 		struct scsi_qla_host *host = qlt_find_host_by_vp_idx(vha,
 			entry->vp_index);
+		unsigned long flags;
+
 		if (unlikely(!host)) {
 			ql_dbg(ql_dbg_tgt, vha, 0xffff,
 			    "qla_target(%d): Response pkt (ABTS_RECV_24XX) "
@@ -396,9 +401,12 @@ static bool qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha,
 			    vha->vp_idx, entry->vp_index);
 			break;
 		}
-		qlt_response_pkt(host, (response_t *)atio);
+		if (!ha_locked)
+			spin_lock_irqsave(&host->hw->hardware_lock, flags);
+		qlt_24xx_handle_abts(host, (struct abts_recv_from_24xx *)atio);
+		if (!ha_locked)
+			spin_unlock_irqrestore(&host->hw->hardware_lock, flags);
 		break;
-
 	}
 
 	/* case PUREX_IOCB_TYPE: ql2xmvasynctoatio */
-- 
cgit v1.2.3


From f159b3c7cd45c550d0f73806451a10b6b6bc08ae Mon Sep 17 00:00:00 2001
From: Quinn Tran <quinn.tran@cavium.com>
Date: Wed, 15 Mar 2017 09:48:47 -0700
Subject: qla2xxx: Fix sess_lock & hardware_lock lock order problem.

The main lock that needs to be held for CMD or TMR submission
to upper layer is the sess_lock. The sess_lock is used to
serialize cmd submission and session deletion. The addition
of hardware_lock being held is not necessary. This patch removes
hardware_lock dependency from CMD/TMR submission.

Use hardware_lock only for error response in this case.

Path1
       CPU0                    CPU1
       ----                    ----
  lock(&(&ha->tgt.sess_lock)->rlock);
                               lock(&(&ha->hardware_lock)->rlock);
                               lock(&(&ha->tgt.sess_lock)->rlock);
  lock(&(&ha->hardware_lock)->rlock);

Path2/deadlock
*** DEADLOCK ***
Call Trace:
dump_stack+0x85/0xc2
print_circular_bug+0x1e3/0x250
__lock_acquire+0x1425/0x1620
lock_acquire+0xbf/0x210
_raw_spin_lock_irqsave+0x53/0x70
qlt_sess_work_fn+0x21d/0x480 [qla2xxx]
process_one_work+0x1f4/0x6e0

Cc: <stable@vger.kernel.org>
Cc: Bart Van Assche <Bart.VanAssche@sandisk.com>
Reported-by: Bart Van Assche <Bart.VanAssche@sandisk.com>
Signed-off-by: Quinn Tran <quinn.tran@cavium.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/scsi/qla2xxx/qla_target.c | 41 +++++++++++++++++----------------------
 1 file changed, 18 insertions(+), 23 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index a78c3e9bcb57..989f931af156 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -5727,30 +5727,23 @@ static void qlt_abort_work(struct qla_tgt *tgt,
 		}
 	}
 
-	spin_lock_irqsave(&ha->hardware_lock, flags);
-
-	if (tgt->tgt_stop)
-		goto out_term;
-
 	rc = __qlt_24xx_handle_abts(vha, &prm->abts, sess);
+	ha->tgt.tgt_ops->put_sess(sess);
+	spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2);
+
 	if (rc != 0)
 		goto out_term;
-	spin_unlock_irqrestore(&ha->hardware_lock, flags);
-	if (sess)
-		ha->tgt.tgt_ops->put_sess(sess);
-	spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2);
 	return;
 
 out_term2:
-	spin_lock_irqsave(&ha->hardware_lock, flags);
+	if (sess)
+		ha->tgt.tgt_ops->put_sess(sess);
+	spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2);
 
 out_term:
+	spin_lock_irqsave(&ha->hardware_lock, flags);
 	qlt_24xx_send_abts_resp(vha, &prm->abts, FCP_TMF_REJECTED, false);
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
-
-	if (sess)
-		ha->tgt.tgt_ops->put_sess(sess);
-	spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2);
 }
 
 static void qlt_tmr_work(struct qla_tgt *tgt,
@@ -5770,7 +5763,7 @@ static void qlt_tmr_work(struct qla_tgt *tgt,
 	spin_lock_irqsave(&ha->tgt.sess_lock, flags);
 
 	if (tgt->tgt_stop)
-		goto out_term;
+		goto out_term2;
 
 	s_id = prm->tm_iocb2.u.isp24.fcp_hdr.s_id;
 	sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha, s_id);
@@ -5782,11 +5775,11 @@ static void qlt_tmr_work(struct qla_tgt *tgt,
 
 		spin_lock_irqsave(&ha->tgt.sess_lock, flags);
 		if (!sess)
-			goto out_term;
+			goto out_term2;
 	} else {
 		if (sess->deleted) {
 			sess = NULL;
-			goto out_term;
+			goto out_term2;
 		}
 
 		if (!kref_get_unless_zero(&sess->sess_kref)) {
@@ -5794,7 +5787,7 @@ static void qlt_tmr_work(struct qla_tgt *tgt,
 			    "%s: kref_get fail %8phC\n",
 			     __func__, sess->port_name);
 			sess = NULL;
-			goto out_term;
+			goto out_term2;
 		}
 	}
 
@@ -5804,17 +5797,19 @@ static void qlt_tmr_work(struct qla_tgt *tgt,
 	unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun);
 
 	rc = qlt_issue_task_mgmt(sess, unpacked_lun, fn, iocb, 0);
-	if (rc != 0)
-		goto out_term;
-
 	ha->tgt.tgt_ops->put_sess(sess);
 	spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
+
+	if (rc != 0)
+		goto out_term;
 	return;
 
+out_term2:
+	if (sess)
+		ha->tgt.tgt_ops->put_sess(sess);
+	spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
 out_term:
 	qlt_send_term_exchange(vha, NULL, &prm->tm_iocb2, 1, 0);
-	ha->tgt.tgt_ops->put_sess(sess);
-	spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
 }
 
 static void qlt_sess_work_fn(struct work_struct *work)
-- 
cgit v1.2.3


From 5b33469a055c77001fd2c62b0f985c991b0e5b65 Mon Sep 17 00:00:00 2001
From: Quinn Tran <quinn.tran@cavium.com>
Date: Wed, 15 Mar 2017 09:48:48 -0700
Subject: qla2xxx: Allow relogin to proceed if remote login did not finish

If the remote port have started the login process, then the
PLOGI and PRLI should be back to back. Driver will allow
the remote port to complete the process. For the case where
the remote port decide to back off from sending PRLI, this
local port sets an expiration timer for the PRLI. Once the
expiration time passes, the relogin retry logic is allowed
to go through and perform login with the remote port.

Signed-off-by: Quinn Tran <quinn.tran@qlogic.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/scsi/qla2xxx/qla_def.h    |  2 ++
 drivers/scsi/qla2xxx/qla_init.c   | 12 ++++++++++--
 drivers/scsi/qla2xxx/qla_isr.c    | 25 +++++++++++++++++++------
 drivers/scsi/qla2xxx/qla_target.c |  1 +
 4 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 8662ef4192db..56cd45fc600a 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -2300,6 +2300,8 @@ typedef struct fc_port {
 	struct ct_sns_desc ct_desc;
 	enum discovery_state disc_state;
 	enum login_state fw_login_state;
+	unsigned long plogi_nack_done_deadline;
+
 	u32 login_gen, last_login_gen;
 	u32 rscn_gen, last_rscn_gen;
 	u32 chip_reset;
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 9f3740c68cc8..a7865a5d556d 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -876,10 +876,14 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport)
 	fcport->login_retry--;
 
 	if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) ||
-	    (fcport->fw_login_state == DSC_LS_PLOGI_COMP) ||
 	    (fcport->fw_login_state == DSC_LS_PRLI_PEND))
 		return 0;
 
+	if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) {
+		if (time_before_eq(jiffies, fcport->plogi_nack_done_deadline))
+			return 0;
+	}
+
 	/* for pure Target Mode. Login will not be initiated */
 	if (vha->host->active_mode == MODE_TARGET)
 		return 0;
@@ -1041,10 +1045,14 @@ void qla24xx_handle_relogin_event(scsi_qla_host_t *vha,
 		fcport->flags);
 
 	if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) ||
-	    (fcport->fw_login_state == DSC_LS_PLOGI_COMP) ||
 	    (fcport->fw_login_state == DSC_LS_PRLI_PEND))
 		return;
 
+	if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) {
+		if (time_before_eq(jiffies, fcport->plogi_nack_done_deadline))
+			return;
+	}
+
 	if (fcport->flags & FCF_ASYNC_SENT) {
 		fcport->login_retry++;
 		set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 3c66ea29de27..b2c6da752edd 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -1620,9 +1620,9 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req,
 		QLA_LOGIO_LOGIN_RETRIED : 0;
 	if (logio->entry_status) {
 		ql_log(ql_log_warn, fcport->vha, 0x5034,
-		    "Async-%s error entry - hdl=%x"
+		    "Async-%s error entry - %8phC hdl=%x"
 		    "portid=%02x%02x%02x entry-status=%x.\n",
-		    type, sp->handle, fcport->d_id.b.domain,
+		    type, fcport->port_name, sp->handle, fcport->d_id.b.domain,
 		    fcport->d_id.b.area, fcport->d_id.b.al_pa,
 		    logio->entry_status);
 		ql_dump_buffer(ql_dbg_async + ql_dbg_buffer, vha, 0x504d,
@@ -1633,8 +1633,9 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req,
 
 	if (le16_to_cpu(logio->comp_status) == CS_COMPLETE) {
 		ql_dbg(ql_dbg_async, fcport->vha, 0x5036,
-		    "Async-%s complete - hdl=%x portid=%02x%02x%02x "
-		    "iop0=%x.\n", type, sp->handle, fcport->d_id.b.domain,
+		    "Async-%s complete - %8phC hdl=%x portid=%02x%02x%02x "
+		    "iop0=%x.\n", type, fcport->port_name, sp->handle,
+		    fcport->d_id.b.domain,
 		    fcport->d_id.b.area, fcport->d_id.b.al_pa,
 		    le32_to_cpu(logio->io_parameter[0]));
 
@@ -1674,6 +1675,17 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req,
 	case LSC_SCODE_NPORT_USED:
 		data[0] = MBS_LOOP_ID_USED;
 		break;
+	case LSC_SCODE_CMD_FAILED:
+		if (iop[1] == 0x0606) {
+			/*
+			 * PLOGI/PRLI Completed. We must have Recv PLOGI/PRLI,
+			 * Target side acked.
+			 */
+			data[0] = MBS_COMMAND_COMPLETE;
+			goto logio_done;
+		}
+		data[0] = MBS_COMMAND_ERROR;
+		break;
 	case LSC_SCODE_NOXCB:
 		vha->hw->exch_starvation++;
 		if (vha->hw->exch_starvation > 5) {
@@ -1695,8 +1707,9 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req,
 	}
 
 	ql_dbg(ql_dbg_async, fcport->vha, 0x5037,
-	    "Async-%s failed - hdl=%x portid=%02x%02x%02x comp=%x "
-	    "iop0=%x iop1=%x.\n", type, sp->handle, fcport->d_id.b.domain,
+	    "Async-%s failed - %8phC hdl=%x portid=%02x%02x%02x comp=%x "
+	    "iop0=%x iop1=%x.\n", type, fcport->port_name,
+		sp->handle, fcport->d_id.b.domain,
 	    fcport->d_id.b.area, fcport->d_id.b.al_pa,
 	    le16_to_cpu(logio->comp_status),
 	    le32_to_cpu(logio->io_parameter[0]),
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 989f931af156..925d9b858b24 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -562,6 +562,7 @@ void qla2x00_async_nack_sp_done(void *s, int res)
 		sp->fcport->login_gen++;
 		sp->fcport->fw_login_state = DSC_LS_PLOGI_COMP;
 		sp->fcport->logout_on_delete = 1;
+		sp->fcport->plogi_nack_done_deadline = jiffies + HZ;
 		break;
 
 	case SRB_NACK_PRLI:
-- 
cgit v1.2.3


From be25152c0d9e236076323abbe9def9714234b761 Mon Sep 17 00:00:00 2001
From: Quinn Tran <quinn.tran@cavium.com>
Date: Wed, 15 Mar 2017 09:48:49 -0700
Subject: qla2xxx: Improve T10-DIF/PI handling in driver.

Add routines to support T10 DIF tag.

Signed-off-by: Quinn Tran <quinn.tran@cavium.com>
Signed-off-by: Anil Gurumurthy <anil.gurumurthy@cavium.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/scsi/qla2xxx/qla_dbg.h     |   1 +
 drivers/scsi/qla2xxx/qla_def.h     |  10 +
 drivers/scsi/qla2xxx/qla_gbl.h     |   6 +-
 drivers/scsi/qla2xxx/qla_iocb.c    |  13 +-
 drivers/scsi/qla2xxx/qla_target.c  | 540 ++++++++++++++++++++++---------------
 drivers/scsi/qla2xxx/qla_target.h  |  38 ++-
 drivers/scsi/qla2xxx/tcm_qla2xxx.c |  49 ++--
 7 files changed, 406 insertions(+), 251 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h
index e1fc4e66966a..c6bffe929fe7 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.h
+++ b/drivers/scsi/qla2xxx/qla_dbg.h
@@ -348,6 +348,7 @@ ql_log_pci(uint32_t, struct pci_dev *pdev, int32_t, const char *fmt, ...);
 #define ql_dbg_tgt	0x00004000 /* Target mode */
 #define ql_dbg_tgt_mgt	0x00002000 /* Target mode management */
 #define ql_dbg_tgt_tmr	0x00001000 /* Target mode task management */
+#define ql_dbg_tgt_dif  0x00000800 /* Target mode dif */
 
 extern int qla27xx_dump_mpi_ram(struct qla_hw_data *, uint32_t, uint32_t *,
 	uint32_t, void **);
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 56cd45fc600a..9d1d3dcf1c87 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -3127,6 +3127,16 @@ struct bidi_statistics {
 	unsigned long long transfer_bytes;
 };
 
+struct qla_tc_param {
+	struct scsi_qla_host *vha;
+	uint32_t blk_sz;
+	uint32_t bufflen;
+	struct scatterlist *sg;
+	struct scatterlist *prot_sg;
+	struct crc_context *ctx;
+	uint8_t *ctx_dsd_alloced;
+};
+
 /* Multi queue support */
 #define MBC_INITIALIZE_MULTIQ 0x1f
 #define QLA_QUE_PAGE 0X1000
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index b3d6441d1d90..ca6f122e5865 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -256,11 +256,11 @@ extern unsigned long qla2x00_get_async_timeout(struct scsi_qla_host *);
 extern void *qla2x00_alloc_iocbs(scsi_qla_host_t *, srb_t *);
 extern int qla2x00_issue_marker(scsi_qla_host_t *, int);
 extern int qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *, srb_t *,
-	uint32_t *, uint16_t, struct qla_tgt_cmd *);
+	uint32_t *, uint16_t, struct qla_tc_param *);
 extern int qla24xx_walk_and_build_sglist(struct qla_hw_data *, srb_t *,
-	uint32_t *, uint16_t, struct qla_tgt_cmd *);
+	uint32_t *, uint16_t, struct qla_tc_param *);
 extern int qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *, srb_t *,
-	uint32_t *, uint16_t, struct qla_tgt_cmd *);
+	uint32_t *, uint16_t, struct qla_tc_param *);
 extern int qla24xx_get_one_block_sg(uint32_t, struct qla2_sgx *, uint32_t *);
 extern int qla24xx_configure_prot_mode(srb_t *, uint16_t *);
 extern int qla24xx_build_scsi_crc_2_iocbs(srb_t *,
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index 535079280288..ea027f6a7fd4 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -889,7 +889,7 @@ qla24xx_get_one_block_sg(uint32_t blk_sz, struct qla2_sgx *sgx,
 
 int
 qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *ha, srb_t *sp,
-	uint32_t *dsd, uint16_t tot_dsds, struct qla_tgt_cmd *tc)
+	uint32_t *dsd, uint16_t tot_dsds, struct qla_tc_param *tc)
 {
 	void *next_dsd;
 	uint8_t avail_dsds = 0;
@@ -898,7 +898,6 @@ qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *ha, srb_t *sp,
 	struct scatterlist *sg_prot;
 	uint32_t *cur_dsd = dsd;
 	uint16_t	used_dsds = tot_dsds;
-
 	uint32_t	prot_int; /* protection interval */
 	uint32_t	partial;
 	struct qla2_sgx sgx;
@@ -966,7 +965,7 @@ alloc_and_fill:
 			} else {
 				list_add_tail(&dsd_ptr->list,
 				    &(tc->ctx->dsd_list));
-				tc->ctx_dsd_alloced = 1;
+				*tc->ctx_dsd_alloced = 1;
 			}
 
 
@@ -1005,7 +1004,7 @@ alloc_and_fill:
 
 int
 qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd,
-	uint16_t tot_dsds, struct qla_tgt_cmd *tc)
+	uint16_t tot_dsds, struct qla_tc_param *tc)
 {
 	void *next_dsd;
 	uint8_t avail_dsds = 0;
@@ -1066,7 +1065,7 @@ qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd,
 			} else {
 				list_add_tail(&dsd_ptr->list,
 				    &(tc->ctx->dsd_list));
-				tc->ctx_dsd_alloced = 1;
+				*tc->ctx_dsd_alloced = 1;
 			}
 
 			/* add new list to cmd iocb or last list */
@@ -1092,7 +1091,7 @@ qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd,
 
 int
 qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp,
-	uint32_t *dsd, uint16_t tot_dsds, struct qla_tgt_cmd *tc)
+	uint32_t *dsd, uint16_t tot_dsds, struct qla_tc_param *tc)
 {
 	void *next_dsd;
 	uint8_t avail_dsds = 0;
@@ -1158,7 +1157,7 @@ qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp,
 			} else {
 				list_add_tail(&dsd_ptr->list,
 				    &(tc->ctx->dsd_list));
-				tc->ctx_dsd_alloced = 1;
+				*tc->ctx_dsd_alloced = 1;
 			}
 
 			/* add new list to cmd iocb or last list */
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 925d9b858b24..532004981dbd 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -143,6 +143,20 @@ static struct workqueue_struct *qla_tgt_wq;
 static DEFINE_MUTEX(qla_tgt_mutex);
 static LIST_HEAD(qla_tgt_glist);
 
+static const char *prot_op_str(u32 prot_op)
+{
+	switch (prot_op) {
+	case TARGET_PROT_NORMAL:	return "NORMAL";
+	case TARGET_PROT_DIN_INSERT:	return "DIN_INSERT";
+	case TARGET_PROT_DOUT_INSERT:	return "DOUT_INSERT";
+	case TARGET_PROT_DIN_STRIP:	return "DIN_STRIP";
+	case TARGET_PROT_DOUT_STRIP:	return "DOUT_STRIP";
+	case TARGET_PROT_DIN_PASS:	return "DIN_PASS";
+	case TARGET_PROT_DOUT_PASS:	return "DOUT_PASS";
+	default:			return "UNKNOWN";
+	}
+}
+
 /* This API intentionally takes dest as a parameter, rather than returning
  * int value to avoid caller forgetting to issue wmb() after the store */
 void qlt_do_generation_tick(struct scsi_qla_host *vha, int *dest)
@@ -2022,6 +2036,70 @@ void qlt_free_mcmd(struct qla_tgt_mgmt_cmd *mcmd)
 }
 EXPORT_SYMBOL(qlt_free_mcmd);
 
+/*
+ * ha->hardware_lock supposed to be held on entry. Might drop it, then
+ * reacquire
+ */
+void qlt_send_resp_ctio(scsi_qla_host_t *vha, struct qla_tgt_cmd *cmd,
+    uint8_t scsi_status, uint8_t sense_key, uint8_t asc, uint8_t ascq)
+{
+	struct atio_from_isp *atio = &cmd->atio;
+	struct ctio7_to_24xx *ctio;
+	uint16_t temp;
+
+	ql_dbg(ql_dbg_tgt_dif, vha, 0x3066,
+	    "Sending response CTIO7 (vha=%p, atio=%p, scsi_status=%02x, "
+	    "sense_key=%02x, asc=%02x, ascq=%02x",
+	    vha, atio, scsi_status, sense_key, asc, ascq);
+
+	ctio = (struct ctio7_to_24xx *)qla2x00_alloc_iocbs(vha, NULL);
+	if (!ctio) {
+		ql_dbg(ql_dbg_async, vha, 0x3067,
+		    "qla2x00t(%ld): %s failed: unable to allocate request packet",
+		    vha->host_no, __func__);
+		goto out;
+	}
+
+	ctio->entry_type = CTIO_TYPE7;
+	ctio->entry_count = 1;
+	ctio->handle = QLA_TGT_SKIP_HANDLE;
+	ctio->nport_handle = cmd->sess->loop_id;
+	ctio->timeout = cpu_to_le16(QLA_TGT_TIMEOUT);
+	ctio->vp_index = vha->vp_idx;
+	ctio->initiator_id[0] = atio->u.isp24.fcp_hdr.s_id[2];
+	ctio->initiator_id[1] = atio->u.isp24.fcp_hdr.s_id[1];
+	ctio->initiator_id[2] = atio->u.isp24.fcp_hdr.s_id[0];
+	ctio->exchange_addr = atio->u.isp24.exchange_addr;
+	ctio->u.status1.flags = (atio->u.isp24.attr << 9) |
+	    cpu_to_le16(CTIO7_FLAGS_STATUS_MODE_1 | CTIO7_FLAGS_SEND_STATUS);
+	temp = be16_to_cpu(atio->u.isp24.fcp_hdr.ox_id);
+	ctio->u.status1.ox_id = cpu_to_le16(temp);
+	ctio->u.status1.scsi_status =
+	    cpu_to_le16(SS_RESPONSE_INFO_LEN_VALID | scsi_status);
+	ctio->u.status1.response_len = cpu_to_le16(18);
+	ctio->u.status1.residual = cpu_to_le32(get_datalen_for_atio(atio));
+
+	if (ctio->u.status1.residual != 0)
+		ctio->u.status1.scsi_status |=
+		    cpu_to_le16(SS_RESIDUAL_UNDER);
+
+	/* Response code and sense key */
+	put_unaligned_le32(((0x70 << 24) | (sense_key << 8)),
+	    (&ctio->u.status1.sense_data)[0]);
+	/* Additional sense length */
+	put_unaligned_le32(0x0a, (&ctio->u.status1.sense_data)[1]);
+	/* ASC and ASCQ */
+	put_unaligned_le32(((asc << 24) | (ascq << 16)),
+	    (&ctio->u.status1.sense_data)[3]);
+
+	/* Memory Barrier */
+	wmb();
+
+	qla2x00_start_iocbs(vha, vha->req);
+out:
+	return;
+}
+
 /* callback from target fabric module code */
 void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *mcmd)
 {
@@ -2270,7 +2348,7 @@ static int qlt_24xx_build_ctio_pkt(struct qla_tgt_prm *prm,
 		 */
 		return -EAGAIN;
 	} else
-		ha->tgt.cmds[h-1] = prm->cmd;
+		ha->tgt.cmds[h - 1] = prm->cmd;
 
 	pkt->handle = h | CTIO_COMPLETION_HANDLE_MARK;
 	pkt->nport_handle = prm->cmd->loop_id;
@@ -2400,6 +2478,50 @@ static inline int qlt_has_data(struct qla_tgt_cmd *cmd)
 	return cmd->bufflen > 0;
 }
 
+static void qlt_print_dif_err(struct qla_tgt_prm *prm)
+{
+	struct qla_tgt_cmd *cmd;
+	struct scsi_qla_host *vha;
+
+	/* asc 0x10=dif error */
+	if (prm->sense_buffer && (prm->sense_buffer[12] == 0x10)) {
+		cmd = prm->cmd;
+		vha = cmd->vha;
+		/* ASCQ */
+		switch (prm->sense_buffer[13]) {
+		case 1:
+			ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+			    "BE detected Guard TAG ERR: lba[0x%llx|%lld] len[0x%x] "
+			    "se_cmd=%p tag[%x]",
+			    cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd,
+			    cmd->atio.u.isp24.exchange_addr);
+			break;
+		case 2:
+			ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+			    "BE detected APP TAG ERR: lba[0x%llx|%lld] len[0x%x] "
+			    "se_cmd=%p tag[%x]",
+			    cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd,
+			    cmd->atio.u.isp24.exchange_addr);
+			break;
+		case 3:
+			ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+			    "BE detected REF TAG ERR: lba[0x%llx|%lld] len[0x%x] "
+			    "se_cmd=%p tag[%x]",
+			    cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd,
+			    cmd->atio.u.isp24.exchange_addr);
+			break;
+		default:
+			ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+			    "BE detected Dif ERR: lba[%llx|%lld] len[%x] "
+			    "se_cmd=%p tag[%x]",
+			    cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd,
+			    cmd->atio.u.isp24.exchange_addr);
+			break;
+		}
+		ql_dump_buffer(ql_dbg_tgt_dif, vha, 0xffff, cmd->cdb, 16);
+	}
+}
+
 /*
  * Called without ha->hardware_lock held
  */
@@ -2521,18 +2643,9 @@ skip_explict_conf:
 		for (i = 0; i < prm->sense_buffer_len/4; i++)
 			((uint32_t *)ctio->u.status1.sense_data)[i] =
 				cpu_to_be32(((uint32_t *)prm->sense_buffer)[i]);
-#if 0
-		if (unlikely((prm->sense_buffer_len % 4) != 0)) {
-			static int q;
-			if (q < 10) {
-				ql_dbg(ql_dbg_tgt, vha, 0xe04f,
-				    "qla_target(%d): %d bytes of sense "
-				    "lost", prm->tgt->ha->vp_idx,
-				    prm->sense_buffer_len % 4);
-				q++;
-			}
-		}
-#endif
+
+		qlt_print_dif_err(prm);
+
 	} else {
 		ctio->u.status1.flags &=
 		    ~cpu_to_le16(CTIO7_FLAGS_STATUS_MODE_0);
@@ -2546,19 +2659,9 @@ skip_explict_conf:
 	/* Sense with len > 24, is it possible ??? */
 }
 
-
-
-/* diff  */
 static inline int
 qlt_hba_err_chk_enabled(struct se_cmd *se_cmd)
 {
-	/*
-	 * Uncomment when corresponding SCSI changes are done.
-	 *
-	 if (!sp->cmd->prot_chk)
-	 return 0;
-	 *
-	 */
 	switch (se_cmd->prot_op) {
 	case TARGET_PROT_DOUT_INSERT:
 	case TARGET_PROT_DIN_STRIP:
@@ -2579,16 +2682,38 @@ qlt_hba_err_chk_enabled(struct se_cmd *se_cmd)
 	return 0;
 }
 
+static inline int
+qla_tgt_ref_mask_check(struct se_cmd *se_cmd)
+{
+	switch (se_cmd->prot_op) {
+	case TARGET_PROT_DIN_INSERT:
+	case TARGET_PROT_DOUT_INSERT:
+	case TARGET_PROT_DIN_STRIP:
+	case TARGET_PROT_DOUT_STRIP:
+	case TARGET_PROT_DIN_PASS:
+	case TARGET_PROT_DOUT_PASS:
+	    return 1;
+	default:
+	    return 0;
+	}
+	return 0;
+}
+
 /*
- * qla24xx_set_t10dif_tags_from_cmd - Extract Ref and App tags from SCSI command
- *
+ * qla_tgt_set_dif_tags - Extract Ref and App tags from SCSI command
  */
-static inline void
-qlt_set_t10dif_tags(struct se_cmd *se_cmd, struct crc_context *ctx)
+static void
+qla_tgt_set_dif_tags(struct qla_tgt_cmd *cmd, struct crc_context *ctx,
+    uint16_t *pfw_prot_opts)
 {
+	struct se_cmd *se_cmd = &cmd->se_cmd;
 	uint32_t lba = 0xffffffff & se_cmd->t_task_lba;
+	scsi_qla_host_t *vha = cmd->tgt->vha;
+	struct qla_hw_data *ha = vha->hw;
+	uint32_t t32 = 0;
 
-	/* wait til Mode Sense/Select cmd, modepage Ah, subpage 2
+	/*
+	 * wait till Mode Sense/Select cmd, modepage Ah, subpage 2
 	 * have been immplemented by TCM, before AppTag is avail.
 	 * Look for modesense_handlers[]
 	 */
@@ -2596,65 +2721,73 @@ qlt_set_t10dif_tags(struct se_cmd *se_cmd, struct crc_context *ctx)
 	ctx->app_tag_mask[0] = 0x0;
 	ctx->app_tag_mask[1] = 0x0;
 
+	if (IS_PI_UNINIT_CAPABLE(ha)) {
+		if ((se_cmd->prot_type == TARGET_DIF_TYPE1_PROT) ||
+		    (se_cmd->prot_type == TARGET_DIF_TYPE2_PROT))
+			*pfw_prot_opts |= PO_DIS_VALD_APP_ESC;
+		else if (se_cmd->prot_type == TARGET_DIF_TYPE3_PROT)
+			*pfw_prot_opts |= PO_DIS_VALD_APP_REF_ESC;
+	}
+
+	t32 = ha->tgt.tgt_ops->get_dif_tags(cmd, pfw_prot_opts);
+
 	switch (se_cmd->prot_type) {
 	case TARGET_DIF_TYPE0_PROT:
 		/*
-		 * No check for ql2xenablehba_err_chk, as it would be an
-		 * I/O error if hba tag generation is not done.
+		 * No check for ql2xenablehba_err_chk, as it
+		 * would be an I/O error if hba tag generation
+		 * is not done.
 		 */
 		ctx->ref_tag = cpu_to_le32(lba);
-
-		if (!qlt_hba_err_chk_enabled(se_cmd))
-			break;
-
 		/* enable ALL bytes of the ref tag */
 		ctx->ref_tag_mask[0] = 0xff;
 		ctx->ref_tag_mask[1] = 0xff;
 		ctx->ref_tag_mask[2] = 0xff;
 		ctx->ref_tag_mask[3] = 0xff;
 		break;
-	/*
-	 * For TYpe 1 protection: 16 bit GUARD tag, 32 bit REF tag, and
-	 * 16 bit app tag.
-	 */
 	case TARGET_DIF_TYPE1_PROT:
-		ctx->ref_tag = cpu_to_le32(lba);
-
-		if (!qlt_hba_err_chk_enabled(se_cmd))
-			break;
-
-		/* enable ALL bytes of the ref tag */
-		ctx->ref_tag_mask[0] = 0xff;
-		ctx->ref_tag_mask[1] = 0xff;
-		ctx->ref_tag_mask[2] = 0xff;
-		ctx->ref_tag_mask[3] = 0xff;
-		break;
-	/*
-	 * For TYPE 2 protection: 16 bit GUARD + 32 bit REF tag has to
-	 * match LBA in CDB + N
-	 */
+	    /*
+	     * For TYPE 1 protection: 16 bit GUARD tag, 32 bit
+	     * REF tag, and 16 bit app tag.
+	     */
+	    ctx->ref_tag = cpu_to_le32(lba);
+	    if (!qla_tgt_ref_mask_check(se_cmd) ||
+		!(ha->tgt.tgt_ops->chk_dif_tags(t32))) {
+		    *pfw_prot_opts |= PO_DIS_REF_TAG_VALD;
+		    break;
+	    }
+	    /* enable ALL bytes of the ref tag */
+	    ctx->ref_tag_mask[0] = 0xff;
+	    ctx->ref_tag_mask[1] = 0xff;
+	    ctx->ref_tag_mask[2] = 0xff;
+	    ctx->ref_tag_mask[3] = 0xff;
+	    break;
 	case TARGET_DIF_TYPE2_PROT:
-		ctx->ref_tag = cpu_to_le32(lba);
-
-		if (!qlt_hba_err_chk_enabled(se_cmd))
-			break;
-
-		/* enable ALL bytes of the ref tag */
-		ctx->ref_tag_mask[0] = 0xff;
-		ctx->ref_tag_mask[1] = 0xff;
-		ctx->ref_tag_mask[2] = 0xff;
-		ctx->ref_tag_mask[3] = 0xff;
-		break;
-
-	/* For Type 3 protection: 16 bit GUARD only */
+	    /*
+	     * For TYPE 2 protection: 16 bit GUARD + 32 bit REF
+	     * tag has to match LBA in CDB + N
+	     */
+	    ctx->ref_tag = cpu_to_le32(lba);
+	    if (!qla_tgt_ref_mask_check(se_cmd) ||
+		!(ha->tgt.tgt_ops->chk_dif_tags(t32))) {
+		    *pfw_prot_opts |= PO_DIS_REF_TAG_VALD;
+		    break;
+	    }
+	    /* enable ALL bytes of the ref tag */
+	    ctx->ref_tag_mask[0] = 0xff;
+	    ctx->ref_tag_mask[1] = 0xff;
+	    ctx->ref_tag_mask[2] = 0xff;
+	    ctx->ref_tag_mask[3] = 0xff;
+	    break;
 	case TARGET_DIF_TYPE3_PROT:
-		ctx->ref_tag_mask[0] = ctx->ref_tag_mask[1] =
-			ctx->ref_tag_mask[2] = ctx->ref_tag_mask[3] = 0x00;
-		break;
+	    /* For TYPE 3 protection: 16 bit GUARD only */
+	    *pfw_prot_opts |= PO_DIS_REF_TAG_VALD;
+	    ctx->ref_tag_mask[0] = ctx->ref_tag_mask[1] =
+		ctx->ref_tag_mask[2] = ctx->ref_tag_mask[3] = 0x00;
+	    break;
 	}
 }
 
-
 static inline int
 qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
 {
@@ -2673,6 +2806,7 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
 	struct se_cmd		*se_cmd = &cmd->se_cmd;
 	uint32_t h;
 	struct atio_from_isp *atio = &prm->cmd->atio;
+	struct qla_tc_param	tc;
 	uint16_t t16;
 
 	ha = vha->hw;
@@ -2698,16 +2832,15 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
 	case TARGET_PROT_DIN_INSERT:
 	case TARGET_PROT_DOUT_STRIP:
 		transfer_length = data_bytes;
-		data_bytes += dif_bytes;
+		if (cmd->prot_sg_cnt)
+			data_bytes += dif_bytes;
 		break;
-
 	case TARGET_PROT_DIN_STRIP:
 	case TARGET_PROT_DOUT_INSERT:
 	case TARGET_PROT_DIN_PASS:
 	case TARGET_PROT_DOUT_PASS:
 		transfer_length = data_bytes + dif_bytes;
 		break;
-
 	default:
 		BUG();
 		break;
@@ -2743,7 +2876,6 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
 		break;
 	}
 
-
 	/* ---- PKT ---- */
 	/* Update entry type to indicate Command Type CRC_2 IOCB */
 	pkt->entry_type  = CTIO_CRC2;
@@ -2761,9 +2893,8 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
 	} else
 		ha->tgt.cmds[h-1] = prm->cmd;
 
-
 	pkt->handle  = h | CTIO_COMPLETION_HANDLE_MARK;
-	pkt->nport_handle = prm->cmd->loop_id;
+	pkt->nport_handle = cpu_to_le16(prm->cmd->loop_id);
 	pkt->timeout = cpu_to_le16(QLA_TGT_TIMEOUT);
 	pkt->initiator_id[0] = atio->u.isp24.fcp_hdr.s_id[2];
 	pkt->initiator_id[1] = atio->u.isp24.fcp_hdr.s_id[1];
@@ -2784,12 +2915,10 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
 	else if (cmd->dma_data_direction == DMA_FROM_DEVICE)
 		pkt->flags = cpu_to_le16(CTIO7_FLAGS_DATA_OUT);
 
-
 	pkt->dseg_count = prm->tot_dsds;
 	/* Fibre channel byte count */
 	pkt->transfer_length = cpu_to_le32(transfer_length);
 
-
 	/* ----- CRC context -------- */
 
 	/* Allocate CRC context from global pool */
@@ -2809,13 +2938,12 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
 	/* Set handle */
 	crc_ctx_pkt->handle = pkt->handle;
 
-	qlt_set_t10dif_tags(se_cmd, crc_ctx_pkt);
+	qla_tgt_set_dif_tags(cmd, crc_ctx_pkt, &fw_prot_opts);
 
 	pkt->crc_context_address[0] = cpu_to_le32(LSD(crc_ctx_dma));
 	pkt->crc_context_address[1] = cpu_to_le32(MSD(crc_ctx_dma));
 	pkt->crc_context_len = CRC_CONTEXT_LEN_FW;
 
-
 	if (!bundling) {
 		cur_dsd = (uint32_t *) &crc_ctx_pkt->u.nobundling.data_address;
 	} else {
@@ -2836,16 +2964,24 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
 	crc_ctx_pkt->byte_count = cpu_to_le32(data_bytes);
 	crc_ctx_pkt->guard_seed = cpu_to_le16(0);
 
+	memset((uint8_t *)&tc, 0 , sizeof(tc));
+	tc.vha = vha;
+	tc.blk_sz = cmd->blk_sz;
+	tc.bufflen = cmd->bufflen;
+	tc.sg = cmd->sg;
+	tc.prot_sg = cmd->prot_sg;
+	tc.ctx = crc_ctx_pkt;
+	tc.ctx_dsd_alloced = &cmd->ctx_dsd_alloced;
 
 	/* Walks data segments */
 	pkt->flags |= cpu_to_le16(CTIO7_FLAGS_DSD_PTR);
 
 	if (!bundling && prm->prot_seg_cnt) {
 		if (qla24xx_walk_and_build_sglist_no_difb(ha, NULL, cur_dsd,
-			prm->tot_dsds, cmd))
+			prm->tot_dsds, &tc))
 			goto crc_queuing_error;
 	} else if (qla24xx_walk_and_build_sglist(ha, NULL, cur_dsd,
-		(prm->tot_dsds - prm->prot_seg_cnt), cmd))
+		(prm->tot_dsds - prm->prot_seg_cnt), &tc))
 		goto crc_queuing_error;
 
 	if (bundling && prm->prot_seg_cnt) {
@@ -2854,18 +2990,18 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
 
 		cur_dsd = (uint32_t *) &crc_ctx_pkt->u.bundling.dif_address;
 		if (qla24xx_walk_and_build_prot_sglist(ha, NULL, cur_dsd,
-			prm->prot_seg_cnt, cmd))
+			prm->prot_seg_cnt, &tc))
 			goto crc_queuing_error;
 	}
 	return QLA_SUCCESS;
 
 crc_queuing_error:
 	/* Cleanup will be performed by the caller */
+	vha->hw->tgt.cmds[h - 1] = NULL;
 
 	return QLA_FUNCTION_FAILED;
 }
 
-
 /*
  * Callback to setup response of xmit_type of QLA_TGT_XMIT_DATA and *
  * QLA_TGT_XMIT_STATUS for >= 24xx silicon
@@ -3113,139 +3249,113 @@ EXPORT_SYMBOL(qlt_rdy_to_xfer);
 
 
 /*
- * Checks the guard or meta-data for the type of error
- * detected by the HBA.
+ * it is assumed either hardware_lock or qpair lock is held.
  */
-static inline int
+static void
 qlt_handle_dif_error(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd,
-		struct ctio_crc_from_fw *sts)
+	struct ctio_crc_from_fw *sts)
 {
 	uint8_t		*ap = &sts->actual_dif[0];
 	uint8_t		*ep = &sts->expected_dif[0];
-	uint32_t	e_ref_tag, a_ref_tag;
-	uint16_t	e_app_tag, a_app_tag;
-	uint16_t	e_guard, a_guard;
 	uint64_t	lba = cmd->se_cmd.t_task_lba;
+	uint8_t scsi_status, sense_key, asc, ascq;
+	unsigned long flags;
 
-	a_guard   = be16_to_cpu(*(uint16_t *)(ap + 0));
-	a_app_tag = be16_to_cpu(*(uint16_t *)(ap + 2));
-	a_ref_tag = be32_to_cpu(*(uint32_t *)(ap + 4));
-
-	e_guard   = be16_to_cpu(*(uint16_t *)(ep + 0));
-	e_app_tag = be16_to_cpu(*(uint16_t *)(ep + 2));
-	e_ref_tag = be32_to_cpu(*(uint32_t *)(ep + 4));
-
-	ql_dbg(ql_dbg_tgt, vha, 0xe075,
-	    "iocb(s) %p Returned STATUS.\n", sts);
-
-	ql_dbg(ql_dbg_tgt, vha, 0xf075,
-	    "dif check TGT cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x]\n",
-	    cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba,
-	    a_ref_tag, e_ref_tag, a_app_tag, e_app_tag, a_guard, e_guard);
-
-	/*
-	 * Ignore sector if:
-	 * For type     3: ref & app tag is all 'f's
-	 * For type 0,1,2: app tag is all 'f's
-	 */
-	if ((a_app_tag == 0xffff) &&
-	    ((cmd->se_cmd.prot_type != TARGET_DIF_TYPE3_PROT) ||
-	     (a_ref_tag == 0xffffffff))) {
-		uint32_t blocks_done;
-
-		/* 2TB boundary case covered automatically with this */
-		blocks_done = e_ref_tag - (uint32_t)lba + 1;
-		cmd->se_cmd.bad_sector = e_ref_tag;
-		cmd->se_cmd.pi_err = 0;
-		ql_dbg(ql_dbg_tgt, vha, 0xf074,
-			"need to return scsi good\n");
-
-		/* Update protection tag */
-		if (cmd->prot_sg_cnt) {
-			uint32_t i, k = 0, num_ent;
-			struct scatterlist *sg, *sgl;
-
+	cmd->trc_flags |= TRC_DIF_ERR;
 
-			sgl = cmd->prot_sg;
+	cmd->a_guard   = be16_to_cpu(*(uint16_t *)(ap + 0));
+	cmd->a_app_tag = be16_to_cpu(*(uint16_t *)(ap + 2));
+	cmd->a_ref_tag = be32_to_cpu(*(uint32_t *)(ap + 4));
 
-			/* Patch the corresponding protection tags */
-			for_each_sg(sgl, sg, cmd->prot_sg_cnt, i) {
-				num_ent = sg_dma_len(sg) / 8;
-				if (k + num_ent < blocks_done) {
-					k += num_ent;
-					continue;
-				}
-				k = blocks_done;
-				break;
-			}
+	cmd->e_guard   = be16_to_cpu(*(uint16_t *)(ep + 0));
+	cmd->e_app_tag = be16_to_cpu(*(uint16_t *)(ep + 2));
+	cmd->e_ref_tag = be32_to_cpu(*(uint32_t *)(ep + 4));
 
-			if (k != blocks_done) {
-				ql_log(ql_log_warn, vha, 0xf076,
-				    "unexpected tag values tag:lba=%u:%llu)\n",
-				    e_ref_tag, (unsigned long long)lba);
-				goto out;
-			}
+	ql_dbg(ql_dbg_tgt_dif, vha, 0xf075,
+	    "%s: aborted %d state %d\n", __func__, cmd->aborted, cmd->state);
 
-#if 0
-			struct sd_dif_tuple *spt;
-			/* TODO:
-			 * This section came from initiator. Is it valid here?
-			 * should ulp be override with actual val???
-			 */
-			spt = page_address(sg_page(sg)) + sg->offset;
-			spt += j;
+	scsi_status = sense_key = asc = ascq = 0;
 
-			spt->app_tag = 0xffff;
-			if (cmd->se_cmd.prot_type == SCSI_PROT_DIF_TYPE3)
-				spt->ref_tag = 0xffffffff;
-#endif
-		}
-
-		return 0;
-	}
-
-	/* check guard */
-	if (e_guard != a_guard) {
-		cmd->se_cmd.pi_err = TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED;
-		cmd->se_cmd.bad_sector = cmd->se_cmd.t_task_lba;
-
-		ql_log(ql_log_warn, vha, 0xe076,
-		    "Guard ERR: cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x] cmd=%p\n",
-		    cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba,
-		    a_ref_tag, e_ref_tag, a_app_tag, e_app_tag,
-		    a_guard, e_guard, cmd);
-		goto out;
+	/* check appl tag */
+	if (cmd->e_app_tag != cmd->a_app_tag) {
+		ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+			"App Tag ERR: cdb[%x] lba[%llx %llx] blks[%x] [Actual|Expected] "
+			"Ref[%x|%x], App[%x|%x], "
+			"Guard [%x|%x] cmd=%p ox_id[%04x]",
+			cmd->cdb[0], lba, (lba+cmd->num_blks), cmd->num_blks,
+			cmd->a_ref_tag, cmd->e_ref_tag,
+			cmd->a_app_tag, cmd->e_app_tag,
+			cmd->a_guard, cmd->e_guard,
+			cmd, cmd->atio.u.isp24.fcp_hdr.ox_id);
+
+		cmd->dif_err_code = DIF_ERR_APP;
+		scsi_status = SAM_STAT_CHECK_CONDITION;
+		sense_key = ABORTED_COMMAND;
+		asc = 0x10;
+		ascq = 0x2;
 	}
 
 	/* check ref tag */
-	if (e_ref_tag != a_ref_tag) {
-		cmd->se_cmd.pi_err = TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED;
-		cmd->se_cmd.bad_sector = e_ref_tag;
-
-		ql_log(ql_log_warn, vha, 0xe077,
-			"Ref Tag ERR: cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x] cmd=%p\n",
-			cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba,
-			a_ref_tag, e_ref_tag, a_app_tag, e_app_tag,
-			a_guard, e_guard, cmd);
+	if (cmd->e_ref_tag != cmd->a_ref_tag) {
+		ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+			"Ref Tag ERR: cdb[%x] lba[%llx %llx] blks[%x] [Actual|Expected] "
+			"Ref[%x|%x], App[%x|%x], "
+			"Guard[%x|%x] cmd=%p ox_id[%04x] ",
+			cmd->cdb[0], lba, (lba+cmd->num_blks), cmd->num_blks,
+			cmd->a_ref_tag, cmd->e_ref_tag,
+			cmd->a_app_tag, cmd->e_app_tag,
+			cmd->a_guard, cmd->e_guard,
+			cmd, cmd->atio.u.isp24.fcp_hdr.ox_id);
+
+		cmd->dif_err_code = DIF_ERR_REF;
+		scsi_status = SAM_STAT_CHECK_CONDITION;
+		sense_key = ABORTED_COMMAND;
+		asc = 0x10;
+		ascq = 0x3;
 		goto out;
 	}
 
-	/* check appl tag */
-	if (e_app_tag != a_app_tag) {
-		cmd->se_cmd.pi_err = TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED;
-		cmd->se_cmd.bad_sector = cmd->se_cmd.t_task_lba;
-
-		ql_log(ql_log_warn, vha, 0xe078,
-			"App Tag ERR: cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x] cmd=%p\n",
-			cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba,
-			a_ref_tag, e_ref_tag, a_app_tag, e_app_tag,
-			a_guard, e_guard, cmd);
-		goto out;
+	/* check guard */
+	if (cmd->e_guard != cmd->a_guard) {
+		ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+			"Guard ERR: cdb[%x] lba[%llx %llx] blks[%x] [Actual|Expected] "
+			"Ref[%x|%x], App[%x|%x], "
+			"Guard [%x|%x] cmd=%p ox_id[%04x]",
+			cmd->cdb[0], lba, (lba+cmd->num_blks), cmd->num_blks,
+			cmd->a_ref_tag, cmd->e_ref_tag,
+			cmd->a_app_tag, cmd->e_app_tag,
+			cmd->a_guard, cmd->e_guard,
+			cmd, cmd->atio.u.isp24.fcp_hdr.ox_id);
+		cmd->dif_err_code = DIF_ERR_GRD;
+		scsi_status = SAM_STAT_CHECK_CONDITION;
+		sense_key = ABORTED_COMMAND;
+		asc = 0x10;
+		ascq = 0x1;
 	}
 out:
-	return 1;
-}
+	switch (cmd->state) {
+	case QLA_TGT_STATE_NEED_DATA:
+		/* handle_data will load DIF error code  */
+		cmd->state = QLA_TGT_STATE_DATA_IN;
+		vha->hw->tgt.tgt_ops->handle_data(cmd);
+		break;
+	default:
+		spin_lock_irqsave(&cmd->cmd_lock, flags);
+		if (cmd->aborted) {
+			spin_unlock_irqrestore(&cmd->cmd_lock, flags);
+			vha->hw->tgt.tgt_ops->free_cmd(cmd);
+			break;
+		}
+		spin_unlock_irqrestore(&cmd->cmd_lock, flags);
 
+		qlt_send_resp_ctio(vha, cmd, scsi_status, sense_key, asc, ascq);
+		/* assume scsi status gets out on the wire.
+		 * Will not wait for completion.
+		 */
+		vha->hw->tgt.tgt_ops->free_cmd(cmd);
+		break;
+	}
+}
 
 /* If hardware_lock held on entry, might drop it, then reaquire */
 /* This function sends the appropriate CTIO to ISP 2xxx or 24xx */
@@ -3552,6 +3662,16 @@ static int qlt_term_ctio_exchange(struct scsi_qla_host *vha, void *ctio,
 {
 	int term = 0;
 
+	if (cmd->se_cmd.prot_op)
+		ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+		    "Term DIF cmd: lba[0x%llx|%lld] len[0x%x] "
+		    "se_cmd=%p tag[%x] op %#x/%s",
+		     cmd->lba, cmd->lba,
+		     cmd->num_blks, &cmd->se_cmd,
+		     cmd->atio.u.isp24.exchange_addr,
+		     cmd->se_cmd.prot_op,
+		     prot_op_str(cmd->se_cmd.prot_op));
+
 	if (ctio != NULL) {
 		struct ctio7_from_24xx *c = (struct ctio7_from_24xx *)ctio;
 		term = !(c->flags &
@@ -3769,32 +3889,15 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle,
 			struct ctio_crc_from_fw *crc =
 				(struct ctio_crc_from_fw *)ctio;
 			ql_dbg(ql_dbg_tgt_mgt, vha, 0xf073,
-			    "qla_target(%d): CTIO with DIF_ERROR status %x received (state %x, se_cmd %p) actual_dif[0x%llx] expect_dif[0x%llx]\n",
+			    "qla_target(%d): CTIO with DIF_ERROR status %x "
+			    "received (state %x, ulp_cmd %p) actual_dif[0x%llx] "
+			    "expect_dif[0x%llx]\n",
 			    vha->vp_idx, status, cmd->state, se_cmd,
 			    *((u64 *)&crc->actual_dif[0]),
 			    *((u64 *)&crc->expected_dif[0]));
 
-			if (qlt_handle_dif_error(vha, cmd, ctio)) {
-				if (cmd->state == QLA_TGT_STATE_NEED_DATA) {
-					/* scsi Write/xfer rdy complete */
-					goto skip_term;
-				} else {
-					/* scsi read/xmit respond complete
-					 * call handle dif to send scsi status
-					 * rather than terminate exchange.
-					 */
-					cmd->state = QLA_TGT_STATE_PROCESSED;
-					ha->tgt.tgt_ops->handle_dif_err(cmd);
-					return;
-				}
-			} else {
-				/* Need to generate a SCSI good completion.
-				 * because FW did not send scsi status.
-				 */
-				status = 0;
-				goto skip_term;
-			}
-			break;
+			qlt_handle_dif_error(vha, cmd, ctio);
+			return;
 		}
 		default:
 			ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05b,
@@ -3817,7 +3920,6 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle,
 				return;
 		}
 	}
-skip_term:
 
 	if (cmd->state == QLA_TGT_STATE_PROCESSED) {
 		cmd->trc_flags |= TRC_CTIO_DONE;
diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h
index a7f90dcaae37..c35f889b94a6 100644
--- a/drivers/scsi/qla2xxx/qla_target.h
+++ b/drivers/scsi/qla2xxx/qla_target.h
@@ -378,6 +378,14 @@ static inline void adjust_corrupted_atio(struct atio_from_isp *atio)
 	atio->u.isp24.fcp_cmnd.add_cdb_len = 0;
 }
 
+static inline int get_datalen_for_atio(struct atio_from_isp *atio)
+{
+	int len = atio->u.isp24.fcp_cmnd.add_cdb_len;
+
+	return (be32_to_cpu(get_unaligned((uint32_t *)
+	    &atio->u.isp24.fcp_cmnd.add_cdb[len * 4])));
+}
+
 #define CTIO_TYPE7 0x12 /* Continue target I/O entry (for 24xx) */
 
 /*
@@ -667,7 +675,6 @@ struct qla_tgt_func_tmpl {
 	int (*handle_cmd)(struct scsi_qla_host *, struct qla_tgt_cmd *,
 			unsigned char *, uint32_t, int, int, int);
 	void (*handle_data)(struct qla_tgt_cmd *);
-	void (*handle_dif_err)(struct qla_tgt_cmd *);
 	int (*handle_tmr)(struct qla_tgt_mgmt_cmd *, uint32_t, uint16_t,
 			uint32_t);
 	void (*free_cmd)(struct qla_tgt_cmd *);
@@ -684,6 +691,8 @@ struct qla_tgt_func_tmpl {
 	void (*clear_nacl_from_fcport_map)(struct fc_port *);
 	void (*put_sess)(struct fc_port *);
 	void (*shutdown_sess)(struct fc_port *);
+	int (*get_dif_tags)(struct qla_tgt_cmd *cmd, uint16_t *pfw_prot_opts);
+	int (*chk_dif_tags)(uint32_t tag);
 };
 
 int qla2x00_wait_for_hba_online(struct scsi_qla_host *);
@@ -720,8 +729,8 @@ int qla2x00_wait_for_hba_online(struct scsi_qla_host *);
 #define QLA_TGT_ABORT_ALL               0xFFFE
 #define QLA_TGT_NEXUS_LOSS_SESS         0xFFFD
 #define QLA_TGT_NEXUS_LOSS              0xFFFC
-#define QLA_TGT_ABTS					0xFFFB
-#define QLA_TGT_2G_ABORT_TASK			0xFFFA
+#define QLA_TGT_ABTS			0xFFFB
+#define QLA_TGT_2G_ABORT_TASK		0xFFFA
 
 /* Notify Acknowledge flags */
 #define NOTIFY_ACK_RES_COUNT        BIT_8
@@ -845,6 +854,7 @@ enum trace_flags {
 	TRC_CMD_FREE = BIT_17,
 	TRC_DATA_IN = BIT_18,
 	TRC_ABORT = BIT_19,
+	TRC_DIF_ERR = BIT_20,
 };
 
 struct qla_tgt_cmd {
@@ -862,7 +872,6 @@ struct qla_tgt_cmd {
 	unsigned int sg_mapped:1;
 	unsigned int free_sg:1;
 	unsigned int write_data_transferred:1;
-	unsigned int ctx_dsd_alloced:1;
 	unsigned int q_full:1;
 	unsigned int term_exchg:1;
 	unsigned int cmd_sent_to_fw:1;
@@ -885,11 +894,25 @@ struct qla_tgt_cmd {
 	struct list_head cmd_list;
 
 	struct atio_from_isp atio;
-	/* t10dif */
+
+	uint8_t ctx_dsd_alloced;
+
+	/* T10-DIF */
+#define DIF_ERR_NONE 0
+#define DIF_ERR_GRD 1
+#define DIF_ERR_REF 2
+#define DIF_ERR_APP 3
+	int8_t dif_err_code;
 	struct scatterlist *prot_sg;
 	uint32_t prot_sg_cnt;
-	uint32_t blk_sz;
+	uint32_t blk_sz, num_blks;
+	uint8_t scsi_status, sense_key, asc, ascq;
+
 	struct crc_context *ctx;
+	uint8_t		*cdb;
+	uint64_t	lba;
+	uint16_t	a_guard, e_guard, a_app_tag, e_app_tag;
+	uint32_t	a_ref_tag, e_ref_tag;
 
 	uint64_t jiffies_at_alloc;
 	uint64_t jiffies_at_free;
@@ -1053,4 +1076,7 @@ extern int qlt_free_qfull_cmds(struct scsi_qla_host *);
 extern void qlt_logo_completion_handler(fc_port_t *, int);
 extern void qlt_do_generation_tick(struct scsi_qla_host *, int *);
 
+void qlt_send_resp_ctio(scsi_qla_host_t *, struct qla_tgt_cmd *, uint8_t,
+    uint8_t, uint8_t, uint8_t);
+
 #endif /* __QLA_TARGET_H */
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index 8e8ab0fa9672..7443e4efa3ae 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -531,6 +531,24 @@ static void tcm_qla2xxx_handle_data_work(struct work_struct *work)
 			return;
 		}
 
+		switch (cmd->dif_err_code) {
+		case DIF_ERR_GRD:
+			cmd->se_cmd.pi_err =
+			    TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED;
+			break;
+		case DIF_ERR_REF:
+			cmd->se_cmd.pi_err =
+			    TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED;
+			break;
+		case DIF_ERR_APP:
+			cmd->se_cmd.pi_err =
+			    TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED;
+			break;
+		case DIF_ERR_NONE:
+		default:
+			break;
+		}
+
 		if (cmd->se_cmd.pi_err)
 			transport_generic_request_failure(&cmd->se_cmd,
 				cmd->se_cmd.pi_err);
@@ -555,25 +573,23 @@ static void tcm_qla2xxx_handle_data(struct qla_tgt_cmd *cmd)
 	queue_work_on(smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work);
 }
 
-static void tcm_qla2xxx_handle_dif_work(struct work_struct *work)
+static int tcm_qla2xxx_chk_dif_tags(uint32_t tag)
 {
-	struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work);
-
-	/* take an extra kref to prevent cmd free too early.
-	 * need to wait for SCSI status/check condition to
-	 * finish responding generate by transport_generic_request_failure.
-	 */
-	kref_get(&cmd->se_cmd.cmd_kref);
-	transport_generic_request_failure(&cmd->se_cmd, cmd->se_cmd.pi_err);
+	return 0;
 }
 
-/*
- * Called from qla_target.c:qlt_do_ctio_completion()
- */
-static void tcm_qla2xxx_handle_dif_err(struct qla_tgt_cmd *cmd)
+static int tcm_qla2xxx_dif_tags(struct qla_tgt_cmd *cmd,
+    uint16_t *pfw_prot_opts)
 {
-	INIT_WORK(&cmd->work, tcm_qla2xxx_handle_dif_work);
-	queue_work(tcm_qla2xxx_free_wq, &cmd->work);
+	struct se_cmd *se_cmd = &cmd->se_cmd;
+
+	if (!(se_cmd->prot_checks & TARGET_DIF_CHECK_GUARD))
+		*pfw_prot_opts |= PO_DISABLE_GUARD_CHECK;
+
+	if (!(se_cmd->prot_checks & TARGET_DIF_CHECK_APPTAG))
+		*pfw_prot_opts |= PO_DIS_APP_TAG_VALD;
+
+	return 0;
 }
 
 /*
@@ -1610,7 +1626,6 @@ static void tcm_qla2xxx_update_sess(struct fc_port *sess, port_id_t s_id,
 static struct qla_tgt_func_tmpl tcm_qla2xxx_template = {
 	.handle_cmd		= tcm_qla2xxx_handle_cmd,
 	.handle_data		= tcm_qla2xxx_handle_data,
-	.handle_dif_err		= tcm_qla2xxx_handle_dif_err,
 	.handle_tmr		= tcm_qla2xxx_handle_tmr,
 	.free_cmd		= tcm_qla2xxx_free_cmd,
 	.free_mcmd		= tcm_qla2xxx_free_mcmd,
@@ -1622,6 +1637,8 @@ static struct qla_tgt_func_tmpl tcm_qla2xxx_template = {
 	.clear_nacl_from_fcport_map = tcm_qla2xxx_clear_nacl_from_fcport_map,
 	.put_sess		= tcm_qla2xxx_put_sess,
 	.shutdown_sess		= tcm_qla2xxx_shutdown_sess,
+	.get_dif_tags		= tcm_qla2xxx_dif_tags,
+	.chk_dif_tags		= tcm_qla2xxx_chk_dif_tags,
 };
 
 static int tcm_qla2xxx_init_lport(struct tcm_qla2xxx_lport *lport)
-- 
cgit v1.2.3


From 54b9993c8cf2d77c0f23be828a22e0817f742442 Mon Sep 17 00:00:00 2001
From: Anil Gurumurthy <anil.gurumurthy@cavium.com>
Date: Wed, 15 Mar 2017 09:48:50 -0700
Subject: qla2xxx: Export DIF stats via debugfs

Signed-off-by: Anil Gurumurthy <anil.gurumurthy@cavium.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/scsi/qla2xxx/qla_def.h | 12 ++++++++++++
 drivers/scsi/qla2xxx/qla_dfs.c | 15 +++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 9d1d3dcf1c87..8228dfac6a31 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -3108,6 +3108,16 @@ struct qla_chip_state_84xx {
 	uint32_t gold_fw_version;
 };
 
+struct qla_dif_statistics {
+	uint64_t dif_input_bytes;
+	uint64_t dif_output_bytes;
+	uint64_t dif_input_requests;
+	uint64_t dif_output_requests;
+	uint32_t dif_guard_err;
+	uint32_t dif_ref_tag_err;
+	uint32_t dif_app_tag_err;
+};
+
 struct qla_statistics {
 	uint32_t total_isp_aborts;
 	uint64_t input_bytes;
@@ -3120,6 +3130,8 @@ struct qla_statistics {
 	uint32_t stat_max_pend_cmds;
 	uint32_t stat_max_qfull_cmds_alloc;
 	uint32_t stat_max_qfull_cmds_dropped;
+
+	struct qla_dif_statistics qla_dif_stats;
 };
 
 struct bidi_statistics {
diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c
index b48cce696bac..3b35905619b0 100644
--- a/drivers/scsi/qla2xxx/qla_dfs.c
+++ b/drivers/scsi/qla2xxx/qla_dfs.c
@@ -114,6 +114,21 @@ qla_dfs_tgt_counters_show(struct seq_file *s, void *unused)
 	seq_printf(s, "num Q full sent = %lld\n",
 		vha->tgt_counters.num_q_full_sent);
 
+	/* DIF stats */
+	seq_printf(s, "DIF Inp Bytes = %lld\n",
+		vha->qla_stats.qla_dif_stats.dif_input_bytes);
+	seq_printf(s, "DIF Outp Bytes = %lld\n",
+		vha->qla_stats.qla_dif_stats.dif_output_bytes);
+	seq_printf(s, "DIF Inp Req = %lld\n",
+		vha->qla_stats.qla_dif_stats.dif_input_requests);
+	seq_printf(s, "DIF Outp Req = %lld\n",
+		vha->qla_stats.qla_dif_stats.dif_output_requests);
+	seq_printf(s, "DIF Guard err = %d\n",
+		vha->qla_stats.qla_dif_stats.dif_guard_err);
+	seq_printf(s, "DIF Ref tag err = %d\n",
+		vha->qla_stats.qla_dif_stats.dif_ref_tag_err);
+	seq_printf(s, "DIF App tag err = %d\n",
+		vha->qla_stats.qla_dif_stats.dif_app_tag_err);
 	return 0;
 }
 
-- 
cgit v1.2.3


From f1443eebca7792b3b8b41b27652d67ddc5d31fa2 Mon Sep 17 00:00:00 2001
From: Quinn Tran <quinn.tran@cavium.com>
Date: Wed, 15 Mar 2017 09:48:51 -0700
Subject: qla2xxx: Add async new target notification

Signed-off-by: Quinn Tran <quinn.tran@cavium.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/scsi/qla2xxx/qla_target.c | 6 +++---
 drivers/scsi/qla2xxx/qla_target.h | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 532004981dbd..563116188c43 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -6005,13 +6005,13 @@ int qlt_add_target(struct qla_hw_data *ha, struct scsi_qla_host *base_vha)
 	tgt->datasegs_per_cmd = QLA_TGT_DATASEGS_PER_CMD_24XX;
 	tgt->datasegs_per_cont = QLA_TGT_DATASEGS_PER_CONT_24XX;
 
-	if (base_vha->fc_vport)
-		return 0;
-
 	mutex_lock(&qla_tgt_mutex);
 	list_add_tail(&tgt->tgt_list_entry, &qla_tgt_glist);
 	mutex_unlock(&qla_tgt_mutex);
 
+	if (ha->tgt.tgt_ops && ha->tgt.tgt_ops->add_target)
+		ha->tgt.tgt_ops->add_target(base_vha);
+
 	return 0;
 }
 
diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h
index c35f889b94a6..d64420251194 100644
--- a/drivers/scsi/qla2xxx/qla_target.h
+++ b/drivers/scsi/qla2xxx/qla_target.h
@@ -693,6 +693,7 @@ struct qla_tgt_func_tmpl {
 	void (*shutdown_sess)(struct fc_port *);
 	int (*get_dif_tags)(struct qla_tgt_cmd *cmd, uint16_t *pfw_prot_opts);
 	int (*chk_dif_tags)(uint32_t tag);
+	void (*add_target)(struct scsi_qla_host *);
 };
 
 int qla2x00_wait_for_hba_online(struct scsi_qla_host *);
-- 
cgit v1.2.3


From 15f30a5752287f20c7de428423c34bc51cfbe465 Mon Sep 17 00:00:00 2001
From: Quinn Tran <quinn.tran@cavium.com>
Date: Wed, 15 Mar 2017 09:48:52 -0700
Subject: qla2xxx: Use IOCB interface to submit non-critical MBX.

The Mailbox interface is currently over subscribed. We like
to reserve the Mailbox interface for the chip managment and
link initialization. Any non essential Mailbox command will
be routed through the IOCB interface. The IOCB interface is
able to absorb more commands.

Following commands are being routed through IOCB interface

- Get ID List (007Ch)
- Get Port DB (0064h)
- Get Link Priv Stats (006Dh)

Signed-off-by: Quinn Tran <quinn.tran@cavium.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/scsi/qla2xxx/qla_def.h    |  12 +-
 drivers/scsi/qla2xxx/qla_gbl.h    |  10 +-
 drivers/scsi/qla2xxx/qla_init.c   |  46 +------
 drivers/scsi/qla2xxx/qla_isr.c    |   2 +-
 drivers/scsi/qla2xxx/qla_mbx.c    | 270 ++++++++++++++++++++++++++++++++++++--
 drivers/scsi/qla2xxx/qla_target.c |   4 +-
 6 files changed, 279 insertions(+), 65 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 8228dfac6a31..ae38b7a789b1 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -395,11 +395,15 @@ struct srb_iocb {
 			struct completion comp;
 		} abt;
 		struct ct_arg ctarg;
+#define MAX_IOCB_MB_REG 28
+#define SIZEOF_IOCB_MB_REG (MAX_IOCB_MB_REG * sizeof(uint16_t))
 		struct {
-			__le16 in_mb[28]; 	/* fr fw */
-			__le16 out_mb[28];	/* to fw */
+			__le16 in_mb[MAX_IOCB_MB_REG];	/* from FW */
+			__le16 out_mb[MAX_IOCB_MB_REG];	/* to FW */
 			void *out, *in;
 			dma_addr_t out_dma, in_dma;
+			struct completion comp;
+			int rc;
 		} mbx;
 		struct {
 			struct imm_ntfy_from_isp *ntfy;
@@ -437,7 +441,7 @@ typedef struct srb {
 	uint32_t handle;
 	uint16_t flags;
 	uint16_t type;
-	char *name;
+	const char *name;
 	int iocbs;
 	struct qla_qpair *qpair;
 	u32 gen1;	/* scratch */
@@ -3364,6 +3368,8 @@ struct qla_hw_data {
 		uint32_t	exlogins_enabled:1;
 		uint32_t	exchoffld_enabled:1;
 		/* 35 bits */
+
+		uint32_t	fw_started:1;
 	} flags;
 
 	/* This spinlock is used to protect "io transactions", you must
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index ca6f122e5865..323b912b47f7 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -193,6 +193,7 @@ extern int qla24xx_post_upd_fcport_work(struct scsi_qla_host *, fc_port_t *);
 void qla2x00_handle_login_done_event(struct scsi_qla_host *, fc_port_t *,
 	uint16_t *);
 int qla24xx_post_gnl_work(struct scsi_qla_host *, fc_port_t *);
+int qla24xx_async_abort_cmd(srb_t *);
 
 /*
  * Global Functions in qla_mid.c source file.
@@ -368,7 +369,7 @@ qla2x00_get_link_status(scsi_qla_host_t *, uint16_t, struct link_statistics *,
 
 extern int
 qla24xx_get_isp_stats(scsi_qla_host_t *, struct link_statistics *,
-    dma_addr_t, uint);
+    dma_addr_t, uint16_t);
 
 extern int qla24xx_abort_command(srb_t *);
 extern int qla24xx_async_abort_command(srb_t *);
@@ -472,6 +473,13 @@ qla2x00_dump_mctp_data(scsi_qla_host_t *, dma_addr_t, uint32_t, uint32_t);
 extern int
 qla26xx_dport_diagnostics(scsi_qla_host_t *, void *, uint, uint);
 
+int qla24xx_send_mb_cmd(struct scsi_qla_host *, mbx_cmd_t *);
+int qla24xx_gpdb_wait(struct scsi_qla_host *, fc_port_t *, u8);
+int qla24xx_gidlist_wait(struct scsi_qla_host *, void *, dma_addr_t,
+    uint16_t *);
+int __qla24xx_parse_gpdb(struct scsi_qla_host *, fc_port_t *,
+	struct port_database_24xx *);
+
 /*
  * Global Function Prototypes in qla_isr.c source file.
  */
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index a7865a5d556d..b1bfa63f7d4e 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -629,7 +629,6 @@ void qla24xx_async_gpdb_sp_done(void *s, int res)
 	struct srb *sp = s;
 	struct scsi_qla_host *vha = sp->vha;
 	struct qla_hw_data *ha = vha->hw;
-	uint64_t zero = 0;
 	struct port_database_24xx *pd;
 	fc_port_t *fcport = sp->fcport;
 	u16 *mb = sp->u.iocb_cmd.u.mbx.in_mb;
@@ -649,48 +648,7 @@ void qla24xx_async_gpdb_sp_done(void *s, int res)
 
 	pd = (struct port_database_24xx *)sp->u.iocb_cmd.u.mbx.in;
 
-	/* Check for logged in state. */
-	if (pd->current_login_state != PDS_PRLI_COMPLETE &&
-	    pd->last_login_state != PDS_PRLI_COMPLETE) {
-		ql_dbg(ql_dbg_mbx, vha, 0xffff,
-		    "Unable to verify login-state (%x/%x) for "
-		    "loop_id %x.\n", pd->current_login_state,
-		    pd->last_login_state, fcport->loop_id);
-		rval = QLA_FUNCTION_FAILED;
-		goto gpd_error_out;
-	}
-
-	if (fcport->loop_id == FC_NO_LOOP_ID ||
-	    (memcmp(fcport->port_name, (uint8_t *)&zero, 8) &&
-		memcmp(fcport->port_name, pd->port_name, 8))) {
-		/* We lost the device mid way. */
-		rval = QLA_NOT_LOGGED_IN;
-		goto gpd_error_out;
-	}
-
-	/* Names are little-endian. */
-	memcpy(fcport->node_name, pd->node_name, WWN_SIZE);
-
-	/* Get port_id of device. */
-	fcport->d_id.b.domain = pd->port_id[0];
-	fcport->d_id.b.area = pd->port_id[1];
-	fcport->d_id.b.al_pa = pd->port_id[2];
-	fcport->d_id.b.rsvd_1 = 0;
-
-	/* If not target must be initiator or unknown type. */
-	if ((pd->prli_svc_param_word_3[0] & BIT_4) == 0)
-		fcport->port_type = FCT_INITIATOR;
-	else
-		fcport->port_type = FCT_TARGET;
-
-	/* Passback COS information. */
-	fcport->supported_classes = (pd->flags & PDF_CLASS_2) ?
-		FC_COS_CLASS2 : FC_COS_CLASS3;
-
-	if (pd->prli_svc_param_word_3[0] & BIT_7) {
-		fcport->flags |= FCF_CONF_COMP_SUPPORTED;
-		fcport->conf_compl_supported = 1;
-	}
+	rval = __qla24xx_parse_gpdb(vha, fcport, pd);
 
 gpd_error_out:
 	memset(&ea, 0, sizeof(ea));
@@ -1266,7 +1224,7 @@ qla24xx_abort_sp_done(void *ptr, int res)
 	complete(&abt->u.abt.comp);
 }
 
-static int
+int
 qla24xx_async_abort_cmd(srb_t *cmd_sp)
 {
 	scsi_qla_host_t *vha = cmd_sp->vha;
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index b2c6da752edd..3953c8d6af69 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -2692,7 +2692,7 @@ qla24xx_abort_iocb_entry(scsi_qla_host_t *vha, struct req_que *req,
 		return;
 
 	abt = &sp->u.iocb_cmd;
-	abt->u.abt.comp_status = le32_to_cpu(pkt->nport_handle);
+	abt->u.abt.comp_status = le16_to_cpu(pkt->nport_handle);
 	sp->done(sp, 0);
 }
 
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 35079f417417..e40ed570d3c1 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -10,6 +10,28 @@
 #include <linux/delay.h>
 #include <linux/gfp.h>
 
+static struct mb_cmd_name {
+	uint16_t cmd;
+	const char *str;
+} mb_str[] = {
+	{MBC_GET_PORT_DATABASE,		"GPDB"},
+	{MBC_GET_ID_LIST,		"GIDList"},
+	{MBC_GET_LINK_PRIV_STATS,	"Stats"},
+};
+
+static const char *mb_to_str(uint16_t cmd)
+{
+	int i;
+	struct mb_cmd_name *e;
+
+	for (i = 0; i < ARRAY_SIZE(mb_str); i++) {
+		e = mb_str + i;
+		if (cmd == e->cmd)
+			return e->str;
+	}
+	return "unknown";
+}
+
 static struct rom_cmd {
 	uint16_t cmd;
 } rom_cmds[] = {
@@ -2818,7 +2840,7 @@ qla2x00_get_link_status(scsi_qla_host_t *vha, uint16_t loop_id,
 
 int
 qla24xx_get_isp_stats(scsi_qla_host_t *vha, struct link_statistics *stats,
-    dma_addr_t stats_dma, uint options)
+    dma_addr_t stats_dma, uint16_t options)
 {
 	int rval;
 	mbx_cmd_t mc;
@@ -2828,19 +2850,17 @@ qla24xx_get_isp_stats(scsi_qla_host_t *vha, struct link_statistics *stats,
 	ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1088,
 	    "Entered %s.\n", __func__);
 
-	mcp->mb[0] = MBC_GET_LINK_PRIV_STATS;
-	mcp->mb[2] = MSW(stats_dma);
-	mcp->mb[3] = LSW(stats_dma);
-	mcp->mb[6] = MSW(MSD(stats_dma));
-	mcp->mb[7] = LSW(MSD(stats_dma));
-	mcp->mb[8] = sizeof(struct link_statistics) / 4;
-	mcp->mb[9] = vha->vp_idx;
-	mcp->mb[10] = options;
-	mcp->out_mb = MBX_10|MBX_9|MBX_8|MBX_7|MBX_6|MBX_3|MBX_2|MBX_0;
-	mcp->in_mb = MBX_2|MBX_1|MBX_0;
-	mcp->tov = MBX_TOV_SECONDS;
-	mcp->flags = IOCTL_CMD;
-	rval = qla2x00_mailbox_command(vha, mcp);
+	memset(&mc, 0, sizeof(mc));
+	mc.mb[0] = MBC_GET_LINK_PRIV_STATS;
+	mc.mb[2] = MSW(stats_dma);
+	mc.mb[3] = LSW(stats_dma);
+	mc.mb[6] = MSW(MSD(stats_dma));
+	mc.mb[7] = LSW(MSD(stats_dma));
+	mc.mb[8] = sizeof(struct link_statistics) / 4;
+	mc.mb[9] = cpu_to_le16(vha->vp_idx);
+	mc.mb[10] = cpu_to_le16(options);
+
+	rval = qla24xx_send_mb_cmd(vha, &mc);
 
 	if (rval == QLA_SUCCESS) {
 		if (mcp->mb[0] != MBS_COMMAND_COMPLETE) {
@@ -5827,3 +5847,225 @@ qla26xx_dport_diagnostics(scsi_qla_host_t *vha,
 
 	return rval;
 }
+
+static void qla2x00_async_mb_sp_done(void *s, int res)
+{
+	struct srb *sp = s;
+
+	sp->u.iocb_cmd.u.mbx.rc = res;
+
+	complete(&sp->u.iocb_cmd.u.mbx.comp);
+	/* don't free sp here. Let the caller do the free */
+}
+
+/*
+ * This mailbox uses the iocb interface to send MB command.
+ * This allows non-critial (non chip setup) command to go
+ * out in parrallel.
+ */
+int qla24xx_send_mb_cmd(struct scsi_qla_host *vha, mbx_cmd_t *mcp)
+{
+	int rval = QLA_FUNCTION_FAILED;
+	srb_t *sp;
+	struct srb_iocb *c;
+
+	if (!vha->hw->flags.fw_started)
+		goto done;
+
+	sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL);
+	if (!sp)
+		goto done;
+
+	sp->type = SRB_MB_IOCB;
+	sp->name = mb_to_str(mcp->mb[0]);
+
+	qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
+
+	memcpy(sp->u.iocb_cmd.u.mbx.out_mb, mcp->mb, SIZEOF_IOCB_MB_REG);
+
+	c = &sp->u.iocb_cmd;
+	c->timeout = qla2x00_async_iocb_timeout;
+	init_completion(&c->u.mbx.comp);
+
+	sp->done = qla2x00_async_mb_sp_done;
+
+	rval = qla2x00_start_sp(sp);
+	if (rval != QLA_SUCCESS) {
+		ql_dbg(ql_dbg_mbx, vha, 0xffff,
+		    "%s: %s Failed submission. %x.\n",
+		    __func__, sp->name, rval);
+		goto done_free_sp;
+	}
+
+	ql_dbg(ql_dbg_mbx, vha, 0xffff, "MB:%s hndl %x submitted\n",
+	    sp->name, sp->handle);
+
+	wait_for_completion(&c->u.mbx.comp);
+	memcpy(mcp->mb, sp->u.iocb_cmd.u.mbx.in_mb, SIZEOF_IOCB_MB_REG);
+
+	rval = c->u.mbx.rc;
+	switch (rval) {
+	case QLA_FUNCTION_TIMEOUT:
+		ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %s Timeout. %x.\n",
+		    __func__, sp->name, rval);
+		break;
+	case  QLA_SUCCESS:
+		ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %s done.\n",
+		    __func__, sp->name);
+		sp->free(sp);
+		break;
+	default:
+		ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %s Failed. %x.\n",
+		    __func__, sp->name, rval);
+		sp->free(sp);
+		break;
+	}
+
+	return rval;
+
+done_free_sp:
+	sp->free(sp);
+done:
+	return rval;
+}
+
+/*
+ * qla24xx_gpdb_wait
+ * NOTE: Do not call this routine from DPC thread
+ */
+int qla24xx_gpdb_wait(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt)
+{
+	int rval = QLA_FUNCTION_FAILED;
+	dma_addr_t pd_dma;
+	struct port_database_24xx *pd;
+	struct qla_hw_data *ha = vha->hw;
+	mbx_cmd_t mc;
+
+	if (!vha->hw->flags.fw_started)
+		goto done;
+
+	pd = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &pd_dma);
+	if (pd  == NULL) {
+		ql_log(ql_log_warn, vha, 0xffff,
+			"Failed to allocate port database structure.\n");
+		goto done_free_sp;
+	}
+	memset(pd, 0, max(PORT_DATABASE_SIZE, PORT_DATABASE_24XX_SIZE));
+
+	memset(&mc, 0, sizeof(mc));
+	mc.mb[0] = MBC_GET_PORT_DATABASE;
+	mc.mb[1] = cpu_to_le16(fcport->loop_id);
+	mc.mb[2] = MSW(pd_dma);
+	mc.mb[3] = LSW(pd_dma);
+	mc.mb[6] = MSW(MSD(pd_dma));
+	mc.mb[7] = LSW(MSD(pd_dma));
+	mc.mb[9] = cpu_to_le16(vha->vp_idx);
+	mc.mb[10] = cpu_to_le16((uint16_t)opt);
+
+	rval = qla24xx_send_mb_cmd(vha, &mc);
+	if (rval != QLA_SUCCESS) {
+		ql_dbg(ql_dbg_mbx, vha, 0xffff,
+		    "%s: %8phC fail\n", __func__, fcport->port_name);
+		goto done_free_sp;
+	}
+
+	rval = __qla24xx_parse_gpdb(vha, fcport, pd);
+
+	ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %8phC done\n",
+	    __func__, fcport->port_name);
+
+done_free_sp:
+	if (pd)
+		dma_pool_free(ha->s_dma_pool, pd, pd_dma);
+done:
+	return rval;
+}
+
+int __qla24xx_parse_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport,
+    struct port_database_24xx *pd)
+{
+	int rval = QLA_SUCCESS;
+	uint64_t zero = 0;
+
+	/* Check for logged in state. */
+	if (pd->current_login_state != PDS_PRLI_COMPLETE &&
+		pd->last_login_state != PDS_PRLI_COMPLETE) {
+		ql_dbg(ql_dbg_mbx, vha, 0xffff,
+			   "Unable to verify login-state (%x/%x) for "
+			   "loop_id %x.\n", pd->current_login_state,
+			   pd->last_login_state, fcport->loop_id);
+		rval = QLA_FUNCTION_FAILED;
+		goto gpd_error_out;
+	}
+
+	if (fcport->loop_id == FC_NO_LOOP_ID ||
+	    (memcmp(fcport->port_name, (uint8_t *)&zero, 8) &&
+	     memcmp(fcport->port_name, pd->port_name, 8))) {
+		/* We lost the device mid way. */
+		rval = QLA_NOT_LOGGED_IN;
+		goto gpd_error_out;
+	}
+
+	/* Names are little-endian. */
+	memcpy(fcport->node_name, pd->node_name, WWN_SIZE);
+	memcpy(fcport->port_name, pd->port_name, WWN_SIZE);
+
+	/* Get port_id of device. */
+	fcport->d_id.b.domain = pd->port_id[0];
+	fcport->d_id.b.area = pd->port_id[1];
+	fcport->d_id.b.al_pa = pd->port_id[2];
+	fcport->d_id.b.rsvd_1 = 0;
+
+	/* If not target must be initiator or unknown type. */
+	if ((pd->prli_svc_param_word_3[0] & BIT_4) == 0)
+		fcport->port_type = FCT_INITIATOR;
+	else
+		fcport->port_type = FCT_TARGET;
+
+	/* Passback COS information. */
+	fcport->supported_classes = (pd->flags & PDF_CLASS_2) ?
+		FC_COS_CLASS2 : FC_COS_CLASS3;
+
+	if (pd->prli_svc_param_word_3[0] & BIT_7) {
+		fcport->flags |= FCF_CONF_COMP_SUPPORTED;
+		fcport->conf_compl_supported = 1;
+	}
+
+gpd_error_out:
+	return rval;
+}
+
+/*
+ * qla24xx_gidlist__wait
+ * NOTE: don't call this routine from DPC thread.
+ */
+int qla24xx_gidlist_wait(struct scsi_qla_host *vha,
+	void *id_list, dma_addr_t id_list_dma, uint16_t *entries)
+{
+	int rval = QLA_FUNCTION_FAILED;
+	mbx_cmd_t mc;
+
+	if (!vha->hw->flags.fw_started)
+		goto done;
+
+	memset(&mc, 0, sizeof(mc));
+	mc.mb[0] = MBC_GET_ID_LIST;
+	mc.mb[2] = MSW(id_list_dma);
+	mc.mb[3] = LSW(id_list_dma);
+	mc.mb[6] = MSW(MSD(id_list_dma));
+	mc.mb[7] = LSW(MSD(id_list_dma));
+	mc.mb[8] = 0;
+	mc.mb[9] = cpu_to_le16(vha->vp_idx);
+
+	rval = qla24xx_send_mb_cmd(vha, &mc);
+	if (rval != QLA_SUCCESS) {
+		ql_dbg(ql_dbg_mbx, vha, 0xffff,
+			"%s:  fail\n", __func__);
+	} else {
+		*entries = mc.mb[1];
+		ql_dbg(ql_dbg_mbx, vha, 0xffff,
+			"%s:  done\n", __func__);
+	}
+done:
+	return rval;
+}
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 563116188c43..38bdcd325fa4 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -1238,7 +1238,7 @@ static int qla24xx_get_loop_id(struct scsi_qla_host *vha, const uint8_t *s_id,
 	}
 
 	/* Get list of logged in devices */
-	rc = qla2x00_get_id_list(vha, gid_list, gid_list_dma, &entries);
+	rc = qla24xx_gidlist_wait(vha, gid_list, gid_list_dma, &entries);
 	if (rc != QLA_SUCCESS) {
 		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf045,
 		    "qla_target(%d): get_id_list() failed: %x\n",
@@ -5648,7 +5648,7 @@ static fc_port_t *qlt_get_port_database(struct scsi_qla_host *vha,
 
 	fcport->loop_id = loop_id;
 
-	rc = qla2x00_get_port_database(vha, fcport, 0);
+	rc = qla24xx_gpdb_wait(vha, fcport, 0);
 	if (rc != QLA_SUCCESS) {
 		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf070,
 		    "qla_target(%d): Failed to retrieve fcport "
-- 
cgit v1.2.3


From c423437e3ff41b8ca551ab6621baf11538dbfe9d Mon Sep 17 00:00:00 2001
From: Himanshu Madhani <himanshu.madhani@cavium.com>
Date: Wed, 15 Mar 2017 09:48:53 -0700
Subject: qla2xxx: Add DebugFS node to display Port Database

Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Giridhar Malavali <giridhar.malavali@cavium.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/scsi/qla2xxx/qla_def.h |  2 +
 drivers/scsi/qla2xxx/qla_dfs.c | 92 ++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 90 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index ae38b7a789b1..089480280558 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -3300,6 +3300,8 @@ struct qlt_hw_data {
 	uint8_t tgt_node_name[WWN_SIZE];
 
 	struct dentry *dfs_tgt_sess;
+	struct dentry *dfs_tgt_port_database;
+
 	struct list_head q_full_list;
 	uint32_t num_pend_cmds;
 	uint32_t num_qfull_cmds_alloc;
diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c
index 3b35905619b0..989e17b0758c 100644
--- a/drivers/scsi/qla2xxx/qla_dfs.c
+++ b/drivers/scsi/qla2xxx/qla_dfs.c
@@ -19,11 +19,11 @@ qla2x00_dfs_tgt_sess_show(struct seq_file *s, void *unused)
 	struct qla_hw_data *ha = vha->hw;
 	unsigned long flags;
 	struct fc_port *sess = NULL;
-	struct qla_tgt *tgt= vha->vha_tgt.qla_tgt;
+	struct qla_tgt *tgt = vha->vha_tgt.qla_tgt;
 
-	seq_printf(s, "%s\n",vha->host_str);
+	seq_printf(s, "%s\n", vha->host_str);
 	if (tgt) {
-		seq_printf(s, "Port ID   Port Name                Handle\n");
+		seq_puts(s, "Port ID   Port Name                Handle\n");
 
 		spin_lock_irqsave(&ha->tgt.sess_lock, flags);
 		list_for_each_entry(sess, &vha->vp_fcports, list)
@@ -44,7 +44,6 @@ qla2x00_dfs_tgt_sess_open(struct inode *inode, struct file *file)
 	return single_open(file, qla2x00_dfs_tgt_sess_show, vha);
 }
 
-
 static const struct file_operations dfs_tgt_sess_ops = {
 	.open		= qla2x00_dfs_tgt_sess_open,
 	.read		= seq_read,
@@ -52,6 +51,78 @@ static const struct file_operations dfs_tgt_sess_ops = {
 	.release	= single_release,
 };
 
+static int
+qla2x00_dfs_tgt_port_database_show(struct seq_file *s, void *unused)
+{
+	scsi_qla_host_t *vha = s->private;
+	struct qla_hw_data *ha = vha->hw;
+	struct gid_list_info *gid_list;
+	dma_addr_t gid_list_dma;
+	fc_port_t fc_port;
+	char *id_iter;
+	int rc, i;
+	uint16_t entries, loop_id;
+	struct qla_tgt *tgt = vha->vha_tgt.qla_tgt;
+
+	seq_printf(s, "%s\n", vha->host_str);
+	if (tgt) {
+		gid_list = dma_alloc_coherent(&ha->pdev->dev,
+		    qla2x00_gid_list_size(ha),
+		    &gid_list_dma, GFP_KERNEL);
+		if (!gid_list) {
+			ql_dbg(ql_dbg_user, vha, 0x705c,
+			    "DMA allocation failed for %u\n",
+			     qla2x00_gid_list_size(ha));
+			return 0;
+		}
+
+		rc = qla24xx_gidlist_wait(vha, gid_list, gid_list_dma,
+		    &entries);
+		if (rc != QLA_SUCCESS)
+			goto out_free_id_list;
+
+		id_iter = (char *)gid_list;
+
+		seq_puts(s, "Port Name	Port ID 	Loop ID\n");
+
+		for (i = 0; i < entries; i++) {
+			struct gid_list_info *gid =
+			    (struct gid_list_info *)id_iter;
+			loop_id = le16_to_cpu(gid->loop_id);
+			memset(&fc_port, 0, sizeof(fc_port_t));
+
+			fc_port.loop_id = loop_id;
+
+			rc = qla24xx_gpdb_wait(vha, &fc_port, 0);
+			seq_printf(s, "%8phC  %02x%02x%02x  %d\n",
+				fc_port.port_name, fc_port.d_id.b.domain,
+				fc_port.d_id.b.area, fc_port.d_id.b.al_pa,
+				fc_port.loop_id);
+			id_iter += ha->gid_list_info_size;
+		}
+out_free_id_list:
+		dma_free_coherent(&ha->pdev->dev, qla2x00_gid_list_size(ha),
+		    gid_list, gid_list_dma);
+	}
+
+	return 0;
+}
+
+static int
+qla2x00_dfs_tgt_port_database_open(struct inode *inode, struct file *file)
+{
+	scsi_qla_host_t *vha = inode->i_private;
+
+	return single_open(file, qla2x00_dfs_tgt_port_database_show, vha);
+}
+
+static const struct file_operations dfs_tgt_port_database_ops = {
+	.open		= qla2x00_dfs_tgt_port_database_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 static int
 qla_dfs_fw_resource_cnt_show(struct seq_file *s, void *unused)
 {
@@ -296,6 +367,14 @@ create_nodes:
 		goto out;
 	}
 
+	ha->tgt.dfs_tgt_port_database = debugfs_create_file("tgt_port_database",
+	    S_IRUSR,  ha->dfs_dir, vha, &dfs_tgt_port_database_ops);
+	if (!ha->tgt.dfs_tgt_port_database) {
+		ql_log(ql_log_warn, vha, 0xffff,
+		    "Unable to create debugFS tgt_port_database node.\n");
+		goto out;
+	}
+
 	ha->dfs_fce = debugfs_create_file("fce", S_IRUSR, ha->dfs_dir, vha,
 	    &dfs_fce_ops);
 	if (!ha->dfs_fce) {
@@ -326,6 +405,11 @@ qla2x00_dfs_remove(scsi_qla_host_t *vha)
 		ha->tgt.dfs_tgt_sess = NULL;
 	}
 
+	if (ha->tgt.dfs_tgt_port_database) {
+		debugfs_remove(ha->tgt.dfs_tgt_port_database);
+		ha->tgt.dfs_tgt_port_database = NULL;
+	}
+
 	if (ha->dfs_fw_resource_cnt) {
 		debugfs_remove(ha->dfs_fw_resource_cnt);
 		ha->dfs_fw_resource_cnt = NULL;
-- 
cgit v1.2.3


From 482c9dc79204bb83c3433a59680c787a0b98c000 Mon Sep 17 00:00:00 2001
From: Quinn Tran <quinn.tran@cavium.com>
Date: Wed, 15 Mar 2017 09:48:54 -0700
Subject: qla2xxx: Change scsi host lookup method.

For target mode, when new scsi command arrive, driver first performs
a look up of the SCSI Host. The current look up method is based on
the ALPA portion of the NPort ID. For Cisco switch, the ALPA can
not be used as the index. Instead, the new search method is based
on the full value of the Nport_ID via btree lib.

Signed-off-by: Quinn Tran <quinn.tran@cavium.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/scsi/qla2xxx/Kconfig      |  1 +
 drivers/scsi/qla2xxx/qla_def.h    |  2 +
 drivers/scsi/qla2xxx/qla_gbl.h    |  2 +
 drivers/scsi/qla2xxx/qla_init.c   | 14 +++---
 drivers/scsi/qla2xxx/qla_mbx.c    | 28 ++++--------
 drivers/scsi/qla2xxx/qla_os.c     |  1 +
 drivers/scsi/qla2xxx/qla_target.c | 92 +++++++++++++++++++++++++++++++++------
 7 files changed, 100 insertions(+), 40 deletions(-)

diff --git a/drivers/scsi/qla2xxx/Kconfig b/drivers/scsi/qla2xxx/Kconfig
index 67c0d5aa3212..de952935b5d2 100644
--- a/drivers/scsi/qla2xxx/Kconfig
+++ b/drivers/scsi/qla2xxx/Kconfig
@@ -3,6 +3,7 @@ config SCSI_QLA_FC
 	depends on PCI && SCSI
 	depends on SCSI_FC_ATTRS
 	select FW_LOADER
+	select BTREE
 	---help---
 	This qla2xxx driver supports all QLogic Fibre Channel
 	PCI and PCIe host adapters.
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 089480280558..9251918773b1 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -25,6 +25,7 @@
 #include <linux/firmware.h>
 #include <linux/aer.h>
 #include <linux/mutex.h>
+#include <linux/btree.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_host.h>
@@ -3311,6 +3312,7 @@ struct qlt_hw_data {
 	spinlock_t sess_lock;
 	int rspq_vector_cpuid;
 	spinlock_t atio_lock ____cacheline_aligned;
+	struct btree_head32 host_map;
 };
 
 #define MAX_QFULL_CMDS_ALLOC	8192
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index 323b912b47f7..5b2451745e9f 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -854,5 +854,7 @@ extern struct fc_port *qlt_find_sess_invalidate_other(scsi_qla_host_t *,
 	uint64_t wwn, port_id_t port_id, uint16_t loop_id, struct fc_port **);
 void qla24xx_delete_sess_fn(struct work_struct *);
 void qlt_unknown_atio_work_fn(struct work_struct *);
+void qlt_update_host_map(struct scsi_qla_host *, port_id_t);
+void qlt_remove_target_resources(struct qla_hw_data *);
 
 #endif /* _QLA_GBL_H */
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index b1bfa63f7d4e..b0f6ad3020d3 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -3340,8 +3340,8 @@ qla2x00_configure_hba(scsi_qla_host_t *vha)
 	uint8_t       domain;
 	char		connect_type[22];
 	struct qla_hw_data *ha = vha->hw;
-	unsigned long flags;
 	scsi_qla_host_t *base_vha = pci_get_drvdata(ha->pdev);
+	port_id_t id;
 
 	/* Get host addresses. */
 	rval = qla2x00_get_adapter_id(vha,
@@ -3419,13 +3419,11 @@ qla2x00_configure_hba(scsi_qla_host_t *vha)
 
 	/* Save Host port and loop ID. */
 	/* byte order - Big Endian */
-	vha->d_id.b.domain = domain;
-	vha->d_id.b.area = area;
-	vha->d_id.b.al_pa = al_pa;
-
-	spin_lock_irqsave(&ha->vport_slock, flags);
-	qlt_update_vp_map(vha, SET_AL_PA);
-	spin_unlock_irqrestore(&ha->vport_slock, flags);
+	id.b.domain = domain;
+	id.b.area = area;
+	id.b.al_pa = al_pa;
+	id.b.rsvd_1 = 0;
+	qlt_update_host_map(vha, id);
 
 	if (!vha->flags.init_done)
 		ql_log(ql_log_info, vha, 0x2010,
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index e40ed570d3c1..53d9579acc74 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -3623,6 +3623,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
 	scsi_qla_host_t *vp = NULL;
 	unsigned long   flags;
 	int found;
+	port_id_t id;
 
 	ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b6,
 	    "Entered %s.\n", __func__);
@@ -3630,6 +3631,11 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
 	if (rptid_entry->entry_status != 0)
 		return;
 
+	id.b.domain = rptid_entry->port_id[2];
+	id.b.area   = rptid_entry->port_id[1];
+	id.b.al_pa  = rptid_entry->port_id[0];
+	id.b.rsvd_1 = 0;
+
 	if (rptid_entry->format == 0) {
 		/* loop */
 		ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b7,
@@ -3641,13 +3647,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
 		    rptid_entry->port_id[2], rptid_entry->port_id[1],
 		    rptid_entry->port_id[0]);
 
-		vha->d_id.b.domain = rptid_entry->port_id[2];
-		vha->d_id.b.area = rptid_entry->port_id[1];
-		vha->d_id.b.al_pa = rptid_entry->port_id[0];
-
-		spin_lock_irqsave(&ha->vport_slock, flags);
-		qlt_update_vp_map(vha, SET_AL_PA);
-		spin_unlock_irqrestore(&ha->vport_slock, flags);
+		qlt_update_host_map(vha, id);
 
 	} else if (rptid_entry->format == 1) {
 		/* fabric */
@@ -3673,12 +3673,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
 					    WWN_SIZE);
 				}
 
-				vha->d_id.b.domain = rptid_entry->port_id[2];
-				vha->d_id.b.area = rptid_entry->port_id[1];
-				vha->d_id.b.al_pa = rptid_entry->port_id[0];
-				spin_lock_irqsave(&ha->vport_slock, flags);
-				qlt_update_vp_map(vha, SET_AL_PA);
-				spin_unlock_irqrestore(&ha->vport_slock, flags);
+				qlt_update_host_map(vha, id);
 			}
 
 			fc_host_port_name(vha->host) =
@@ -3714,12 +3709,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
 			if (!found)
 				return;
 
-			vp->d_id.b.domain = rptid_entry->port_id[2];
-			vp->d_id.b.area =  rptid_entry->port_id[1];
-			vp->d_id.b.al_pa = rptid_entry->port_id[0];
-			spin_lock_irqsave(&ha->vport_slock, flags);
-			qlt_update_vp_map(vp, SET_AL_PA);
-			spin_unlock_irqrestore(&ha->vport_slock, flags);
+			qlt_update_host_map(vp, id);
 
 			/*
 			 * Cannot configure here as we are still sitting on the
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 54d4e802bde0..344faf59783d 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -3469,6 +3469,7 @@ qla2x00_remove_one(struct pci_dev *pdev)
 	qla2x00_free_sysfs_attr(base_vha, true);
 
 	fc_remove_host(base_vha->host);
+	qlt_remove_target_resources(ha);
 
 	scsi_remove_host(base_vha->host);
 
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 38bdcd325fa4..7278e046bf87 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -187,21 +187,23 @@ static inline
 struct scsi_qla_host *qlt_find_host_by_d_id(struct scsi_qla_host *vha,
 	uint8_t *d_id)
 {
-	struct qla_hw_data *ha = vha->hw;
-	uint8_t vp_idx;
-
-	if ((vha->d_id.b.area != d_id[1]) || (vha->d_id.b.domain != d_id[0]))
-		return NULL;
+	struct scsi_qla_host *host;
+	uint32_t key = 0;
 
-	if (vha->d_id.b.al_pa == d_id[2])
+	if ((vha->d_id.b.area == d_id[1]) && (vha->d_id.b.domain == d_id[0]) &&
+	    (vha->d_id.b.al_pa == d_id[2]))
 		return vha;
 
-	BUG_ON(ha->tgt.tgt_vp_map == NULL);
-	vp_idx = ha->tgt.tgt_vp_map[d_id[2]].idx;
-	if (likely(test_bit(vp_idx, ha->vp_idx_map)))
-		return ha->tgt.tgt_vp_map[vp_idx].vha;
+	key  = (uint32_t)d_id[0] << 16;
+	key |= (uint32_t)d_id[1] <<  8;
+	key |= (uint32_t)d_id[2];
 
-	return NULL;
+	host = btree_lookup32(&vha->hw->tgt.host_map, key);
+	if (!host)
+		ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff,
+			   "Unable to find host %06x\n", key);
+
+	return host;
 }
 
 static inline
@@ -6040,6 +6042,17 @@ int qlt_remove_target(struct qla_hw_data *ha, struct scsi_qla_host *vha)
 	return 0;
 }
 
+void qlt_remove_target_resources(struct qla_hw_data *ha)
+{
+	struct scsi_qla_host *node;
+	u32 key = 0;
+
+	btree_for_each_safe32(&ha->tgt.host_map, key, node)
+		btree_remove32(&ha->tgt.host_map, key);
+
+	btree_destroy32(&ha->tgt.host_map);
+}
+
 static void qlt_lport_dump(struct scsi_qla_host *vha, u64 wwpn,
 	unsigned char *b)
 {
@@ -6693,6 +6706,8 @@ qlt_modify_vp_config(struct scsi_qla_host *vha,
 void
 qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha)
 {
+	int rc;
+
 	if (!QLA_TGT_MODE_ENABLED())
 		return;
 
@@ -6712,6 +6727,13 @@ qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha)
 	    qlt_unknown_atio_work_fn);
 
 	qlt_clear_mode(base_vha);
+
+	rc = btree_init32(&ha->tgt.host_map);
+	if (rc)
+		ql_log(ql_log_info, base_vha, 0xffff,
+		    "Unable to initialize ha->host_map btree\n");
+
+	qlt_update_vp_map(base_vha, SET_VP_IDX);
 }
 
 irqreturn_t
@@ -6820,25 +6842,69 @@ qlt_mem_free(struct qla_hw_data *ha)
 void
 qlt_update_vp_map(struct scsi_qla_host *vha, int cmd)
 {
+	void *slot;
+	u32 key;
+	int rc;
+
 	if (!QLA_TGT_MODE_ENABLED())
 		return;
 
+	key = vha->d_id.b24;
+
 	switch (cmd) {
 	case SET_VP_IDX:
 		vha->hw->tgt.tgt_vp_map[vha->vp_idx].vha = vha;
 		break;
 	case SET_AL_PA:
-		vha->hw->tgt.tgt_vp_map[vha->d_id.b.al_pa].idx = vha->vp_idx;
+		slot = btree_lookup32(&vha->hw->tgt.host_map, key);
+		if (!slot) {
+			ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff,
+			    "Save vha in host_map %p %06x\n", vha, key);
+			rc = btree_insert32(&vha->hw->tgt.host_map,
+				key, vha, GFP_ATOMIC);
+			if (rc)
+				ql_log(ql_log_info, vha, 0xffff,
+				    "Unable to insert s_id into host_map: %06x\n",
+				    key);
+			return;
+		}
+		ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff,
+			"replace existing vha in host_map %p %06x\n", vha, key);
+		btree_update32(&vha->hw->tgt.host_map, key, vha);
 		break;
 	case RESET_VP_IDX:
 		vha->hw->tgt.tgt_vp_map[vha->vp_idx].vha = NULL;
 		break;
 	case RESET_AL_PA:
-		vha->hw->tgt.tgt_vp_map[vha->d_id.b.al_pa].idx = 0;
+		ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff,
+		   "clear vha in host_map %p %06x\n", vha, key);
+		slot = btree_lookup32(&vha->hw->tgt.host_map, key);
+		if (slot)
+			btree_remove32(&vha->hw->tgt.host_map, key);
+		vha->d_id.b24 = 0;
 		break;
 	}
 }
 
+void qlt_update_host_map(struct scsi_qla_host *vha, port_id_t id)
+{
+	unsigned long flags;
+	struct qla_hw_data *ha = vha->hw;
+
+	if (!vha->d_id.b24) {
+		spin_lock_irqsave(&ha->vport_slock, flags);
+		vha->d_id = id;
+		qlt_update_vp_map(vha, SET_AL_PA);
+		spin_unlock_irqrestore(&ha->vport_slock, flags);
+	} else if (vha->d_id.b24 != id.b24) {
+		spin_lock_irqsave(&ha->vport_slock, flags);
+		qlt_update_vp_map(vha, RESET_AL_PA);
+		vha->d_id = id;
+		qlt_update_vp_map(vha, SET_AL_PA);
+		spin_unlock_irqrestore(&ha->vport_slock, flags);
+	}
+}
+
 static int __init qlt_parse_ini_mode(void)
 {
 	if (strcasecmp(qlini_mode, QLA2XXX_INI_MODE_STR_EXCLUSIVE) == 0)
-- 
cgit v1.2.3


From ec7193e26055112bc824929fd943035f9a30b06f Mon Sep 17 00:00:00 2001
From: Quinn Tran <quinn.tran@cavium.com>
Date: Wed, 15 Mar 2017 09:48:55 -0700
Subject: qla2xxx: Fix delayed response to command for loop mode/direct
 connect.

Current driver wait for FW to be in the ready state before
processing in-coming commands. For Arbitrated Loop or
Point-to- Point (not switch), FW Ready state can take a while.
FW will transition to ready state after all Nports have been
logged in. In the mean time, certain initiators have completed
the login and starts IO. Driver needs to start processing all
queues if FW is already started.

Signed-off-by: Quinn Tran <quinn.tran@cavium.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/scsi/qla2xxx/qla_def.h    | 10 ++++++++--
 drivers/scsi/qla2xxx/qla_init.c   | 12 ++++++++++++
 drivers/scsi/qla2xxx/qla_isr.c    | 14 +++++++++++++-
 drivers/scsi/qla2xxx/qla_mbx.c    |  6 +++---
 drivers/scsi/qla2xxx/qla_os.c     | 21 ++++++++++++++++++++-
 drivers/scsi/qla2xxx/qla_target.c | 38 +++++++++++++++++++++++++-------------
 6 files changed, 81 insertions(+), 20 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 9251918773b1..ae119018dfaa 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -3322,6 +3322,10 @@ struct qlt_hw_data {
 
 #define LEAK_EXCHG_THRESH_HOLD_PERCENT 75	/* 75 percent */
 
+#define QLA_EARLY_LINKUP(_ha) \
+	((_ha->flags.n2n_ae || _ha->flags.lip_ae) && \
+	 _ha->flags.fw_started && !_ha->flags.fw_init_done)
+
 /*
  * Qlogic host adapter specific data structure.
 */
@@ -3371,9 +3375,11 @@ struct qla_hw_data {
 		uint32_t	fawwpn_enabled:1;
 		uint32_t	exlogins_enabled:1;
 		uint32_t	exchoffld_enabled:1;
-		/* 35 bits */
 
+		uint32_t	lip_ae:1;
+		uint32_t	n2n_ae:1;
 		uint32_t	fw_started:1;
+		uint32_t	fw_init_done:1;
 	} flags;
 
 	/* This spinlock is used to protect "io transactions", you must
@@ -3466,7 +3472,6 @@ struct qla_hw_data {
 #define P2P_LOOP  3
 	uint8_t		interrupts_on;
 	uint32_t	isp_abort_cnt;
-
 #define PCI_DEVICE_ID_QLOGIC_ISP2532    0x2532
 #define PCI_DEVICE_ID_QLOGIC_ISP8432    0x8432
 #define PCI_DEVICE_ID_QLOGIC_ISP8001	0x8001
@@ -3947,6 +3952,7 @@ typedef struct scsi_qla_host {
 	struct list_head vp_fcports;	/* list of fcports */
 	struct list_head work_list;
 	spinlock_t work_lock;
+	struct work_struct iocb_work;
 
 	/* Commonly used flags and state information. */
 	struct Scsi_Host *host;
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index b0f6ad3020d3..f9d2fe7b1ade 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -3178,6 +3178,7 @@ next_check:
 	} else {
 		ql_dbg(ql_dbg_init, vha, 0x00d3,
 		    "Init Firmware -- success.\n");
+		ha->flags.fw_started = 1;
 	}
 
 	return (rval);
@@ -4000,6 +4001,7 @@ qla2x00_configure_loop(scsi_qla_host_t *vha)
 			atomic_set(&vha->loop_state, LOOP_READY);
 			ql_dbg(ql_dbg_disc, vha, 0x2069,
 			    "LOOP READY.\n");
+			ha->flags.fw_init_done = 1;
 
 			/*
 			 * Process any ATIO queue entries that came in
@@ -5491,6 +5493,11 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha)
 	if (!(IS_P3P_TYPE(ha)))
 		ha->isp_ops->reset_chip(vha);
 
+	ha->flags.n2n_ae = 0;
+	ha->flags.lip_ae = 0;
+	ha->current_topology = 0;
+	ha->flags.fw_started = 0;
+	ha->flags.fw_init_done = 0;
 	ha->chip_reset++;
 
 	atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME);
@@ -6767,6 +6774,8 @@ qla2x00_try_to_stop_firmware(scsi_qla_host_t *vha)
 		return;
 	if (!ha->fw_major_version)
 		return;
+	if (!ha->flags.fw_started)
+		return;
 
 	ret = qla2x00_stop_firmware(vha);
 	for (retries = 5; ret != QLA_SUCCESS && ret != QLA_FUNCTION_TIMEOUT &&
@@ -6780,6 +6789,9 @@ qla2x00_try_to_stop_firmware(scsi_qla_host_t *vha)
 		    "Attempting retry of stop-firmware command.\n");
 		ret = qla2x00_stop_firmware(vha);
 	}
+
+	ha->flags.fw_started = 0;
+	ha->flags.fw_init_done = 0;
 }
 
 int
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 3953c8d6af69..3203367a4f42 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -708,6 +708,8 @@ skip_rio:
 		    "mbx7=%xh.\n", mb[1], mb[2], mb[3], mbx);
 
 		ha->isp_ops->fw_dump(vha, 1);
+		ha->flags.fw_init_done = 0;
+		ha->flags.fw_started = 0;
 
 		if (IS_FWI2_CAPABLE(ha)) {
 			if (mb[1] == 0 && mb[2] == 0) {
@@ -761,6 +763,9 @@ skip_rio:
 		break;
 
 	case MBA_LIP_OCCURRED:		/* Loop Initialization Procedure */
+		ha->flags.lip_ae = 1;
+		ha->flags.n2n_ae = 0;
+
 		ql_dbg(ql_dbg_async, vha, 0x5009,
 		    "LIP occurred (%x).\n", mb[1]);
 
@@ -797,6 +802,10 @@ skip_rio:
 		break;
 
 	case MBA_LOOP_DOWN:		/* Loop Down Event */
+		ha->flags.n2n_ae = 0;
+		ha->flags.lip_ae = 0;
+		ha->current_topology = 0;
+
 		mbx = (IS_QLA81XX(ha) || IS_QLA8031(ha))
 			? RD_REG_WORD(&reg24->mailbox4) : 0;
 		mbx = (IS_P3P_TYPE(ha)) ? RD_REG_WORD(&reg82->mailbox_out[4])
@@ -866,6 +875,9 @@ skip_rio:
 
 	/* case MBA_DCBX_COMPLETE: */
 	case MBA_POINT_TO_POINT:	/* Point-to-Point */
+		ha->flags.lip_ae = 0;
+		ha->flags.n2n_ae = 1;
+
 		if (IS_QLA2100(ha))
 			break;
 
@@ -2706,7 +2718,7 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
 	struct sts_entry_24xx *pkt;
 	struct qla_hw_data *ha = vha->hw;
 
-	if (!vha->flags.online)
+	if (!ha->flags.fw_started)
 		return;
 
 	while (rsp->ring_ptr->signature != RESPONSE_PROCESSED) {
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 53d9579acc74..a113ab3592a7 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -3638,11 +3638,11 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
 
 	if (rptid_entry->format == 0) {
 		/* loop */
-		ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b7,
+		ql_dbg(ql_dbg_async, vha, 0x10b7,
 		    "Format 0 : Number of VPs setup %d, number of "
 		    "VPs acquired %d.\n", rptid_entry->vp_setup,
 		    rptid_entry->vp_acquired);
-		ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b8,
+		ql_dbg(ql_dbg_async, vha, 0x10b8,
 		    "Primary port id %02x%02x%02x.\n",
 		    rptid_entry->port_id[2], rptid_entry->port_id[1],
 		    rptid_entry->port_id[0]);
@@ -3651,7 +3651,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
 
 	} else if (rptid_entry->format == 1) {
 		/* fabric */
-		ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b9,
+		ql_dbg(ql_dbg_async, vha, 0x10b9,
 		    "Format 1: VP[%d] enabled - status %d - with "
 		    "port id %02x%02x%02x.\n", rptid_entry->vp_idx,
 			rptid_entry->vp_status,
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 344faf59783d..41d5b09f7326 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -2560,6 +2560,20 @@ qla2xxx_scan_finished(struct Scsi_Host *shost, unsigned long time)
 	return atomic_read(&vha->loop_state) == LOOP_READY;
 }
 
+static void qla2x00_iocb_work_fn(struct work_struct *work)
+{
+	struct scsi_qla_host *vha = container_of(work,
+		struct scsi_qla_host, iocb_work);
+	int cnt = 0;
+
+	while (!list_empty(&vha->work_list)) {
+		qla2x00_do_work(vha);
+		cnt++;
+		if (cnt > 10)
+			break;
+	}
+}
+
 /*
  * PCI driver interface
  */
@@ -3078,6 +3092,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	 */
 	qla2xxx_wake_dpc(base_vha);
 
+	INIT_WORK(&base_vha->iocb_work, qla2x00_iocb_work_fn);
 	INIT_WORK(&ha->board_disable, qla2x00_disable_board_on_pci_error);
 
 	if (IS_QLA8031(ha) || IS_MCTP_CAPABLE(ha)) {
@@ -4321,7 +4336,11 @@ qla2x00_post_work(struct scsi_qla_host *vha, struct qla_work_evt *e)
 	spin_lock_irqsave(&vha->work_lock, flags);
 	list_add_tail(&e->list, &vha->work_list);
 	spin_unlock_irqrestore(&vha->work_lock, flags);
-	qla2xxx_wake_dpc(vha);
+
+	if (QLA_EARLY_LINKUP(vha->hw))
+		schedule_work(&vha->iocb_work);
+	else
+		qla2xxx_wake_dpc(vha);
 
 	return QLA_SUCCESS;
 }
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 7278e046bf87..0e03ca2ab3e5 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -638,6 +638,7 @@ int qla24xx_async_notify_ack(scsi_qla_host_t *vha, fc_port_t *fcport,
 		break;
 	case SRB_NACK_PRLI:
 		fcport->fw_login_state = DSC_LS_PRLI_PEND;
+		fcport->deleted = 0;
 		c = "PRLI";
 		break;
 	case SRB_NACK_LOGO:
@@ -1576,6 +1577,9 @@ static void qlt_send_notify_ack(struct scsi_qla_host *vha,
 	request_t *pkt;
 	struct nack_to_isp *nack;
 
+	if (!ha->flags.fw_started)
+		return;
+
 	ql_dbg(ql_dbg_tgt, vha, 0xe004, "Sending NOTIFY_ACK (ha=%p)\n", ha);
 
 	/* Send marker if required */
@@ -3053,7 +3057,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type,
 	else
 		vha->tgt_counters.core_qla_que_buf++;
 
-	if (!vha->flags.online || cmd->reset_count != ha->chip_reset) {
+	if (!ha->flags.fw_started || cmd->reset_count != ha->chip_reset) {
 		/*
 		 * Either the port is not online or this request was from
 		 * previous life, just abort the processing.
@@ -3194,7 +3198,7 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd)
 
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 
-	if (!vha->flags.online || (cmd->reset_count != ha->chip_reset) ||
+	if (!ha->flags.fw_started || (cmd->reset_count != ha->chip_reset) ||
 	    (cmd->sess && cmd->sess->deleted)) {
 		/*
 		 * Either the port is not online or this request was from
@@ -3372,7 +3376,7 @@ static int __qlt_send_term_imm_notif(struct scsi_qla_host *vha,
 	ql_dbg(ql_dbg_tgt_tmr, vha, 0xe01c,
 	    "Sending TERM ELS CTIO (ha=%p)\n", ha);
 
-	pkt = (request_t *)qla2x00_alloc_iocbs_ready(vha, NULL);
+	pkt = (request_t *)qla2x00_alloc_iocbs(vha, NULL);
 	if (pkt == NULL) {
 		ql_dbg(ql_dbg_tgt, vha, 0xe080,
 		    "qla_target(%d): %s failed: unable to allocate "
@@ -4697,7 +4701,8 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha,
 		}
 
 		if (sess != NULL) {
-			if (sess->fw_login_state == DSC_LS_PLOGI_PEND) {
+			if (sess->fw_login_state != DSC_LS_PLOGI_PEND &&
+			    sess->fw_login_state != DSC_LS_PLOGI_COMP) {
 				/*
 				 * Impatient initiator sent PRLI before last
 				 * PLOGI could finish. Will force him to re-try,
@@ -4736,15 +4741,23 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha,
 
 		/* Make session global (not used in fabric mode) */
 		if (ha->current_topology != ISP_CFG_F) {
-			set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags);
-			set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags);
-			qla2xxx_wake_dpc(vha);
+			if (sess) {
+				ql_dbg(ql_dbg_disc, vha, 0xffff,
+				    "%s %d %8phC post nack\n",
+				    __func__, __LINE__, sess->port_name);
+				qla24xx_post_nack_work(vha, sess, iocb,
+					SRB_NACK_PRLI);
+				res = 0;
+			} else {
+				set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags);
+				set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags);
+				qla2xxx_wake_dpc(vha);
+			}
 		} else {
 			if (sess) {
 				ql_dbg(ql_dbg_disc, vha, 0xffff,
-					   "%s %d %8phC post nack\n",
-					   __func__, __LINE__, sess->port_name);
-
+				    "%s %d %8phC post nack\n",
+				    __func__, __LINE__, sess->port_name);
 				qla24xx_post_nack_work(vha, sess, iocb,
 					SRB_NACK_PRLI);
 				res = 0;
@@ -4752,7 +4765,6 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha,
 		}
 		break;
 
-
 	case ELS_TPRLO:
 		if (le16_to_cpu(iocb->u.isp24.flags) &
 			NOTIFY24XX_FLAGS_GLOBAL_TPRLO) {
@@ -5222,7 +5234,7 @@ static void qlt_24xx_atio_pkt(struct scsi_qla_host *vha,
 	unsigned long flags;
 
 	if (unlikely(tgt == NULL)) {
-		ql_dbg(ql_dbg_io, vha, 0x3064,
+		ql_dbg(ql_dbg_tgt, vha, 0x3064,
 		    "ATIO pkt, but no tgt (ha %p)", ha);
 		return;
 	}
@@ -6359,7 +6371,7 @@ qlt_24xx_process_atio_queue(struct scsi_qla_host *vha, uint8_t ha_locked)
 	struct atio_from_isp *pkt;
 	int cnt, i;
 
-	if (!vha->flags.online)
+	if (!ha->flags.fw_started)
 		return;
 
 	while ((ha->tgt.atio_ring_ptr->signature != ATIO_PROCESSED) ||
-- 
cgit v1.2.3


From 6c611d18f386d37cce3afbd921568e2a895bd86e Mon Sep 17 00:00:00 2001
From: Himanshu Madhani <himanshu.madhani@cavium.com>
Date: Wed, 15 Mar 2017 09:48:56 -0700
Subject: qla2xxx: Update driver version to 9.00.00.00-k

Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com>
signed-off-by: Giridhar Malavali <giridhar.malavali@cavium.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/scsi/qla2xxx/qla_version.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h
index 3cb1964b7786..45bc84e8e3bf 100644
--- a/drivers/scsi/qla2xxx/qla_version.h
+++ b/drivers/scsi/qla2xxx/qla_version.h
@@ -7,9 +7,9 @@
 /*
  * Driver version
  */
-#define QLA2XXX_VERSION      "8.07.00.38-k"
+#define QLA2XXX_VERSION      "9.00.00.00-k"
 
-#define QLA_DRIVER_MAJOR_VER	8
-#define QLA_DRIVER_MINOR_VER	7
+#define QLA_DRIVER_MAJOR_VER	9
+#define QLA_DRIVER_MINOR_VER	0
 #define QLA_DRIVER_PATCH_VER	0
 #define QLA_DRIVER_BETA_VER	0
-- 
cgit v1.2.3


From 6985bd5e21901c2d2bfc924b114887e20c002901 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi83@gmail.com>
Date: Tue, 14 Mar 2017 22:05:20 +0100
Subject: iio: imu: st_lsm6dsx: fix FIFO_CTRL2 overwrite during watermark
 configuration

Fixes: 290a6ce11d93 (iio: imu: add support to lsm6dsx driver)
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@st.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
index 78532ce07449..81b572d7699a 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
@@ -193,8 +193,8 @@ int st_lsm6dsx_update_watermark(struct st_lsm6dsx_sensor *sensor, u16 watermark)
 	if (err < 0)
 		goto out;
 
-	fifo_watermark = ((data & ~ST_LSM6DSX_FIFO_TH_MASK) << 8) |
-			  (fifo_watermark & ST_LSM6DSX_FIFO_TH_MASK);
+	fifo_watermark = ((data << 8) & ~ST_LSM6DSX_FIFO_TH_MASK) |
+			 (fifo_watermark & ST_LSM6DSX_FIFO_TH_MASK);
 
 	wdata = cpu_to_le16(fifo_watermark);
 	err = hw->tf->write(hw->dev, ST_LSM6DSX_REG_FIFO_THL_ADDR,
-- 
cgit v1.2.3


From 452b94b8c8c7eb7dd0d0fa9a9776e0d02cd73b97 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 19 Mar 2017 19:00:47 -0700
Subject: mm/swap: don't BUG_ON() due to uninitialized swap slot cache

This BUG_ON() triggered for me once at shutdown, and I don't see a
reason for the check.  The code correctly checks whether the swap slot
cache is usable or not, so an uninitialized swap slot cache is not
actually problematic afaik.

I've temporarily just switched the BUG_ON() to a WARN_ON_ONCE(), since
I'm not sure why that seemingly pointless check was there.  I suspect
the real fix is to just remove it entirely, but for now we'll warn about
it but not bring the machine down.

Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/swap_slots.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index 9b5bc86f96ad..7ebb23836f68 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -267,7 +267,7 @@ int free_swap_slot(swp_entry_t entry)
 {
 	struct swap_slots_cache *cache;
 
-	BUG_ON(!swap_slot_cache_initialized);
+	WARN_ON_ONCE(!swap_slot_cache_initialized);
 
 	cache = &get_cpu_var(swp_slots);
 	if (use_swap_slot_cache && cache->slots_ret) {
-- 
cgit v1.2.3


From 97da3854c526d3a6ee05c849c96e48d21527606c Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 19 Mar 2017 19:09:39 -0700
Subject: Linux 4.11-rc3

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index b841fb36beb2..b2faa9319372 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 11
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc3
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
-- 
cgit v1.2.3


From 0cdefd5b5485ee6eb3512a75739d09a4090176ed Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Sat, 18 Mar 2017 21:53:20 -0700
Subject: ARM: dts: sun7i: lamobo-r1: Fix CPU port RGMII settings

The CPU port of the BCM53125 is configured with RGMII (no delays) but
this should actually be RGMII with transmit delay (rgmii-txid) because
STMMAC takes care of inserting the transmitter delay. This fixes
occasional packet loss encountered.

Fixes: d7b9eaff5f0c ("ARM: dts: sun7i: Add BCM53125 switch nodes to the lamobo-r1 board")
Reported-by: Hartmut Knaack <knaack.h@gmx.de>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 arch/arm/boot/dts/sun7i-a20-lamobo-r1.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/sun7i-a20-lamobo-r1.dts b/arch/arm/boot/dts/sun7i-a20-lamobo-r1.dts
index 72ec0d5ae052..bbf1c8cbaac6 100644
--- a/arch/arm/boot/dts/sun7i-a20-lamobo-r1.dts
+++ b/arch/arm/boot/dts/sun7i-a20-lamobo-r1.dts
@@ -167,7 +167,7 @@
 					reg = <8>;
 					label = "cpu";
 					ethernet = <&gmac>;
-					phy-mode = "rgmii";
+					phy-mode = "rgmii-txid";
 					fixed-link {
 						speed = <1000>;
 						full-duplex;
-- 
cgit v1.2.3


From 9693219aa61dc7a75ac015e5c011e889cb821eec Mon Sep 17 00:00:00 2001
From: Icenowy Zheng <icenowy@aosc.xyz>
Date: Sat, 18 Mar 2017 05:23:15 +0800
Subject: ARM: sun8i: a23/a33: drop bl_en_pin GPIO pinmux in reference design
 DTSI

The bl_en_pin GPIO pinmux is configured as "gpio_in", which makes it
conflicts with the real GPIO usage (out), and makes the backlight not
usable.

Drop the GPIO pinmux for it, thus this GPIO can be correctly used.

Signed-off-by: Icenowy Zheng <icenowy@aosc.xyz>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 arch/arm/boot/dts/sun8i-reference-design-tablet.dtsi | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/arch/arm/boot/dts/sun8i-reference-design-tablet.dtsi b/arch/arm/boot/dts/sun8i-reference-design-tablet.dtsi
index 7097c18ff487..d6bd15898db6 100644
--- a/arch/arm/boot/dts/sun8i-reference-design-tablet.dtsi
+++ b/arch/arm/boot/dts/sun8i-reference-design-tablet.dtsi
@@ -50,8 +50,6 @@
 
 	backlight: backlight {
 		compatible = "pwm-backlight";
-		pinctrl-names = "default";
-		pinctrl-0 = <&bl_en_pin>;
 		pwms = <&pwm 0 50000 PWM_POLARITY_INVERTED>;
 		brightness-levels = <0 10 20 30 40 50 60 70 80 90 100>;
 		default-brightness-level = <8>;
@@ -93,11 +91,6 @@
 };
 
 &pio {
-	bl_en_pin: bl_en_pin@0 {
-		pins = "PH6";
-		function = "gpio_in";
-	};
-
 	mmc0_cd_pin: mmc0_cd_pin@0 {
 		pins = "PB4";
 		function = "gpio_in";
-- 
cgit v1.2.3


From fe686babf4cf62b9b214b99847c735baa35a9fa2 Mon Sep 17 00:00:00 2001
From: Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
Date: Wed, 15 Mar 2017 12:23:58 +0100
Subject: clk: sunxi-ng: Fix div/mult settings for osc12M on A64

The mult/div for osc12M was previously backwards (giving a 48M rate
for osc12M). Fix it.

Signed-off-by: Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
Tested-by: Christoph Muellner <christoph.muellner@theobroma-systems.com>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 drivers/clk/sunxi-ng/ccu-sun50i-a64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c
index e3c084cc6da5..f54114c607df 100644
--- a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c
+++ b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c
@@ -566,7 +566,7 @@ static SUNXI_CCU_M_WITH_GATE(gpu_clk, "gpu", "pll-gpu",
 			     0x1a0, 0, 3, BIT(31), CLK_SET_RATE_PARENT);
 
 /* Fixed Factor clocks */
-static CLK_FIXED_FACTOR(osc12M_clk, "osc12M", "osc24M", 1, 2, 0);
+static CLK_FIXED_FACTOR(osc12M_clk, "osc12M", "osc24M", 2, 1, 0);
 
 /* We hardcode the divider to 4 for now */
 static CLK_FIXED_FACTOR(pll_audio_clk, "pll-audio",
-- 
cgit v1.2.3


From f363a06642f28caaa78cb6446bbad90c73fe183c Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 20 Mar 2017 10:08:19 +0100
Subject: ALSA: ctxfi: Fix the incorrect check of dma_set_mask() call

In the commit [15c75b09f8d1: ALSA: ctxfi: Fallback DMA mask to 32bit],
I forgot to put "!" at dam_set_mask() call check in cthw20k1.c (while
cthw20k2.c is OK).  This patch fixes that obvious bug.

(As a side note: although the original commit was completely wrong,
 it's still working for most of machines, as it sets to 32bit DMA mask
 in the end.  So the bug severity is low.)

Fixes: 15c75b09f8d1 ("ALSA: ctxfi: Fallback DMA mask to 32bit")
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/ctxfi/cthw20k1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/pci/ctxfi/cthw20k1.c b/sound/pci/ctxfi/cthw20k1.c
index ab4cdab5cfa5..79edd88d5cd0 100644
--- a/sound/pci/ctxfi/cthw20k1.c
+++ b/sound/pci/ctxfi/cthw20k1.c
@@ -1905,7 +1905,7 @@ static int hw_card_start(struct hw *hw)
 		return err;
 
 	/* Set DMA transfer mask */
-	if (dma_set_mask(&pci->dev, DMA_BIT_MASK(dma_bits))) {
+	if (!dma_set_mask(&pci->dev, DMA_BIT_MASK(dma_bits))) {
 		dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(dma_bits));
 	} else {
 		dma_set_mask(&pci->dev, DMA_BIT_MASK(32));
-- 
cgit v1.2.3


From 07f5ab6002a4f0b633f3495157166f9f6180871b Mon Sep 17 00:00:00 2001
From: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
Date: Thu, 23 Feb 2017 08:57:26 +0530
Subject: cxl: Route eeh events to all slices for pci_channel_io_perm_failure
 state

Fix a boundary condition where in some cases an eeh event with state ==
pci_channel_io_perm_failure wont be passed on to a driver attached to
the virtual PCI device associated with a slice. This will happen in case
the slice just before (n-1) doesn't have any vPHB bus associated with
it, that results in an early return from cxl_pci_error_detected()
callback.

With state == pci_channel_io_perm_failure, the adapter will be removed
irrespective of the return value of cxl_vphb_error_detected(). So we now
always return PCI_ERS_RESULT_DISCONNECTED for this case i.e even if
the AFU isn't using a vPHB (currently returns PCI_ERS_RESULT_NONE).

Fixes: e4f5fc001a6("cxl: Do not create vPHB if there are no AFU configuration records")
Signed-off-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
Reviewed-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>
Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Acked-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 drivers/misc/cxl/pci.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 91f645992c94..b27ea98b781f 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1792,15 +1792,14 @@ static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev,
 
 	/* If we're permanently dead, give up. */
 	if (state == pci_channel_io_perm_failure) {
-		/* Tell the AFU drivers; but we don't care what they
-		 * say, we're going away.
-		 */
 		for (i = 0; i < adapter->slices; i++) {
 			afu = adapter->afu[i];
-			/* Only participate in EEH if we are on a virtual PHB */
-			if (afu->phb == NULL)
-				return PCI_ERS_RESULT_NONE;
-			cxl_vphb_error_detected(afu, state);
+			/*
+			 * Tell the AFU drivers; but we don't care what they
+			 * say, we're going away.
+			 */
+			if (afu->phb != NULL)
+				cxl_vphb_error_detected(afu, state);
 		}
 		return PCI_ERS_RESULT_DISCONNECT;
 	}
-- 
cgit v1.2.3


From b467e08a15563dede0d37d3233baa24fb97a7310 Mon Sep 17 00:00:00 2001
From: Icenowy Zheng <icenowy@aosc.xyz>
Date: Sat, 18 Mar 2017 04:19:43 +0800
Subject: clk: sunxi-ng: fix recalc_rate formula of NKMP clocks

In commit e66f81bbd746 ("clk: sunxi-ng: Implement factors offsets"), the
final formula of NKMP clocks' recalc_rate is refactored; however, the
refactored formula broke the calculation due to some C language operand
priority problem -- the priority of operand >> is lower than * and /,
makes the formula being parsed as "(parent_rate * n * k) >> (p / m)", but
it should be "(parent_rate * n * k >> p) / m".

Add the pair of parentheses to fix up this issue. This pair of
parentheses used to exist in the old formula.

Fixes: e66f81bbd746 ("clk: sunxi-ng: Implement factors offsets")
Signed-off-by: Icenowy Zheng <icenowy@aosc.xyz>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 drivers/clk/sunxi-ng/ccu_nkmp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/sunxi-ng/ccu_nkmp.c b/drivers/clk/sunxi-ng/ccu_nkmp.c
index a2b40a000157..488055ed944f 100644
--- a/drivers/clk/sunxi-ng/ccu_nkmp.c
+++ b/drivers/clk/sunxi-ng/ccu_nkmp.c
@@ -107,7 +107,7 @@ static unsigned long ccu_nkmp_recalc_rate(struct clk_hw *hw,
 	p = reg >> nkmp->p.shift;
 	p &= (1 << nkmp->p.width) - 1;
 
-	return parent_rate * n * k >> p / m;
+	return (parent_rate * n * k >> p) / m;
 }
 
 static long ccu_nkmp_round_rate(struct clk_hw *hw, unsigned long rate,
-- 
cgit v1.2.3


From 6d98ce0be541d4a3cfbb52cd75072c0339ebb500 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 17 Mar 2017 15:13:20 +1000
Subject: powerpc/64s: Fix idle wakeup potential to clobber registers

We concluded there may be a window where the idle wakeup code could get
to pnv_wakeup_tb_loss() (which clobbers non-volatile GPRs), but the
hardware may set SRR1[46:47] to 01b (no state loss) which would result
in the wakeup code failing to restore non-volatile GPRs.

I was not able to trigger this condition with trivial tests on real
hardware or simulator, but the ISA (at least 2.07) seems to allow for
it, and Gautham says that it can happen if there is an exception pending
when the sleep/winkle instruction is executed.

Fixes: 1706567117ba ("powerpc/kvm: make hypervisor state restore a function")
Cc: stable@vger.kernel.org # v4.8+
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Acked-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/idle_book3s.S | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 995728736677..6fd08219248d 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -449,9 +449,23 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 _GLOBAL(pnv_wakeup_tb_loss)
 	ld	r1,PACAR1(r13)
 	/*
-	 * Before entering any idle state, the NVGPRs are saved in the stack
-	 * and they are restored before switching to the process context. Hence
-	 * until they are restored, they are free to be used.
+	 * Before entering any idle state, the NVGPRs are saved in the stack.
+	 * If there was a state loss, or PACA_NAPSTATELOST was set, then the
+	 * NVGPRs are restored. If we are here, it is likely that state is lost,
+	 * but not guaranteed -- neither ISA207 nor ISA300 tests to reach
+	 * here are the same as the test to restore NVGPRS:
+	 * PACA_THREAD_IDLE_STATE test for ISA207, PSSCR test for ISA300,
+	 * and SRR1 test for restoring NVGPRs.
+	 *
+	 * We are about to clobber NVGPRs now, so set NAPSTATELOST to
+	 * guarantee they will always be restored. This might be tightened
+	 * with careful reading of specs (particularly for ISA300) but this
+	 * is already a slow wakeup path and it's simpler to be safe.
+	 */
+	li	r0,1
+	stb	r0,PACA_NAPSTATELOST(r13)
+
+	/*
 	 *
 	 * Save SRR1 and LR in NVGPRs as they might be clobbered in
 	 * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required
-- 
cgit v1.2.3


From 04d5466a976b096364a39a63ac264c1b3a5f8fa1 Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Thu, 9 Mar 2017 13:29:13 +0100
Subject: ALSA: hda - add support for docking station for HP 820 G2

This tested patch adds missing initialization for Line-In/Out PINs for
the docking station for HP 820 G2.

Signed-off-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 4e112221d825..8d6b3703d0a2 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -4847,6 +4847,7 @@ enum {
 	ALC286_FIXUP_HP_GPIO_LED,
 	ALC280_FIXUP_HP_GPIO2_MIC_HOTKEY,
 	ALC280_FIXUP_HP_DOCK_PINS,
+	ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED,
 	ALC280_FIXUP_HP_9480M,
 	ALC288_FIXUP_DELL_HEADSET_MODE,
 	ALC288_FIXUP_DELL1_MIC_NO_PRESENCE,
@@ -5388,6 +5389,16 @@ static const struct hda_fixup alc269_fixups[] = {
 		.chained = true,
 		.chain_id = ALC280_FIXUP_HP_GPIO4
 	},
+	[ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x1b, 0x21011020 }, /* line-out */
+			{ 0x18, 0x2181103f }, /* line-in */
+			{ },
+		},
+		.chained = true,
+		.chain_id = ALC269_FIXUP_HP_GPIO_MIC1_LED
+	},
 	[ALC280_FIXUP_HP_9480M] = {
 		.type = HDA_FIXUP_FUNC,
 		.v.func = alc280_fixup_hp_9480m,
@@ -5647,7 +5658,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x2256, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED),
 	SND_PCI_QUIRK(0x103c, 0x2257, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED),
 	SND_PCI_QUIRK(0x103c, 0x2259, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED),
-	SND_PCI_QUIRK(0x103c, 0x225a, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED),
+	SND_PCI_QUIRK(0x103c, 0x225a, "HP", ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED),
 	SND_PCI_QUIRK(0x103c, 0x2260, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x2263, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x2264, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
@@ -5816,6 +5827,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
 	{.id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC, .name = "headset-mode-no-hp-mic"},
 	{.id = ALC269_FIXUP_LENOVO_DOCK, .name = "lenovo-dock"},
 	{.id = ALC269_FIXUP_HP_GPIO_LED, .name = "hp-gpio-led"},
+	{.id = ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED, .name = "hp-dock-gpio-mic1-led"},
 	{.id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, .name = "dell-headset-multi"},
 	{.id = ALC269_FIXUP_DELL2_MIC_NO_PRESENCE, .name = "dell-headset-dock"},
 	{.id = ALC283_FIXUP_CHROME_BOOK, .name = "alc283-dac-wcaps"},
-- 
cgit v1.2.3


From cc3a47a248d7791ef0d2c81a35c46769e55e4c6c Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Thu, 9 Mar 2017 13:30:09 +0100
Subject: ALSA: hda - add support for docking station for HP 840 G3

This tested patch adds missing initialization for Line-In/Out PINs for
the docking station for HP 840 G3.

Signed-off-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_conexant.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index c15c51bea26d..69266b8ea2ad 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -261,6 +261,7 @@ enum {
 	CXT_FIXUP_HP_530,
 	CXT_FIXUP_CAP_MIX_AMP_5047,
 	CXT_FIXUP_MUTE_LED_EAPD,
+	CXT_FIXUP_HP_DOCK,
 	CXT_FIXUP_HP_SPECTRE,
 	CXT_FIXUP_HP_GATE_MIC,
 };
@@ -778,6 +779,14 @@ static const struct hda_fixup cxt_fixups[] = {
 		.type = HDA_FIXUP_FUNC,
 		.v.func = cxt_fixup_mute_led_eapd,
 	},
+	[CXT_FIXUP_HP_DOCK] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x16, 0x21011020 }, /* line-out */
+			{ 0x18, 0x2181103f }, /* line-in */
+			{ }
+		}
+	},
 	[CXT_FIXUP_HP_SPECTRE] = {
 		.type = HDA_FIXUP_PINS,
 		.v.pins = (const struct hda_pintbl[]) {
@@ -839,6 +848,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
 	SND_PCI_QUIRK(0x1025, 0x0543, "Acer Aspire One 522", CXT_FIXUP_STEREO_DMIC),
 	SND_PCI_QUIRK(0x1025, 0x054c, "Acer Aspire 3830TG", CXT_FIXUP_ASPIRE_DMIC),
 	SND_PCI_QUIRK(0x1025, 0x054f, "Acer Aspire 4830T", CXT_FIXUP_ASPIRE_DMIC),
+	SND_PCI_QUIRK(0x103c, 0x8079, "HP EliteBook 840 G3", CXT_FIXUP_HP_DOCK),
 	SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE),
 	SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC),
 	SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN),
@@ -871,6 +881,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = {
 	{ .id = CXT_PINCFG_LEMOTE_A1205, .name = "lemote-a1205" },
 	{ .id = CXT_FIXUP_OLPC_XO, .name = "olpc-xo" },
 	{ .id = CXT_FIXUP_MUTE_LED_EAPD, .name = "mute-led-eapd" },
+	{ .id = CXT_FIXUP_HP_DOCK, .name = "hp-dock" },
 	{}
 };
 
-- 
cgit v1.2.3


From fdfe4a393e9cd8c92f4489ca207d410f44d05043 Mon Sep 17 00:00:00 2001
From: Stafford Horne <shorne@gmail.com>
Date: Mon, 13 Mar 2017 23:45:21 +0900
Subject: generic syscalls: Wire up statx syscall

The new syscall statx is implemented as generic code, so enable it
for architectures like openrisc which use the generic syscall table.

Fixes: a528d35e8bfcc ("statx: Add a system call to make enhanced file info available")
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: David Howells <dhowells@redhat.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Signed-off-by: Stafford Horne <shorne@gmail.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/uapi/asm-generic/unistd.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 9b1462e38b82..a076cf1a3a23 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -730,9 +730,11 @@ __SYSCALL(__NR_pkey_mprotect, sys_pkey_mprotect)
 __SYSCALL(__NR_pkey_alloc,    sys_pkey_alloc)
 #define __NR_pkey_free 290
 __SYSCALL(__NR_pkey_free,     sys_pkey_free)
+#define __NR_statx 291
+__SYSCALL(__NR_statx,     sys_statx)
 
 #undef __NR_syscalls
-#define __NR_syscalls 291
+#define __NR_syscalls 292
 
 /*
  * All syscalls below here should go away really,
-- 
cgit v1.2.3


From 720037f939fa50fc3531035ae61b4cf4b0ff35e5 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Mon, 6 Mar 2017 11:59:56 -0800
Subject: f2fs: don't overwrite node block by SSR

This patch fixes that SSR can overwrite previous warm node block consisting of
a node chain since the last checkpoint.

Fixes: 5b6c6be2d878 ("f2fs: use SSR for warm node as well")
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 4bd7a8b19332..29ef7088c558 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1163,6 +1163,12 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
 		if (f2fs_discard_en(sbi) &&
 			!f2fs_test_and_set_bit(offset, se->discard_map))
 			sbi->discard_blks--;
+
+		/* don't overwrite by SSR to keep node chain */
+		if (se->type == CURSEG_WARM_NODE) {
+			if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
+				se->ckpt_valid_blocks++;
+		}
 	} else {
 		if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) {
 #ifdef CONFIG_F2FS_CHECK_FS
-- 
cgit v1.2.3


From 9f7e4a2c49fd166f17cf4125766a68dce8716764 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Fri, 10 Mar 2017 09:39:57 -0800
Subject: f2fs: declare static functions

This is to avoid build warning reported by kbuild test robot.

Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/node.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 94967171dee8..a0a060c2979b 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1823,7 +1823,8 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
 		kmem_cache_free(free_nid_slab, i);
 }
 
-void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set)
+static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
+					bool set)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
@@ -2383,7 +2384,7 @@ add_out:
 	list_add_tail(&nes->set_list, head);
 }
 
-void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
+static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
 						struct page *page)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -2638,7 +2639,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
 	return 0;
 }
 
-int init_free_nid_cache(struct f2fs_sb_info *sbi)
+static int init_free_nid_cache(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 
-- 
cgit v1.2.3


From 23380b8568b85cd4b7a056891f4dbf131f7b871d Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Tue, 7 Mar 2017 14:11:06 -0800
Subject: f2fs: use __set{__clear}_bit_le

This patch uses __set{__clear}_bit_le for highter speed.

Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/dir.c  |  2 +-
 fs/f2fs/node.c | 18 +++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 4650c9b85de7..8d5c62b07b28 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -750,7 +750,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
 	dentry_blk = page_address(page);
 	bit_pos = dentry - dentry_blk->dentry;
 	for (i = 0; i < slots; i++)
-		clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
+		__clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
 
 	/* Let's check and deallocate this dentry page */
 	bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index a0a060c2979b..8c81ff614d1a 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -339,7 +339,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
 	__set_nat_cache_dirty(nm_i, e);
 
 	if (enabled_nat_bits(sbi, NULL) && new_blkaddr == NEW_ADDR)
-		clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits);
+		__clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits);
 
 	/* update fsync_mark if its inode nat entry is still alive */
 	if (ni->nid != ni->ino)
@@ -1834,9 +1834,9 @@ static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
 		return;
 
 	if (set)
-		set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+		__set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
 	else
-		clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+		__clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
 }
 
 static void scan_nat_page(struct f2fs_sb_info *sbi,
@@ -1848,7 +1848,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
 	unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
 	int i;
 
-	set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
+	__set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
 
 	i = start_nid % NAT_ENTRY_PER_BLOCK;
 
@@ -2403,16 +2403,16 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
 			valid++;
 	}
 	if (valid == 0) {
-		set_bit_le(nat_index, nm_i->empty_nat_bits);
-		clear_bit_le(nat_index, nm_i->full_nat_bits);
+		__set_bit_le(nat_index, nm_i->empty_nat_bits);
+		__clear_bit_le(nat_index, nm_i->full_nat_bits);
 		return;
 	}
 
-	clear_bit_le(nat_index, nm_i->empty_nat_bits);
+	__clear_bit_le(nat_index, nm_i->empty_nat_bits);
 	if (valid == NAT_ENTRY_PER_BLOCK)
-		set_bit_le(nat_index, nm_i->full_nat_bits);
+		__set_bit_le(nat_index, nm_i->full_nat_bits);
 	else
-		clear_bit_le(nat_index, nm_i->full_nat_bits);
+		__clear_bit_le(nat_index, nm_i->full_nat_bits);
 }
 
 static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
-- 
cgit v1.2.3


From 586d1492f301982e349797cfb05d9f343002ffa2 Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Wed, 1 Mar 2017 17:09:07 +0800
Subject: f2fs: skip scanning free nid bitmap of full NAT blocks

This patch adds to account free nids for each NAT blocks, and while
scanning all free nid bitmap, do check count and skip lookuping in
full NAT block.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/debug.c |  1 +
 fs/f2fs/f2fs.h  |  2 ++
 fs/f2fs/node.c  | 33 +++++++++++++++++++++++++++------
 3 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index a77df377e2e8..ee2d0a485fc3 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -196,6 +196,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
 	si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
 	si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
 	si->base_mem += NM_I(sbi)->nat_blocks / 8;
+	si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short);
 
 get_cache:
 	si->cache_mem = 0;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e849f83d6114..0a6e115562f6 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -561,6 +561,8 @@ struct f2fs_nm_info {
 	struct mutex build_lock;	/* lock for build free nids */
 	unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE];
 	unsigned char *nat_block_bitmap;
+	unsigned short *free_nid_count;	/* free nid count of NAT block */
+	spinlock_t free_nid_lock;	/* protect updating of nid count */
 
 	/* for checkpoint */
 	char *nat_bitmap;		/* NAT bitmap pointer */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 8c81ff614d1a..87a2b1f740cc 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1824,7 +1824,7 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
 }
 
 static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
-					bool set)
+							bool set, bool build)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
@@ -1837,6 +1837,13 @@ static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
 		__set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
 	else
 		__clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+
+	spin_lock(&nm_i->free_nid_lock);
+	if (set)
+		nm_i->free_nid_count[nat_ofs]++;
+	else if (!build)
+		nm_i->free_nid_count[nat_ofs]--;
+	spin_unlock(&nm_i->free_nid_lock);
 }
 
 static void scan_nat_page(struct f2fs_sb_info *sbi,
@@ -1848,6 +1855,9 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
 	unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
 	int i;
 
+	if (test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
+		return;
+
 	__set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
 
 	i = start_nid % NAT_ENTRY_PER_BLOCK;
@@ -1862,7 +1872,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
 		f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
 		if (blk_addr == NULL_ADDR)
 			freed = add_free_nid(sbi, start_nid, true);
-		update_free_nid_bitmap(sbi, start_nid, freed);
+		update_free_nid_bitmap(sbi, start_nid, freed, true);
 	}
 }
 
@@ -1878,6 +1888,8 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
 	for (i = 0; i < nm_i->nat_blocks; i++) {
 		if (!test_bit_le(i, nm_i->nat_block_bitmap))
 			continue;
+		if (!nm_i->free_nid_count[i])
+			continue;
 		for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
 			nid_t nid;
 
@@ -2082,7 +2094,7 @@ retry:
 		__insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
 		nm_i->available_nids--;
 
-		update_free_nid_bitmap(sbi, *nid, false);
+		update_free_nid_bitmap(sbi, *nid, false, false);
 
 		spin_unlock(&nm_i->nid_list_lock);
 		return true;
@@ -2138,7 +2150,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
 
 	nm_i->available_nids++;
 
-	update_free_nid_bitmap(sbi, nid, true);
+	update_free_nid_bitmap(sbi, nid, true, false);
 
 	spin_unlock(&nm_i->nid_list_lock);
 
@@ -2468,11 +2480,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 			add_free_nid(sbi, nid, false);
 			spin_lock(&NM_I(sbi)->nid_list_lock);
 			NM_I(sbi)->available_nids++;
-			update_free_nid_bitmap(sbi, nid, true);
+			update_free_nid_bitmap(sbi, nid, true, false);
 			spin_unlock(&NM_I(sbi)->nid_list_lock);
 		} else {
 			spin_lock(&NM_I(sbi)->nid_list_lock);
-			update_free_nid_bitmap(sbi, nid, false);
+			update_free_nid_bitmap(sbi, nid, false, false);
 			spin_unlock(&NM_I(sbi)->nid_list_lock);
 		}
 	}
@@ -2652,6 +2664,14 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi)
 								GFP_KERNEL);
 	if (!nm_i->nat_block_bitmap)
 		return -ENOMEM;
+
+	nm_i->free_nid_count = f2fs_kvzalloc(nm_i->nat_blocks *
+					sizeof(unsigned short), GFP_KERNEL);
+	if (!nm_i->free_nid_count)
+		return -ENOMEM;
+
+	spin_lock_init(&nm_i->free_nid_lock);
+
 	return 0;
 }
 
@@ -2731,6 +2751,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
 
 	kvfree(nm_i->nat_block_bitmap);
 	kvfree(nm_i->free_nid_bitmap);
+	kvfree(nm_i->free_nid_count);
 
 	kfree(nm_i->nat_bitmap);
 	kfree(nm_i->nat_bits);
-- 
cgit v1.2.3


From 7041d5d286fb54635f540c1bb3b43980ed65513a Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Wed, 8 Mar 2017 20:07:49 +0800
Subject: f2fs: combine nat_bits and free_nid_bitmap cache
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Both nat_bits cache and free_nid_bitmap cache provide same functionality
as a intermediate cache between free nid cache and disk, but with
different granularity of indicating free nid range, and different
persistence policy. nat_bits cache provides better persistence ability,
and free_nid_bitmap provides better granularity.

In this patch we combine advantage of both caches, so finally policy of
the intermediate cache would be:
- init: load free nid status from nat_bits into free_nid_bitmap
- lookup: scan free_nid_bitmap before load NAT blocks
- update: update free_nid_bitmap in real-time
- persistence: udpate and persist nat_bits in checkpoint

This patch also resolves performance regression reported by lkp-robot.

commit:
  4ac912427c4214d8031d9ad6fbc3bc75e71512df ("f2fs: introduce free nid bitmap")
  d00030cf9cd0bb96fdccc41e33d3c91dcbb672ba ("f2fs: use __set{__clear}_bit_le")
  1382c0f3f9d3f936c8bc42ed1591cf7a593ef9f7 ("f2fs: combine nat_bits and free_nid_bitmap cache")

4ac912427c4214d8 d00030cf9cd0bb96fdccc41e33 1382c0f3f9d3f936c8bc42ed15
---------------- -------------------------- --------------------------
         %stddev     %change         %stddev     %change         %stddev
             \          |                \          |                \
     77863 ±  0%      +2.1%      79485 ±  1%     +50.8%     117404 ±  0%  aim7.jobs-per-min
    231.63 ±  0%      -2.0%     227.01 ±  1%     -33.6%     153.80 ±  0%  aim7.time.elapsed_time
    231.63 ±  0%      -2.0%     227.01 ±  1%     -33.6%     153.80 ±  0%  aim7.time.elapsed_time.max
    896604 ±  0%      -0.8%     889221 ±  3%     -20.2%     715260 ±  1%  aim7.time.involuntary_context_switches
      2394 ±  1%      +4.6%       2503 ±  1%      +3.7%       2481 ±  2%  aim7.time.maximum_resident_set_size
      6240 ±  0%      -1.5%       6145 ±  1%     -14.1%       5360 ±  1%  aim7.time.system_time
   1111357 ±  3%      +1.9%    1132509 ±  2%      -6.2%    1041932 ±  2%  aim7.time.voluntary_context_switches
...

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Tested-by: Xiaolong Ye <xiaolong.ye@intel.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/node.c | 125 ++++++++++++++++++++++-----------------------------------
 1 file changed, 47 insertions(+), 78 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 87a2b1f740cc..481aa8dc79f4 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -338,9 +338,6 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
 		set_nat_flag(e, IS_CHECKPOINTED, false);
 	__set_nat_cache_dirty(nm_i, e);
 
-	if (enabled_nat_bits(sbi, NULL) && new_blkaddr == NEW_ADDR)
-		__clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits);
-
 	/* update fsync_mark if its inode nat entry is still alive */
 	if (ni->nid != ni->ino)
 		e = __lookup_nat_cache(nm_i, ni->ino);
@@ -1824,7 +1821,7 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
 }
 
 static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
-							bool set, bool build)
+			bool set, bool build, bool locked)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
@@ -1838,12 +1835,14 @@ static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
 	else
 		__clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
 
-	spin_lock(&nm_i->free_nid_lock);
+	if (!locked)
+		spin_lock(&nm_i->free_nid_lock);
 	if (set)
 		nm_i->free_nid_count[nat_ofs]++;
 	else if (!build)
 		nm_i->free_nid_count[nat_ofs]--;
-	spin_unlock(&nm_i->free_nid_lock);
+	if (!locked)
+		spin_unlock(&nm_i->free_nid_lock);
 }
 
 static void scan_nat_page(struct f2fs_sb_info *sbi,
@@ -1872,7 +1871,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
 		f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
 		if (blk_addr == NULL_ADDR)
 			freed = add_free_nid(sbi, start_nid, true);
-		update_free_nid_bitmap(sbi, start_nid, freed, true);
+		update_free_nid_bitmap(sbi, start_nid, freed, true, false);
 	}
 }
 
@@ -1920,58 +1919,6 @@ out:
 	up_read(&nm_i->nat_tree_lock);
 }
 
-static int scan_nat_bits(struct f2fs_sb_info *sbi)
-{
-	struct f2fs_nm_info *nm_i = NM_I(sbi);
-	struct page *page;
-	unsigned int i = 0;
-	nid_t nid;
-
-	if (!enabled_nat_bits(sbi, NULL))
-		return -EAGAIN;
-
-	down_read(&nm_i->nat_tree_lock);
-check_empty:
-	i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
-	if (i >= nm_i->nat_blocks) {
-		i = 0;
-		goto check_partial;
-	}
-
-	for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
-									nid++) {
-		if (unlikely(nid >= nm_i->max_nid))
-			break;
-		add_free_nid(sbi, nid, true);
-	}
-
-	if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS)
-		goto out;
-	i++;
-	goto check_empty;
-
-check_partial:
-	i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
-	if (i >= nm_i->nat_blocks) {
-		disable_nat_bits(sbi, true);
-		up_read(&nm_i->nat_tree_lock);
-		return -EINVAL;
-	}
-
-	nid = i * NAT_ENTRY_PER_BLOCK;
-	page = get_current_nat_page(sbi, nid);
-	scan_nat_page(sbi, page, nid);
-	f2fs_put_page(page, 1);
-
-	if (nm_i->nid_cnt[FREE_NID_LIST] < MAX_FREE_NIDS) {
-		i++;
-		goto check_partial;
-	}
-out:
-	up_read(&nm_i->nat_tree_lock);
-	return 0;
-}
-
 static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -1993,21 +1940,6 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
 
 		if (nm_i->nid_cnt[FREE_NID_LIST])
 			return;
-
-		/* try to find free nids with nat_bits */
-		if (!scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
-			return;
-	}
-
-	/* find next valid candidate */
-	if (enabled_nat_bits(sbi, NULL)) {
-		int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
-					nm_i->nat_blocks, 0);
-
-		if (idx >= nm_i->nat_blocks)
-			set_sbi_flag(sbi, SBI_NEED_FSCK);
-		else
-			nid = idx * NAT_ENTRY_PER_BLOCK;
 	}
 
 	/* readahead nat pages to be scanned */
@@ -2094,7 +2026,7 @@ retry:
 		__insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
 		nm_i->available_nids--;
 
-		update_free_nid_bitmap(sbi, *nid, false, false);
+		update_free_nid_bitmap(sbi, *nid, false, false, false);
 
 		spin_unlock(&nm_i->nid_list_lock);
 		return true;
@@ -2150,7 +2082,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
 
 	nm_i->available_nids++;
 
-	update_free_nid_bitmap(sbi, nid, true, false);
+	update_free_nid_bitmap(sbi, nid, true, false, false);
 
 	spin_unlock(&nm_i->nid_list_lock);
 
@@ -2480,11 +2412,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 			add_free_nid(sbi, nid, false);
 			spin_lock(&NM_I(sbi)->nid_list_lock);
 			NM_I(sbi)->available_nids++;
-			update_free_nid_bitmap(sbi, nid, true, false);
+			update_free_nid_bitmap(sbi, nid, true, false, false);
 			spin_unlock(&NM_I(sbi)->nid_list_lock);
 		} else {
 			spin_lock(&NM_I(sbi)->nid_list_lock);
-			update_free_nid_bitmap(sbi, nid, false, false);
+			update_free_nid_bitmap(sbi, nid, false, false, false);
 			spin_unlock(&NM_I(sbi)->nid_list_lock);
 		}
 	}
@@ -2590,6 +2522,40 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
 	return 0;
 }
 
+inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	unsigned int i = 0;
+	nid_t nid, last_nid;
+
+	if (!enabled_nat_bits(sbi, NULL))
+		return;
+
+	for (i = 0; i < nm_i->nat_blocks; i++) {
+		i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
+		if (i >= nm_i->nat_blocks)
+			break;
+
+		__set_bit_le(i, nm_i->nat_block_bitmap);
+
+		nid = i * NAT_ENTRY_PER_BLOCK;
+		last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK;
+
+		spin_lock(&nm_i->free_nid_lock);
+		for (; nid < last_nid; nid++)
+			update_free_nid_bitmap(sbi, nid, true, true, true);
+		spin_unlock(&nm_i->free_nid_lock);
+	}
+
+	for (i = 0; i < nm_i->nat_blocks; i++) {
+		i = find_next_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
+		if (i >= nm_i->nat_blocks)
+			break;
+
+		__set_bit_le(i, nm_i->nat_block_bitmap);
+	}
+}
+
 static int init_node_manager(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
@@ -2691,6 +2657,9 @@ int build_node_manager(struct f2fs_sb_info *sbi)
 	if (err)
 		return err;
 
+	/* load free nid status from nat_bits table */
+	load_free_nid_bitmap(sbi);
+
 	build_free_nids(sbi, true, true);
 	return 0;
 }
-- 
cgit v1.2.3


From 6be3b6cce1e225f189b68b4e84fc711d19b4277b Mon Sep 17 00:00:00 2001
From: Ryan Hsu <ryanhsu@qca.qualcomm.com>
Date: Mon, 13 Mar 2017 15:49:03 -0700
Subject: ath10k: fix incorrect wlan_mac_base in qca6174_regs

In the 'commit ebee76f7fa46 ("ath10k: allow setting coverage class")',
it inherits the design and the address offset from ath9k, but the address
is not applicable to QCA6174, which leads to a random crash while doing the
resume() operation, since the set_coverage_class.ops will be called from
ieee80211_reconfig() when resume() (if the wow is not configured).

Fix the incorrect address offset here to avoid the random crash.

Verified on QCA6174/hw3.0 with firmware WLAN.RM.4.4-00022-QCARMSWPZ-2.

kvalo: this also seems to fix a regression with firmware restart.

Fixes: ebee76f7fa46 ("ath10k: allow setting coverage class")
Cc: <stable@vger.kernel.org> # v4.10
Signed-off-by: Ryan Hsu <ryanhsu@qca.qualcomm.com>
Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>
---
 drivers/net/wireless/ath/ath10k/hw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/hw.c b/drivers/net/wireless/ath/ath10k/hw.c
index 33fb26833cd0..d9f37ee4bfdd 100644
--- a/drivers/net/wireless/ath/ath10k/hw.c
+++ b/drivers/net/wireless/ath/ath10k/hw.c
@@ -51,7 +51,7 @@ const struct ath10k_hw_regs qca6174_regs = {
 	.rtc_soc_base_address			= 0x00000800,
 	.rtc_wmac_base_address			= 0x00001000,
 	.soc_core_base_address			= 0x0003a000,
-	.wlan_mac_base_address			= 0x00020000,
+	.wlan_mac_base_address			= 0x00010000,
 	.ce_wrapper_base_address		= 0x00034000,
 	.ce0_base_address			= 0x00034400,
 	.ce1_base_address			= 0x00034800,
-- 
cgit v1.2.3


From c3104aae5d8cc443556f8613466e16737326e215 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 13 Mar 2017 17:36:25 +0100
Subject: remoteproc: qcom: fix QCOM_SMD dependencies

qcom_smd_register_edge() is provided by either QCOM_SMD or RPMSG_QCOM_SMD,
and if both of them are disabled, it does nothing.

The check for the PIL drivers however only checks for QCOM_SMD, so it breaks
with QCOM_SMD=n && RPMSG_QCOM_SMD=m:

drivers/remoteproc/built-in.o: In function `smd_subdev_remove':
qcom_wcnss_iris.c:(.text+0x231c): undefined reference to `qcom_smd_unregister_edge'
drivers/remoteproc/built-in.o: In function `smd_subdev_probe':
qcom_wcnss_iris.c:(.text+0x2344): undefined reference to `qcom_smd_register_edge'
drivers/remoteproc/built-in.o: In function `smd_subdev_probe':
qcom_q6v5_pil.c:(.text+0x3538): undefined reference to `qcom_smd_register_edge'
qcom_q6v5_pil.c:(.text+0x3538): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `qcom_smd_register_edge'

This clarifies the Kconfig dependency.

Fixes: 4b48921a8f74 ("remoteproc: qcom: Use common SMD edge handler")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/Kconfig | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
index 65f86bc24c07..1dc43fc5f65f 100644
--- a/drivers/remoteproc/Kconfig
+++ b/drivers/remoteproc/Kconfig
@@ -76,7 +76,7 @@ config QCOM_ADSP_PIL
 	depends on OF && ARCH_QCOM
 	depends on REMOTEPROC
 	depends on QCOM_SMEM
-	depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n)
+	depends on RPMSG_QCOM_SMD || QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n && RPMSG_QCOM_SMD=n)
 	select MFD_SYSCON
 	select QCOM_MDT_LOADER
 	select QCOM_RPROC_COMMON
@@ -93,7 +93,7 @@ config QCOM_Q6V5_PIL
 	depends on OF && ARCH_QCOM
 	depends on QCOM_SMEM
 	depends on REMOTEPROC
-	depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n)
+	depends on RPMSG_QCOM_SMD || QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n && RPMSG_QCOM_SMD=n)
 	select MFD_SYSCON
 	select QCOM_RPROC_COMMON
 	select QCOM_SCM
@@ -104,7 +104,7 @@ config QCOM_Q6V5_PIL
 config QCOM_WCNSS_PIL
 	tristate "Qualcomm WCNSS Peripheral Image Loader"
 	depends on OF && ARCH_QCOM
-	depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n)
+	depends on RPMSG_QCOM_SMD || QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n && RPMSG_QCOM_SMD=n)
 	depends on QCOM_SMEM
 	depends on REMOTEPROC
 	select QCOM_MDT_LOADER
-- 
cgit v1.2.3


From 4296f23ed49a15d36949458adcc66ff993dee2a8 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sun, 19 Mar 2017 14:30:02 +0100
Subject: cpufreq: schedutil: Fix per-CPU structure initialization in
 sugov_start()

sugov_start() only initializes struct sugov_cpu per-CPU structures
for shared policies, but it should do that for single-CPU policies too.

That in particular makes the IO-wait boost mechanism work in the
cases when cpufreq policies correspond to individual CPUs.

Fixes: 21ca6d2c52f8 (cpufreq: schedutil: Add iowait boosting)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Cc: 4.9+ <stable@vger.kernel.org> # 4.9+
---
 kernel/sched/cpufreq_schedutil.c | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index cd7cd489f739..54c577578da6 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -584,20 +584,14 @@ static int sugov_start(struct cpufreq_policy *policy)
 	for_each_cpu(cpu, policy->cpus) {
 		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
 
+		memset(sg_cpu, 0, sizeof(*sg_cpu));
 		sg_cpu->sg_policy = sg_policy;
-		if (policy_is_shared(policy)) {
-			sg_cpu->util = 0;
-			sg_cpu->max = 0;
-			sg_cpu->flags = SCHED_CPUFREQ_RT;
-			sg_cpu->last_update = 0;
-			sg_cpu->iowait_boost = 0;
-			sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
-			cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
-						     sugov_update_shared);
-		} else {
-			cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
-						     sugov_update_single);
-		}
+		sg_cpu->flags = SCHED_CPUFREQ_RT;
+		sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
+		cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
+					     policy_is_shared(policy) ?
+							sugov_update_shared :
+							sugov_update_single);
 	}
 	return 0;
 }
-- 
cgit v1.2.3


From ac7ce78ba036c0f9952d9706b1941c7d80c78680 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 14 Feb 2017 10:46:20 +0300
Subject: drm/exynos/decon5433: & vs | typo

"&" was obviously intended instead of "|".  The original condition is
always true.

Fixes: b93c2e8b5d9d ("drm/exynos/decon5433: configure sysreg in case of hardware trigger")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index 0fd6f7a18364..cca32a4fdab3 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -678,7 +678,7 @@ static int exynos5433_decon_probe(struct platform_device *pdev)
 		ctx->out_type |= IFTYPE_I80;
 	}
 
-	if (ctx->out_type | I80_HW_TRG) {
+	if (ctx->out_type & I80_HW_TRG) {
 		ctx->sysreg = syscon_regmap_lookup_by_phandle(dev->of_node,
 							"samsung,disp-sysreg");
 		if (IS_ERR(ctx->sysreg)) {
-- 
cgit v1.2.3


From 6bdc92ee4980ca10171a8de338fad612f00bb48f Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Sat, 11 Mar 2017 20:04:16 +0200
Subject: drm/exynos: Remove support for Exynos4415 (SoC not supported anymore)

Support for Exynos4415 is going away because there are no internal nor
external users.

Since commit 46dcf0ff0de3 ("ARM: dts: exynos: Remove exynos4415.dtsi"),
the platform cannot be instantiated so remove also the drivers.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Acked-by: Kukjin Kim <kgene@kernel.org>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 .../devicetree/bindings/display/exynos/exynos_dsim.txt |  1 -
 .../bindings/display/exynos/samsung-fimd.txt           |  1 -
 drivers/gpu/drm/exynos/exynos_drm_dsi.c                | 15 +--------------
 drivers/gpu/drm/exynos/exynos_drm_fimd.c               | 18 ++----------------
 4 files changed, 3 insertions(+), 32 deletions(-)

diff --git a/Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt b/Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt
index a78265993665..ca5204b3bc21 100644
--- a/Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt
+++ b/Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt
@@ -4,7 +4,6 @@ Required properties:
   - compatible: value should be one of the following
 		"samsung,exynos3250-mipi-dsi" /* for Exynos3250/3472 SoCs */
 		"samsung,exynos4210-mipi-dsi" /* for Exynos4 SoCs */
-		"samsung,exynos4415-mipi-dsi" /* for Exynos4415 SoC */
 		"samsung,exynos5410-mipi-dsi" /* for Exynos5410/5420/5440 SoCs */
 		"samsung,exynos5422-mipi-dsi" /* for Exynos5422/5800 SoCs */
 		"samsung,exynos5433-mipi-dsi" /* for Exynos5433 SoCs */
diff --git a/Documentation/devicetree/bindings/display/exynos/samsung-fimd.txt b/Documentation/devicetree/bindings/display/exynos/samsung-fimd.txt
index 18645e0228b0..5837402c3ade 100644
--- a/Documentation/devicetree/bindings/display/exynos/samsung-fimd.txt
+++ b/Documentation/devicetree/bindings/display/exynos/samsung-fimd.txt
@@ -11,7 +11,6 @@ Required properties:
 		"samsung,s5pv210-fimd"; /* for S5PV210 SoC */
 		"samsung,exynos3250-fimd"; /* for Exynos3250/3472 SoCs */
 		"samsung,exynos4210-fimd"; /* for Exynos4 SoCs */
-		"samsung,exynos4415-fimd"; /* for Exynos4415 SoC */
 		"samsung,exynos5250-fimd"; /* for Exynos5250 SoCs */
 		"samsung,exynos5420-fimd"; /* for Exynos5420/5422/5800 SoCs */
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
index 812e2ec0761d..ef6f9c6de098 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
@@ -86,7 +86,7 @@
 #define DSIM_SYNC_INFORM		(1 << 27)
 #define DSIM_EOT_DISABLE		(1 << 28)
 #define DSIM_MFLUSH_VS			(1 << 29)
-/* This flag is valid only for exynos3250/3472/4415/5260/5430 */
+/* This flag is valid only for exynos3250/3472/5260/5430 */
 #define DSIM_CLKLANE_STOP		(1 << 30)
 
 /* DSIM_ESCMODE */
@@ -473,17 +473,6 @@ static const struct exynos_dsi_driver_data exynos4_dsi_driver_data = {
 	.reg_values = reg_values,
 };
 
-static const struct exynos_dsi_driver_data exynos4415_dsi_driver_data = {
-	.reg_ofs = exynos_reg_ofs,
-	.plltmr_reg = 0x58,
-	.has_clklane_stop = 1,
-	.num_clks = 2,
-	.max_freq = 1000,
-	.wait_for_reset = 1,
-	.num_bits_resol = 11,
-	.reg_values = reg_values,
-};
-
 static const struct exynos_dsi_driver_data exynos5_dsi_driver_data = {
 	.reg_ofs = exynos_reg_ofs,
 	.plltmr_reg = 0x58,
@@ -521,8 +510,6 @@ static const struct of_device_id exynos_dsi_of_match[] = {
 	  .data = &exynos3_dsi_driver_data },
 	{ .compatible = "samsung,exynos4210-mipi-dsi",
 	  .data = &exynos4_dsi_driver_data },
-	{ .compatible = "samsung,exynos4415-mipi-dsi",
-	  .data = &exynos4415_dsi_driver_data },
 	{ .compatible = "samsung,exynos5410-mipi-dsi",
 	  .data = &exynos5_dsi_driver_data },
 	{ .compatible = "samsung,exynos5422-mipi-dsi",
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index a9fa444c6053..661b9fe049e2 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -71,10 +71,10 @@
 #define TRIGCON				0x1A4
 #define TRGMODE_ENABLE			(1 << 0)
 #define SWTRGCMD_ENABLE			(1 << 1)
-/* Exynos3250, 3472, 4415, 5260 5410, 5420 and 5422 only supported. */
+/* Exynos3250, 3472, 5260 5410, 5420 and 5422 only supported. */
 #define HWTRGEN_ENABLE			(1 << 3)
 #define HWTRGMASK_ENABLE		(1 << 4)
-/* Exynos3250, 3472, 4415, 5260, 5420 and 5422 only supported. */
+/* Exynos3250, 3472, 5260, 5420 and 5422 only supported. */
 #define HWTRIGEN_PER_ENABLE		(1 << 31)
 
 /* display mode change control register except exynos4 */
@@ -138,18 +138,6 @@ static struct fimd_driver_data exynos4_fimd_driver_data = {
 	.has_vtsel = 1,
 };
 
-static struct fimd_driver_data exynos4415_fimd_driver_data = {
-	.timing_base = 0x20000,
-	.lcdblk_offset = 0x210,
-	.lcdblk_vt_shift = 10,
-	.lcdblk_bypass_shift = 1,
-	.trg_type = I80_HW_TRG,
-	.has_shadowcon = 1,
-	.has_vidoutcon = 1,
-	.has_vtsel = 1,
-	.has_trigger_per_te = 1,
-};
-
 static struct fimd_driver_data exynos5_fimd_driver_data = {
 	.timing_base = 0x20000,
 	.lcdblk_offset = 0x214,
@@ -210,8 +198,6 @@ static const struct of_device_id fimd_driver_dt_match[] = {
 	  .data = &exynos3_fimd_driver_data },
 	{ .compatible = "samsung,exynos4210-fimd",
 	  .data = &exynos4_fimd_driver_data },
-	{ .compatible = "samsung,exynos4415-fimd",
-	  .data = &exynos4415_fimd_driver_data },
 	{ .compatible = "samsung,exynos5250-fimd",
 	  .data = &exynos5_fimd_driver_data },
 	{ .compatible = "samsung,exynos5420-fimd",
-- 
cgit v1.2.3


From a392276d1dec63e5aabe6f527c37de29a729559a Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <a.hajda@samsung.com>
Date: Tue, 14 Mar 2017 09:27:56 +0100
Subject: drm/exynos: move crtc event handling to drivers callbacks

CRTC event is currently send with next vblank, or instantly in case crtc
is being disabled. This approach usually works, but in corner cases it can
result in premature event generation. Only device driver is able to verify
if the event can be sent. This patch is a first step in that direction - it
moves event handling to the drivers.

Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos5433_drm_decon.c |  1 +
 drivers/gpu/drm/exynos/exynos7_drm_decon.c    |  1 +
 drivers/gpu/drm/exynos/exynos_drm_crtc.c      | 29 +++++++++++++++------------
 drivers/gpu/drm/exynos/exynos_drm_crtc.h      |  2 ++
 drivers/gpu/drm/exynos/exynos_drm_fimd.c      |  2 ++
 drivers/gpu/drm/exynos/exynos_drm_vidi.c      |  1 +
 drivers/gpu/drm/exynos/exynos_mixer.c         |  1 +
 7 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index cca32a4fdab3..2130ccf1e036 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -378,6 +378,7 @@ static void decon_atomic_flush(struct exynos_drm_crtc *crtc)
 
 	if (ctx->out_type & IFTYPE_I80)
 		set_bit(BIT_WIN_UPDATED, &ctx->flags);
+	exynos_crtc_handle_event(crtc);
 }
 
 static void decon_swreset(struct decon_context *ctx)
diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
index f9ab19e205e2..48811806fa27 100644
--- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
@@ -526,6 +526,7 @@ static void decon_atomic_flush(struct exynos_drm_crtc *crtc)
 
 	for (i = 0; i < WINDOWS_NR; i++)
 		decon_shadow_protect_win(ctx, i, false);
+	exynos_crtc_handle_event(crtc);
 }
 
 static void decon_init(struct decon_context *ctx)
diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
index 5367b6664fe3..c65f4509932c 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
@@ -85,16 +85,28 @@ static void exynos_crtc_atomic_flush(struct drm_crtc *crtc,
 				     struct drm_crtc_state *old_crtc_state)
 {
 	struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc);
-	struct drm_pending_vblank_event *event;
-	unsigned long flags;
 
 	if (exynos_crtc->ops->atomic_flush)
 		exynos_crtc->ops->atomic_flush(exynos_crtc);
+}
+
+static const struct drm_crtc_helper_funcs exynos_crtc_helper_funcs = {
+	.enable		= exynos_drm_crtc_enable,
+	.disable	= exynos_drm_crtc_disable,
+	.mode_set_nofb	= exynos_drm_crtc_mode_set_nofb,
+	.atomic_check	= exynos_crtc_atomic_check,
+	.atomic_begin	= exynos_crtc_atomic_begin,
+	.atomic_flush	= exynos_crtc_atomic_flush,
+};
+
+void exynos_crtc_handle_event(struct exynos_drm_crtc *exynos_crtc)
+{
+	struct drm_crtc *crtc = &exynos_crtc->base;
+	struct drm_pending_vblank_event *event = crtc->state->event;
+	unsigned long flags;
 
-	event = crtc->state->event;
 	if (event) {
 		crtc->state->event = NULL;
-
 		spin_lock_irqsave(&crtc->dev->event_lock, flags);
 		if (drm_crtc_vblank_get(crtc) == 0)
 			drm_crtc_arm_vblank_event(crtc, event);
@@ -105,15 +117,6 @@ static void exynos_crtc_atomic_flush(struct drm_crtc *crtc,
 
 }
 
-static const struct drm_crtc_helper_funcs exynos_crtc_helper_funcs = {
-	.enable		= exynos_drm_crtc_enable,
-	.disable	= exynos_drm_crtc_disable,
-	.mode_set_nofb	= exynos_drm_crtc_mode_set_nofb,
-	.atomic_check	= exynos_crtc_atomic_check,
-	.atomic_begin	= exynos_crtc_atomic_begin,
-	.atomic_flush	= exynos_crtc_atomic_flush,
-};
-
 static void exynos_drm_crtc_destroy(struct drm_crtc *crtc)
 {
 	struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.h b/drivers/gpu/drm/exynos/exynos_drm_crtc.h
index 6a581a8af465..abd5d6ceac0c 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_crtc.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.h
@@ -40,4 +40,6 @@ int exynos_drm_crtc_get_pipe_from_type(struct drm_device *drm_dev,
  */
 void exynos_drm_crtc_te_handler(struct drm_crtc *crtc);
 
+void exynos_crtc_handle_event(struct exynos_drm_crtc *exynos_crtc);
+
 #endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index 661b9fe049e2..a3162a4566ce 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -709,6 +709,8 @@ static void fimd_atomic_flush(struct exynos_drm_crtc *crtc)
 
 	for (i = 0; i < WINDOWS_NR; i++)
 		fimd_shadow_protect_win(ctx, i, false);
+
+	exynos_crtc_handle_event(crtc);
 }
 
 static void fimd_update_plane(struct exynos_drm_crtc *crtc,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
index 57fe514d5c5b..5d9a62a87eec 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
@@ -170,6 +170,7 @@ static const struct exynos_drm_crtc_ops vidi_crtc_ops = {
 	.enable_vblank = vidi_enable_vblank,
 	.disable_vblank = vidi_disable_vblank,
 	.update_plane = vidi_update_plane,
+	.atomic_flush = exynos_crtc_handle_event,
 };
 
 static void vidi_fake_vblank_timer(unsigned long arg)
diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c
index 72143ac10525..25edb635a197 100644
--- a/drivers/gpu/drm/exynos/exynos_mixer.c
+++ b/drivers/gpu/drm/exynos/exynos_mixer.c
@@ -1012,6 +1012,7 @@ static void mixer_atomic_flush(struct exynos_drm_crtc *crtc)
 		return;
 
 	mixer_vsync_set_update(mixer_ctx, true);
+	exynos_crtc_handle_event(crtc);
 }
 
 static void mixer_enable(struct exynos_drm_crtc *crtc)
-- 
cgit v1.2.3


From 73488331eb9460d14974a1e2c734f77ce8869183 Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <a.hajda@samsung.com>
Date: Tue, 14 Mar 2017 09:27:57 +0100
Subject: drm/exynos/decon5433: fix vblank event handling

Current implementation of event handling assumes that vblank interrupt is
always called at the right time. It is not true, it can be delayed due to
various reasons. As a result different races can happen. The patch fixes
the issue by using hardware frame counter present in DECON to serialize
vblank and commit completion events.

Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 78 ++++++++++++++++++++++++++-
 include/video/exynos5433_decon.h              |  8 +++
 2 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index 2130ccf1e036..cfe6f8af849f 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -68,6 +68,8 @@ struct decon_context {
 	unsigned long			flags;
 	unsigned long			out_type;
 	int				first_win;
+	spinlock_t			vblank_lock;
+	u32				frame_id;
 };
 
 static const uint32_t decon_formats[] = {
@@ -122,6 +124,48 @@ static void decon_disable_vblank(struct exynos_drm_crtc *crtc)
 		writel(0, ctx->addr + DECON_VIDINTCON0);
 }
 
+/* return number of starts/ends of frame transmissions since reset */
+static u32 decon_get_frame_count(struct decon_context *ctx, bool end)
+{
+	u32 frm, pfrm, status, cnt = 2;
+
+	/* To get consistent result repeat read until frame id is stable.
+	 * Usually the loop will be executed once, in rare cases when the loop
+	 * is executed at frame change time 2nd pass will be needed.
+	 */
+	frm = readl(ctx->addr + DECON_CRFMID);
+	do {
+		status = readl(ctx->addr + DECON_VIDCON1);
+		pfrm = frm;
+		frm = readl(ctx->addr + DECON_CRFMID);
+	} while (frm != pfrm && --cnt);
+
+	/* CRFMID is incremented on BPORCH in case of I80 and on VSYNC in case
+	 * of RGB, it should be taken into account.
+	 */
+	if (!frm)
+		return 0;
+
+	switch (status & (VIDCON1_VSTATUS_MASK | VIDCON1_I80_ACTIVE)) {
+	case VIDCON1_VSTATUS_VS:
+		if (!(ctx->out_type & IFTYPE_I80))
+			--frm;
+		break;
+	case VIDCON1_VSTATUS_BP:
+		--frm;
+		break;
+	case VIDCON1_I80_ACTIVE:
+	case VIDCON1_VSTATUS_AC:
+		if (end)
+			--frm;
+		break;
+	default:
+		break;
+	}
+
+	return frm;
+}
+
 static void decon_setup_trigger(struct decon_context *ctx)
 {
 	if (!(ctx->out_type & (IFTYPE_I80 | I80_HW_TRG)))
@@ -365,11 +409,14 @@ static void decon_disable_plane(struct exynos_drm_crtc *crtc,
 static void decon_atomic_flush(struct exynos_drm_crtc *crtc)
 {
 	struct decon_context *ctx = crtc->ctx;
+	unsigned long flags;
 	int i;
 
 	if (test_bit(BIT_SUSPENDED, &ctx->flags))
 		return;
 
+	spin_lock_irqsave(&ctx->vblank_lock, flags);
+
 	for (i = ctx->first_win; i < WINDOWS_NR; i++)
 		decon_shadow_protect_win(ctx, i, false);
 
@@ -378,12 +425,18 @@ static void decon_atomic_flush(struct exynos_drm_crtc *crtc)
 
 	if (ctx->out_type & IFTYPE_I80)
 		set_bit(BIT_WIN_UPDATED, &ctx->flags);
+
+	ctx->frame_id = decon_get_frame_count(ctx, true);
+
 	exynos_crtc_handle_event(crtc);
+
+	spin_unlock_irqrestore(&ctx->vblank_lock, flags);
 }
 
 static void decon_swreset(struct decon_context *ctx)
 {
 	unsigned int tries;
+	unsigned long flags;
 
 	writel(0, ctx->addr + DECON_VIDCON0);
 	for (tries = 2000; tries; --tries) {
@@ -401,6 +454,10 @@ static void decon_swreset(struct decon_context *ctx)
 
 	WARN(tries == 0, "failed to software reset DECON\n");
 
+	spin_lock_irqsave(&ctx->vblank_lock, flags);
+	ctx->frame_id = 0;
+	spin_unlock_irqrestore(&ctx->vblank_lock, flags);
+
 	if (!(ctx->out_type & IFTYPE_HDMI))
 		return;
 
@@ -579,6 +636,24 @@ static const struct component_ops decon_component_ops = {
 	.unbind = decon_unbind,
 };
 
+static void decon_handle_vblank(struct decon_context *ctx)
+{
+	u32 frm;
+
+	spin_lock(&ctx->vblank_lock);
+
+	frm = decon_get_frame_count(ctx, true);
+
+	if (frm != ctx->frame_id) {
+		/* handle only if incremented, take care of wrap-around */
+		if ((s32)(frm - ctx->frame_id) > 0)
+			drm_crtc_handle_vblank(&ctx->crtc->base);
+		ctx->frame_id = frm;
+	}
+
+	spin_unlock(&ctx->vblank_lock);
+}
+
 static irqreturn_t decon_irq_handler(int irq, void *dev_id)
 {
 	struct decon_context *ctx = dev_id;
@@ -599,7 +674,7 @@ static irqreturn_t decon_irq_handler(int irq, void *dev_id)
 			    (VIDOUT_INTERLACE_EN_F | VIDOUT_INTERLACE_FIELD_F))
 				return IRQ_HANDLED;
 		}
-		drm_crtc_handle_vblank(&ctx->crtc->base);
+		decon_handle_vblank(ctx);
 	}
 
 out:
@@ -672,6 +747,7 @@ static int exynos5433_decon_probe(struct platform_device *pdev)
 	__set_bit(BIT_SUSPENDED, &ctx->flags);
 	ctx->dev = dev;
 	ctx->out_type = (unsigned long)of_device_get_match_data(dev);
+	spin_lock_init(&ctx->vblank_lock);
 
 	if (ctx->out_type & IFTYPE_HDMI) {
 		ctx->first_win = 1;
diff --git a/include/video/exynos5433_decon.h b/include/video/exynos5433_decon.h
index ef8e2a8ad0af..352fc0d9528f 100644
--- a/include/video/exynos5433_decon.h
+++ b/include/video/exynos5433_decon.h
@@ -46,6 +46,7 @@
 #define DECON_FRAMEFIFO_STATUS		0x0524
 #define DECON_CMU			0x1404
 #define DECON_UPDATE			0x1410
+#define DECON_CRFMID			0x1414
 #define DECON_UPDATE_SCHEME		0x1438
 #define DECON_VIDCON1			0x2000
 #define DECON_VIDCON2			0x2004
@@ -142,6 +143,13 @@
 #define STANDALONE_UPDATE_F		(1 << 0)
 
 /* DECON_VIDCON1 */
+#define VIDCON1_LINECNT_MASK		(0x0fff << 16)
+#define VIDCON1_I80_ACTIVE		(1 << 15)
+#define VIDCON1_VSTATUS_MASK		(0x3 << 13)
+#define VIDCON1_VSTATUS_VS		(0 << 13)
+#define VIDCON1_VSTATUS_BP		(1 << 13)
+#define VIDCON1_VSTATUS_AC		(2 << 13)
+#define VIDCON1_VSTATUS_FP		(3 << 13)
 #define VIDCON1_VCLK_MASK		(0x3 << 9)
 #define VIDCON1_VCLK_RUN_VDEN_DISABLE	(0x3 << 9)
 #define VIDCON1_VCLK_HOLD		(0x0 << 9)
-- 
cgit v1.2.3


From f3cce673e1c11112c536fbc8e6912c5414d7141f Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <a.hajda@samsung.com>
Date: Tue, 14 Mar 2017 09:27:58 +0100
Subject: drm/exynos/decon5433: signal frame done interrupt at front porch

DECON in case of video mode generates interrupt by default at start
of vertical back porch. As this interrupt is used to generate VBLANK
events more optimal point is start of vertical front porch.

Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 2 +-
 include/video/exynos5433_decon.h              | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index cfe6f8af849f..a43d0fa0b051 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -105,7 +105,7 @@ static int decon_enable_vblank(struct exynos_drm_crtc *crtc)
 		if (ctx->out_type & IFTYPE_I80)
 			val |= VIDINTCON0_FRAMEDONE;
 		else
-			val |= VIDINTCON0_INTFRMEN;
+			val |= VIDINTCON0_INTFRMEN | VIDINTCON0_FRAMESEL_FP;
 
 		writel(val, ctx->addr + DECON_VIDINTCON0);
 	}
diff --git a/include/video/exynos5433_decon.h b/include/video/exynos5433_decon.h
index 352fc0d9528f..6b083d327e98 100644
--- a/include/video/exynos5433_decon.h
+++ b/include/video/exynos5433_decon.h
@@ -127,6 +127,10 @@
 
 /* VIDINTCON0 */
 #define VIDINTCON0_FRAMEDONE		(1 << 17)
+#define VIDINTCON0_FRAMESEL_BP		(0 << 15)
+#define VIDINTCON0_FRAMESEL_VS		(1 << 15)
+#define VIDINTCON0_FRAMESEL_AC		(2 << 15)
+#define VIDINTCON0_FRAMESEL_FP		(3 << 15)
 #define VIDINTCON0_INTFRMEN		(1 << 12)
 #define VIDINTCON0_INTEN		(1 << 0)
 
-- 
cgit v1.2.3


From 82a01783252be726d76cdbbababc16540f582cec Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <a.hajda@samsung.com>
Date: Tue, 14 Mar 2017 09:27:59 +0100
Subject: drm/exynos/fimd: signal frame done interrupt at front porch

VBLANK interrupt should be signalled as soon as scanout ends, front porch
is the best moment.

Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_fimd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index a3162a4566ce..3f04d72c448d 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -243,7 +243,7 @@ static int fimd_enable_vblank(struct exynos_drm_crtc *crtc)
 			val |= VIDINTCON0_INT_FRAME;
 
 			val &= ~VIDINTCON0_FRAMESEL0_MASK;
-			val |= VIDINTCON0_FRAMESEL0_VSYNC;
+			val |= VIDINTCON0_FRAMESEL0_FRONTPORCH;
 			val &= ~VIDINTCON0_FRAMESEL1_MASK;
 			val |= VIDINTCON0_FRAMESEL1_NONE;
 		}
-- 
cgit v1.2.3


From f07d9c2864d4ccf0a0b5bd2dd6491f5204eab5fe Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <a.hajda@samsung.com>
Date: Tue, 14 Mar 2017 09:28:00 +0100
Subject: drm/exynos/decon5433: fix software trigger mask

The patch fixes copy/paste bug introduced during code refactoring.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Fixes: b93c2e8b5d9d ("drm/exynos/decon5433: configure sysreg in case of hardware trigger")Fixes:
Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index a43d0fa0b051..c0e8d3302292 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -172,8 +172,8 @@ static void decon_setup_trigger(struct decon_context *ctx)
 		return;
 
 	if (!(ctx->out_type & I80_HW_TRG)) {
-		writel(TRIGCON_TE_AUTO_MASK | TRIGCON_SWTRIGEN
-		       | TRIGCON_TE_AUTO_MASK | TRIGCON_SWTRIGEN,
+		writel(TRIGCON_TRIGEN_PER_F | TRIGCON_TRIGEN_F |
+		       TRIGCON_TE_AUTO_MASK | TRIGCON_SWTRIGEN,
 		       ctx->addr + DECON_TRIGCON);
 		return;
 	}
-- 
cgit v1.2.3


From 9cdf0ed25a9b34fd82cb0eb47a8bdc47dc9f4ff5 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Tue, 14 Mar 2017 20:38:04 +0200
Subject: drm/exynos: Print kernel pointers in a restricted form

Printing raw kernel pointers might reveal information which sometimes we
try to hide (e.g. with Kernel Address Space Layout Randomization).  Use
the "%pK" format so these pointers will be hidden for unprivileged
users.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_dsi.c     |  4 ++--
 drivers/gpu/drm/exynos/exynos_drm_fimc.c    |  2 +-
 drivers/gpu/drm/exynos/exynos_drm_gem.c     |  2 +-
 drivers/gpu/drm/exynos/exynos_drm_gsc.c     |  2 +-
 drivers/gpu/drm/exynos/exynos_drm_ipp.c     | 22 +++++++++++-----------
 drivers/gpu/drm/exynos/exynos_drm_rotator.c |  2 +-
 6 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
index ef6f9c6de098..c671f8e017db 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
@@ -966,7 +966,7 @@ static void exynos_dsi_send_to_fifo(struct exynos_dsi *dsi,
 	bool first = !xfer->tx_done;
 	u32 reg;
 
-	dev_dbg(dev, "< xfer %p: tx len %u, done %u, rx len %u, done %u\n",
+	dev_dbg(dev, "< xfer %pK: tx len %u, done %u, rx len %u, done %u\n",
 		xfer, length, xfer->tx_done, xfer->rx_len, xfer->rx_done);
 
 	if (length > DSI_TX_FIFO_SIZE)
@@ -1164,7 +1164,7 @@ static bool exynos_dsi_transfer_finish(struct exynos_dsi *dsi)
 	spin_unlock_irqrestore(&dsi->transfer_lock, flags);
 
 	dev_dbg(dsi->dev,
-		"> xfer %p, tx_len %zu, tx_done %u, rx_len %u, rx_done %u\n",
+		"> xfer %pK, tx_len %zu, tx_done %u, rx_len %u, rx_done %u\n",
 		xfer, xfer->packet.payload_length, xfer->tx_done, xfer->rx_len,
 		xfer->rx_done);
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
index 95871577015d..5b18b5c5fdf2 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
@@ -1695,7 +1695,7 @@ static int fimc_probe(struct platform_device *pdev)
 		goto err_put_clk;
 	}
 
-	DRM_DEBUG_KMS("id[%d]ippdrv[%p]\n", ctx->id, ippdrv);
+	DRM_DEBUG_KMS("id[%d]ippdrv[%pK]\n", ctx->id, ippdrv);
 
 	spin_lock_init(&ctx->lock);
 	platform_set_drvdata(pdev, ctx);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index 4c28f7ffcc4d..55a1579d11b3 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -218,7 +218,7 @@ static struct exynos_drm_gem *exynos_drm_gem_init(struct drm_device *dev,
 		return ERR_PTR(ret);
 	}
 
-	DRM_DEBUG_KMS("created file object = %p\n", obj->filp);
+	DRM_DEBUG_KMS("created file object = %pK\n", obj->filp);
 
 	return exynos_gem;
 }
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index bef57987759d..0506b2b17ac1 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -1723,7 +1723,7 @@ static int gsc_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	DRM_DEBUG_KMS("id[%d]ippdrv[%p]\n", ctx->id, ippdrv);
+	DRM_DEBUG_KMS("id[%d]ippdrv[%pK]\n", ctx->id, ippdrv);
 
 	mutex_init(&ctx->lock);
 	platform_set_drvdata(pdev, ctx);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
index 9c84ee76f18a..3edda18cc2d2 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
@@ -208,7 +208,7 @@ static struct exynos_drm_ippdrv *ipp_find_drv_by_handle(u32 prop_id)
 	 * e.g PAUSE state, queue buf, command control.
 	 */
 	list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) {
-		DRM_DEBUG_KMS("count[%d]ippdrv[%p]\n", count++, ippdrv);
+		DRM_DEBUG_KMS("count[%d]ippdrv[%pK]\n", count++, ippdrv);
 
 		mutex_lock(&ippdrv->cmd_lock);
 		list_for_each_entry(c_node, &ippdrv->cmd_list, list) {
@@ -388,7 +388,7 @@ int exynos_drm_ipp_set_property(struct drm_device *drm_dev, void *data,
 	}
 	property->prop_id = ret;
 
-	DRM_DEBUG_KMS("created prop_id[%d]cmd[%d]ippdrv[%p]\n",
+	DRM_DEBUG_KMS("created prop_id[%d]cmd[%d]ippdrv[%pK]\n",
 		property->prop_id, property->cmd, ippdrv);
 
 	/* stored property information and ippdrv in private data */
@@ -518,7 +518,7 @@ static int ipp_put_mem_node(struct drm_device *drm_dev,
 {
 	int i;
 
-	DRM_DEBUG_KMS("node[%p]\n", m_node);
+	DRM_DEBUG_KMS("node[%pK]\n", m_node);
 
 	if (!m_node) {
 		DRM_ERROR("invalid dequeue node.\n");
@@ -562,7 +562,7 @@ static struct drm_exynos_ipp_mem_node
 	m_node->buf_id = qbuf->buf_id;
 	INIT_LIST_HEAD(&m_node->list);
 
-	DRM_DEBUG_KMS("m_node[%p]ops_id[%d]\n", m_node, qbuf->ops_id);
+	DRM_DEBUG_KMS("m_node[%pK]ops_id[%d]\n", m_node, qbuf->ops_id);
 	DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]\n", qbuf->prop_id, m_node->buf_id);
 
 	for_each_ipp_planar(i) {
@@ -659,7 +659,7 @@ static void ipp_put_event(struct drm_exynos_ipp_cmd_node *c_node,
 
 	mutex_lock(&c_node->event_lock);
 	list_for_each_entry_safe(e, te, &c_node->event_list, base.link) {
-		DRM_DEBUG_KMS("count[%d]e[%p]\n", count++, e);
+		DRM_DEBUG_KMS("count[%d]e[%pK]\n", count++, e);
 
 		/*
 		 * qbuf == NULL condition means all event deletion.
@@ -750,7 +750,7 @@ static struct drm_exynos_ipp_mem_node
 
 	/* find memory node from memory list */
 	list_for_each_entry(m_node, head, list) {
-		DRM_DEBUG_KMS("count[%d]m_node[%p]\n", count++, m_node);
+		DRM_DEBUG_KMS("count[%d]m_node[%pK]\n", count++, m_node);
 
 		/* compare buffer id */
 		if (m_node->buf_id == qbuf->buf_id)
@@ -767,7 +767,7 @@ static int ipp_set_mem_node(struct exynos_drm_ippdrv *ippdrv,
 	struct exynos_drm_ipp_ops *ops = NULL;
 	int ret = 0;
 
-	DRM_DEBUG_KMS("node[%p]\n", m_node);
+	DRM_DEBUG_KMS("node[%pK]\n", m_node);
 
 	if (!m_node) {
 		DRM_ERROR("invalid queue node.\n");
@@ -1232,7 +1232,7 @@ static int ipp_start_property(struct exynos_drm_ippdrv *ippdrv,
 			m_node = list_first_entry(head,
 				struct drm_exynos_ipp_mem_node, list);
 
-			DRM_DEBUG_KMS("m_node[%p]\n", m_node);
+			DRM_DEBUG_KMS("m_node[%pK]\n", m_node);
 
 			ret = ipp_set_mem_node(ippdrv, c_node, m_node);
 			if (ret) {
@@ -1601,7 +1601,7 @@ static int ipp_subdrv_probe(struct drm_device *drm_dev, struct device *dev)
 		}
 		ippdrv->prop_list.ipp_id = ret;
 
-		DRM_DEBUG_KMS("count[%d]ippdrv[%p]ipp_id[%d]\n",
+		DRM_DEBUG_KMS("count[%d]ippdrv[%pK]ipp_id[%d]\n",
 			count++, ippdrv, ret);
 
 		/* store parent device for node */
@@ -1659,7 +1659,7 @@ static int ipp_subdrv_open(struct drm_device *drm_dev, struct device *dev,
 
 	file_priv->ipp_dev = dev;
 
-	DRM_DEBUG_KMS("done priv[%p]\n", dev);
+	DRM_DEBUG_KMS("done priv[%pK]\n", dev);
 
 	return 0;
 }
@@ -1676,7 +1676,7 @@ static void ipp_subdrv_close(struct drm_device *drm_dev, struct device *dev,
 		mutex_lock(&ippdrv->cmd_lock);
 		list_for_each_entry_safe(c_node, tc_node,
 			&ippdrv->cmd_list, list) {
-			DRM_DEBUG_KMS("count[%d]ippdrv[%p]\n",
+			DRM_DEBUG_KMS("count[%d]ippdrv[%pK]\n",
 				count++, ippdrv);
 
 			if (c_node->filp == file) {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
index 6591e406084c..79282a820ecc 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
@@ -748,7 +748,7 @@ static int rotator_probe(struct platform_device *pdev)
 		goto err_ippdrv_register;
 	}
 
-	DRM_DEBUG_KMS("ippdrv[%p]\n", ippdrv);
+	DRM_DEBUG_KMS("ippdrv[%pK]\n", ippdrv);
 
 	platform_set_drvdata(pdev, rot);
 
-- 
cgit v1.2.3


From 22e098daae7e53763493b9d9976ef8c65190017e Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <a.hajda@samsung.com>
Date: Wed, 15 Mar 2017 12:20:42 +0100
Subject: drm/exynos/dsi: make te-gpios optional

DSI forwards te-gpios interrupts to display controller, but if display
controller works in HW-TRIGGER mode this interrupt is not necessary.
Making te-gpios property optional allows to avoid generating spare
interrupts.
And also if panel device node of command mode panel device doesn't provide
te-gpios property then the panel driver failed to probe. This was a critial
issue.

With this patch we can not only get rid of 60 interrupt callbacks per second
but also fix the critial issues.

Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_dsi.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
index c671f8e017db..d7ef26370e67 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
@@ -1335,9 +1335,12 @@ static int exynos_dsi_register_te_irq(struct exynos_dsi *dsi)
 	int te_gpio_irq;
 
 	dsi->te_gpio = of_get_named_gpio(dsi->panel_node, "te-gpios", 0);
+	if (dsi->te_gpio == -ENOENT)
+		return 0;
+
 	if (!gpio_is_valid(dsi->te_gpio)) {
-		dev_err(dsi->dev, "no te-gpios specified\n");
 		ret = dsi->te_gpio;
+		dev_err(dsi->dev, "cannot get te-gpios, %d\n", ret);
 		goto out;
 	}
 
-- 
cgit v1.2.3


From fc36a903265c18d124cefaba364a7fa71b21be61 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Tue, 21 Mar 2017 12:38:02 +1100
Subject: Revert "powerpc/64: Disable use of radix under a hypervisor"

This reverts commit 3f91a89d424a79f8082525db5a375e438887bb3e.

Now that we do have the machinery for using the radix MMU under a
hypervisor, the extra check and comment introduced in 3f91a89d424a are
no longer correct.  The result is that when booted under a hypervisor
that only allows use of radix, we clear the MMU_FTR_TYPE_RADIX and
then set it again, and print a warning about ignoring the
disable_radix command line option, even though the command line does
not include "disable_radix".

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/init_64.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 9be992083d2a..c22f207aa656 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -397,8 +397,7 @@ static void early_check_vec5(void)
 void __init mmu_early_init_devtree(void)
 {
 	/* Disable radix mode based on kernel command line. */
-	/* We don't yet have the machinery to do radix as a guest. */
-	if (disable_radix || !(mfmsr() & MSR_HV))
+	if (disable_radix)
 		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
 
 	/*
-- 
cgit v1.2.3


From cc638a488a5205713b51eabd047be6ea641cc328 Mon Sep 17 00:00:00 2001
From: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Date: Mon, 20 Mar 2017 17:55:22 +1100
Subject: gcc-plugins: update architecture list in documentation

Commit 65c059bcaa73 ("powerpc: Enable support for GCC plugins") enabled GCC
plugins on powerpc, but neglected to update the architecture list in the
docs. Rectify this.

Fixes: 65c059bcaa73 ("powerpc: Enable support for GCC plugins")
Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 Documentation/gcc-plugins.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/gcc-plugins.txt b/Documentation/gcc-plugins.txt
index 891c69464434..433eaefb4aa1 100644
--- a/Documentation/gcc-plugins.txt
+++ b/Documentation/gcc-plugins.txt
@@ -18,8 +18,8 @@ because gcc versions 4.5 and 4.6 are compiled by a C compiler,
 gcc-4.7 can be compiled by a C or a C++ compiler,
 and versions 4.8+ can only be compiled by a C++ compiler.
 
-Currently the GCC plugin infrastructure supports only the x86, arm and arm64
-architectures.
+Currently the GCC plugin infrastructure supports only the x86, arm, arm64 and
+powerpc architectures.
 
 This infrastructure was ported from grsecurity [6] and PaX [7].
 
-- 
cgit v1.2.3


From c6736a94d0e527ddc0d1eb99dbc59886a9ecf471 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 21 Mar 2017 13:26:02 +0100
Subject: ALSA: x86: Make CONFIG_SND_X86 bool

CONFIG_SND_X86 is a menu config to filter only for x86-specific
drivers in its sub-menu, and this doesn't have to be tristate but
rather it should be a bool.  Also, like other sub-menu configs, it's
more user-friendly to be default=y; it's merely a menu config and the
actual drivers are configured in the sub-menu, after all.

Fixes: 287599cf2d77 ("ALSA: add Intel HDMI LPE audio driver for BYT/CHT-T")
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/x86/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sound/x86/Kconfig b/sound/x86/Kconfig
index 84c8f8fc597c..8adf4d1bd46e 100644
--- a/sound/x86/Kconfig
+++ b/sound/x86/Kconfig
@@ -1,6 +1,7 @@
 menuconfig SND_X86
-	tristate "X86 sound devices"
+	bool "X86 sound devices"
 	depends on X86
+	default y
 	---help---
 	  X86 sound devices that don't fall under SoC or PCI categories
 
-- 
cgit v1.2.3


From c520ff3d03f0b5db7146d9beed6373ad5d2a5e0e Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 21 Mar 2017 13:56:04 +0100
Subject: ALSA: seq: Fix racy cell insertions during snd_seq_pool_done()

When snd_seq_pool_done() is called, it marks the closing flag to
refuse the further cell insertions.  But snd_seq_pool_done() itself
doesn't clear the cells but just waits until all cells are cleared by
the caller side.  That is, it's racy, and this leads to the endless
stall as syzkaller spotted.

This patch addresses the racy by splitting the setup of pool->closing
flag out of snd_seq_pool_done(), and calling it properly before
snd_seq_pool_done().

BugLink: http://lkml.kernel.org/r/CACT4Y+aqqy8bZA1fFieifNxR2fAfFQQABcBHj801+u5ePV0URw@mail.gmail.com
Reported-and-tested-by: Dmitry Vyukov <dvyukov@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/seq/seq_clientmgr.c |  1 +
 sound/core/seq/seq_fifo.c      |  3 +++
 sound/core/seq/seq_memory.c    | 17 +++++++++++++----
 sound/core/seq/seq_memory.h    |  1 +
 4 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c
index 4c935202ce23..f3b1d7f50b81 100644
--- a/sound/core/seq/seq_clientmgr.c
+++ b/sound/core/seq/seq_clientmgr.c
@@ -1832,6 +1832,7 @@ static int snd_seq_ioctl_set_client_pool(struct snd_seq_client *client,
 	     info->output_pool != client->pool->size)) {
 		if (snd_seq_write_pool_allocated(client)) {
 			/* remove all existing cells */
+			snd_seq_pool_mark_closing(client->pool);
 			snd_seq_queue_client_leave_cells(client->number);
 			snd_seq_pool_done(client->pool);
 		}
diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c
index 448efd4e980e..33980d1c8037 100644
--- a/sound/core/seq/seq_fifo.c
+++ b/sound/core/seq/seq_fifo.c
@@ -72,6 +72,9 @@ void snd_seq_fifo_delete(struct snd_seq_fifo **fifo)
 		return;
 	*fifo = NULL;
 
+	if (f->pool)
+		snd_seq_pool_mark_closing(f->pool);
+
 	snd_seq_fifo_clear(f);
 
 	/* wake up clients if any */
diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c
index 1a1acf3ddda4..d4c61ec9be13 100644
--- a/sound/core/seq/seq_memory.c
+++ b/sound/core/seq/seq_memory.c
@@ -415,6 +415,18 @@ int snd_seq_pool_init(struct snd_seq_pool *pool)
 	return 0;
 }
 
+/* refuse the further insertion to the pool */
+void snd_seq_pool_mark_closing(struct snd_seq_pool *pool)
+{
+	unsigned long flags;
+
+	if (snd_BUG_ON(!pool))
+		return;
+	spin_lock_irqsave(&pool->lock, flags);
+	pool->closing = 1;
+	spin_unlock_irqrestore(&pool->lock, flags);
+}
+
 /* remove events */
 int snd_seq_pool_done(struct snd_seq_pool *pool)
 {
@@ -425,10 +437,6 @@ int snd_seq_pool_done(struct snd_seq_pool *pool)
 		return -EINVAL;
 
 	/* wait for closing all threads */
-	spin_lock_irqsave(&pool->lock, flags);
-	pool->closing = 1;
-	spin_unlock_irqrestore(&pool->lock, flags);
-
 	if (waitqueue_active(&pool->output_sleep))
 		wake_up(&pool->output_sleep);
 
@@ -485,6 +493,7 @@ int snd_seq_pool_delete(struct snd_seq_pool **ppool)
 	*ppool = NULL;
 	if (pool == NULL)
 		return 0;
+	snd_seq_pool_mark_closing(pool);
 	snd_seq_pool_done(pool);
 	kfree(pool);
 	return 0;
diff --git a/sound/core/seq/seq_memory.h b/sound/core/seq/seq_memory.h
index 4a2ec779b8a7..32f959c17786 100644
--- a/sound/core/seq/seq_memory.h
+++ b/sound/core/seq/seq_memory.h
@@ -84,6 +84,7 @@ static inline int snd_seq_total_cells(struct snd_seq_pool *pool)
 int snd_seq_pool_init(struct snd_seq_pool *pool);
 
 /* done pool - free events */
+void snd_seq_pool_mark_closing(struct snd_seq_pool *pool);
 int snd_seq_pool_done(struct snd_seq_pool *pool);
 
 /* create pool */
-- 
cgit v1.2.3


From 49cc4c217c0dbb7d09c402472d1f85a81c093f9f Mon Sep 17 00:00:00 2001
From: Aaron Armstrong Skomra <skomra@gmail.com>
Date: Mon, 6 Mar 2017 10:54:57 -0800
Subject: HID: wacom: Correct Intuos Pro 2 resolution

The features struct for the second gen Intuos Pro uses the wrong constant for
the resolution. This fix is for commit 4922cd2.

Fixes: 4922cd2 ("HID: wacom: Support 2nd-gen Intuos Pro's Bluetooth classic interface")
Signed-off-by: Aaron Armstrong Skomra <skomra@gmail.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/wacom_wac.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 4aa3de9f1163..3d864466d473 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -4197,10 +4197,10 @@ static const struct wacom_features wacom_features_0x343 =
 	  WACOM_DTU_OFFSET, WACOM_DTU_OFFSET };
 static const struct wacom_features wacom_features_0x360 =
 	{ "Wacom Intuos Pro M", 44800, 29600, 8191, 63,
-	  INTUOSP2_BT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 9, .touch_max = 10 };
+	  INTUOSP2_BT, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 9, .touch_max = 10 };
 static const struct wacom_features wacom_features_0x361 =
 	{ "Wacom Intuos Pro L", 62200, 43200, 8191, 63,
-	  INTUOSP2_BT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 9, .touch_max = 10 };
+	  INTUOSP2_BT, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 9, .touch_max = 10 };
 
 static const struct wacom_features wacom_features_HID_ANY_ID =
 	{ "Wacom HID", .type = HID_GENERIC, .oVid = HID_ANY_ID, .oPid = HID_ANY_ID };
-- 
cgit v1.2.3


From b6b1f19b06b7d4dcc261a88d74c5fb0a53988b4e Mon Sep 17 00:00:00 2001
From: Aaron Armstrong Skomra <skomra@gmail.com>
Date: Mon, 6 Mar 2017 10:54:58 -0800
Subject: HID: wacom: don't manually release resources for the EKR

Commit 5b779fc introduces the manual release of resources in wacom_remove() as
an addition to the driver's use of devm.  The EKR resources can only be
released through wacom_remote_destroy_one() so we skip the manual release for
it.

Fixes: 5b779fc ("HID: wacom: release the resources before leaving despite devm")
Signed-off-by: Aaron Armstrong Skomra <skomra@gmail.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/wacom_sys.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index be8f7e2a026f..994bddc55b82 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -2579,7 +2579,9 @@ static void wacom_remove(struct hid_device *hdev)
 
 	/* make sure we don't trigger the LEDs */
 	wacom_led_groups_release(wacom);
-	wacom_release_resources(wacom);
+
+	if (wacom->wacom_wac.features.type != REMOTE)
+		wacom_release_resources(wacom);
 
 	hid_set_drvdata(hdev, NULL);
 }
-- 
cgit v1.2.3


From deaba636997557fce46ca7bcb509bff5ea1b0558 Mon Sep 17 00:00:00 2001
From: Oscar Campos <oscar.campos@member.fsf.org>
Date: Fri, 10 Feb 2017 18:23:00 +0000
Subject: HID: corsair: support for K65-K70 Rapidfire and Scimitar Pro RGB

Add quirks for several corsair gaming devices to avoid long delays on
report initialization

Supported devices:

 - Corsair K65RGB Rapidfire Gaming Keyboard
 - Corsair K70RGB Rapidfire Gaming Keyboard
 - Corsair Scimitar Pro RGB Gaming Mouse

Signed-off-by: Oscar Campos <oscar.campos@member.fsf.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-ids.h           | 3 +++
 drivers/hid/usbhid/hid-quirks.c | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index b3df60da0297..0e2e7c571d22 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -278,6 +278,9 @@
 #define USB_DEVICE_ID_CORSAIR_K70RGB    0x1b13
 #define USB_DEVICE_ID_CORSAIR_STRAFE    0x1b15
 #define USB_DEVICE_ID_CORSAIR_K65RGB    0x1b17
+#define USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE  0x1b38
+#define USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE  0x1b39
+#define USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB  0x1b3e
 
 #define USB_VENDOR_ID_CREATIVELABS	0x041e
 #define USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51	0x322c
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index d6847a664446..a69a3c88ab29 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -80,6 +80,9 @@ static const struct hid_blacklist {
 	{ USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB, HID_QUIRK_NO_INIT_REPORTS },
 	{ USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB, HID_QUIRK_NO_INIT_REPORTS },
 	{ USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_STRAFE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
+	{ USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
+	{ USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
+	{ USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
 	{ USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU, HID_QUIRK_MULTI_INPUT },
-- 
cgit v1.2.3


From 01adc47e885f1127b29d76d0dfb21d8262f9d6b4 Mon Sep 17 00:00:00 2001
From: Oscar Campos <oscar.campos@member.fsf.org>
Date: Mon, 6 Mar 2017 21:02:39 +0000
Subject: HID: corsair: Add driver Scimitar Pro RGB gaming mouse 1b1c:1b3e
 support to hid-corsair

This mouse sold by Corsair as Scimitar PRO RGB defines two consecutive
Logical Minimum items in its Application (Consumer.0001) report making
it non parseable. This patch fixes the report descriptor overriding
byte 77 in rdesc from 0x16 (Logical Minimum with 16 bits value) to 0x26
(Logical Maximum with 16 bits value).

Signed-off-by: Oscar Campos <oscar.campos@member.fsf.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/Kconfig       |  1 +
 drivers/hid/hid-core.c    |  1 +
 drivers/hid/hid-corsair.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 49 insertions(+)

diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 8eab3200ac9a..8c54cb8f5d6d 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -190,6 +190,7 @@ config HID_CORSAIR
 
 	Supported devices:
 	- Vengeance K90
+	- Scimitar PRO RGB
 
 config HID_PRODIKEYS
 	tristate "Prodikeys PC-MIDI Keyboard support"
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index ae01ae601d74..3ceb4a2af381 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1870,6 +1870,7 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_CP2112) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1) },
diff --git a/drivers/hid/hid-corsair.c b/drivers/hid/hid-corsair.c
index c0303f61c26a..9ba5d98a1180 100644
--- a/drivers/hid/hid-corsair.c
+++ b/drivers/hid/hid-corsair.c
@@ -3,8 +3,10 @@
  *
  * Supported devices:
  *  - Vengeance K90 Keyboard
+ *  - Scimitar PRO RGB Gaming Mouse
  *
  * Copyright (c) 2015 Clement Vuchener
+ * Copyright (c) 2017 Oscar Campos
  */
 
 /*
@@ -670,10 +672,51 @@ static int corsair_input_mapping(struct hid_device *dev,
 	return 0;
 }
 
+/*
+ * The report descriptor of Corsair Scimitar RGB Pro gaming mouse is
+ * non parseable as they define two consecutive Logical Minimum for
+ * the Usage Page (Consumer) in rdescs bytes 75 and 77 being 77 0x16
+ * that should be obviousy 0x26 for Logical Magimum of 16 bits. This
+ * prevents poper parsing of the report descriptor due Logical
+ * Minimum being larger than Logical Maximum.
+ *
+ * This driver fixes the report descriptor for:
+ * - USB ID b1c:1b3e, sold as Scimitar RGB Pro Gaming mouse
+ */
+
+static __u8 *corsair_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+        unsigned int *rsize)
+{
+	struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
+
+	if (intf->cur_altsetting->desc.bInterfaceNumber == 1) {
+		/*
+		 * Corsair Scimitar RGB Pro report descriptor is broken and
+		 * defines two different Logical Minimum for the Consumer
+		 * Application. The byte 77 should be a 0x26 defining a 16
+		 * bits integer for the Logical Maximum but it is a 0x16
+		 * instead (Logical Minimum)
+		 */
+		switch (hdev->product) {
+		case USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB:
+			if (*rsize >= 172 && rdesc[75] == 0x15 && rdesc[77] == 0x16
+			&& rdesc[78] == 0xff && rdesc[79] == 0x0f) {
+				hid_info(hdev, "Fixing up report descriptor\n");
+				rdesc[77] = 0x26;
+			}
+			break;
+		}
+
+	}
+	return rdesc;
+}
+
 static const struct hid_device_id corsair_devices[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90),
 		.driver_data = CORSAIR_USE_K90_MACRO |
 			       CORSAIR_USE_K90_BACKLIGHT },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR,
+            USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) },
 	{}
 };
 
@@ -686,10 +729,14 @@ static struct hid_driver corsair_driver = {
 	.event = corsair_event,
 	.remove = corsair_remove,
 	.input_mapping = corsair_input_mapping,
+	.report_fixup = corsair_mouse_report_fixup,
 };
 
 module_hid_driver(corsair_driver);
 
 MODULE_LICENSE("GPL");
+/* Original K90 driver author */
 MODULE_AUTHOR("Clement Vuchener");
+/* Scimitar PRO RGB driver author */
+MODULE_AUTHOR("Oscar Campos");
 MODULE_DESCRIPTION("HID driver for Corsair devices");
-- 
cgit v1.2.3


From 6e5364f5f472d4ea66d37459e299071b0190362c Mon Sep 17 00:00:00 2001
From: Ping Cheng <pinglinux@gmail.com>
Date: Tue, 14 Mar 2017 17:08:16 -0700
Subject: HID: wacom: generic: Wacom mouse is only provided for opaque tablets

Commit f85c9dc ("Support tool ID and additional tool types") introduced mouse
and lens cursor tools to generic codepath, which covers both display (direct)
and opaque tablets (indirect devices). However, mouse and lens cursor tools are
only provided for opaque tablets. This patch ignores mouse and lens cursor tools
if the device is a display tablet.

Signed-off-by: Ping Cheng <ping.cheng@wacom.com>
Reviewed-by: Jason Gerecke <jason.gerecke@wacom.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/wacom_wac.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 3d864466d473..94250c293be2 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -1959,8 +1959,10 @@ static void wacom_wac_pen_usage_mapping(struct hid_device *hdev,
 		input_set_capability(input, EV_KEY, BTN_TOOL_BRUSH);
 		input_set_capability(input, EV_KEY, BTN_TOOL_PENCIL);
 		input_set_capability(input, EV_KEY, BTN_TOOL_AIRBRUSH);
-		input_set_capability(input, EV_KEY, BTN_TOOL_MOUSE);
-		input_set_capability(input, EV_KEY, BTN_TOOL_LENS);
+		if (!(features->device_type & WACOM_DEVICETYPE_DIRECT)) {
+			input_set_capability(input, EV_KEY, BTN_TOOL_MOUSE);
+			input_set_capability(input, EV_KEY, BTN_TOOL_LENS);
+		}
 		break;
 	case WACOM_HID_WD_FINGERWHEEL:
 		wacom_map_usage(input, usage, field, EV_ABS, ABS_WHEEL, 0);
-- 
cgit v1.2.3


From 3d3d18f086cdda72ee18a454db70ca72c6e3246c Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 21 Mar 2017 14:45:31 +0000
Subject: drm/i915: Avoid rcu_barrier() from reclaim paths (shrinker)

The rcu_barrier() takes the cpu_hotplug mutex which itself is not
reclaim-safe, and so rcu_barrier() is illegal from inside the shrinker.

[  309.661373] =========================================================
[  309.661376] [ INFO: possible irq lock inversion dependency detected ]
[  309.661380] 4.11.0-rc1-CI-CI_DRM_2333+ #1 Tainted: G        W
[  309.661383] ---------------------------------------------------------
[  309.661386] gem_exec_gttfil/6435 just changed the state of lock:
[  309.661389]  (rcu_preempt_state.barrier_mutex){+.+.-.}, at: [<ffffffff81100731>] _rcu_barrier+0x31/0x160
[  309.661399] but this lock took another, RECLAIM_FS-unsafe lock in the past:
[  309.661402]  (cpu_hotplug.lock){+.+.+.}
[  309.661404]

               and interrupts could create inverse lock ordering between them.

[  309.661410]
               other info that might help us debug this:
[  309.661414]  Possible interrupt unsafe locking scenario:

[  309.661417]        CPU0                    CPU1
[  309.661419]        ----                    ----
[  309.661421]   lock(cpu_hotplug.lock);
[  309.661425]                                local_irq_disable();
[  309.661432]                                lock(rcu_preempt_state.barrier_mutex);
[  309.661441]                                lock(cpu_hotplug.lock);
[  309.661446]   <Interrupt>
[  309.661448]     lock(rcu_preempt_state.barrier_mutex);
[  309.661453]
                *** DEADLOCK ***

[  309.661460] 4 locks held by gem_exec_gttfil/6435:
[  309.661464]  #0:  (sb_writers#10){.+.+.+}, at: [<ffffffff8120d83d>] vfs_write+0x17d/0x1f0
[  309.661475]  #1:  (debugfs_srcu){......}, at: [<ffffffff81320491>] debugfs_use_file_start+0x41/0xa0
[  309.661486]  #2:  (&attr->mutex){+.+.+.}, at: [<ffffffff8123a3e7>] simple_attr_write+0x37/0xe0
[  309.661495]  #3:  (&dev->struct_mutex){+.+.+.}, at: [<ffffffffa0091b4a>] i915_drop_caches_set+0x3a/0x150 [i915]
[  309.661540]
               the shortest dependencies between 2nd lock and 1st lock:
[  309.661547]  -> (cpu_hotplug.lock){+.+.+.} ops: 829 {
[  309.661553]     HARDIRQ-ON-W at:
[  309.661560]                       __lock_acquire+0x5e5/0x1b50
[  309.661565]                       lock_acquire+0xc9/0x220
[  309.661572]                       __mutex_lock+0x6e/0x990
[  309.661576]                       mutex_lock_nested+0x16/0x20
[  309.661583]                       get_online_cpus+0x61/0x80
[  309.661590]                       kmem_cache_create+0x25/0x1d0
[  309.661596]                       debug_objects_mem_init+0x30/0x249
[  309.661602]                       start_kernel+0x341/0x3fe
[  309.661607]                       x86_64_start_reservations+0x2a/0x2c
[  309.661612]                       x86_64_start_kernel+0x173/0x186
[  309.661619]                       verify_cpu+0x0/0xfc
[  309.661622]     SOFTIRQ-ON-W at:
[  309.661627]                       __lock_acquire+0x611/0x1b50
[  309.661632]                       lock_acquire+0xc9/0x220
[  309.661636]                       __mutex_lock+0x6e/0x990
[  309.661641]                       mutex_lock_nested+0x16/0x20
[  309.661646]                       get_online_cpus+0x61/0x80
[  309.661650]                       kmem_cache_create+0x25/0x1d0
[  309.661655]                       debug_objects_mem_init+0x30/0x249
[  309.661660]                       start_kernel+0x341/0x3fe
[  309.661664]                       x86_64_start_reservations+0x2a/0x2c
[  309.661669]                       x86_64_start_kernel+0x173/0x186
[  309.661674]                       verify_cpu+0x0/0xfc
[  309.661677]     RECLAIM_FS-ON-W at:
[  309.661682]                          mark_held_locks+0x6f/0xa0
[  309.661687]                          lockdep_trace_alloc+0xb3/0x100
[  309.661693]                          kmem_cache_alloc_trace+0x31/0x2e0
[  309.661699]                          __smpboot_create_thread.part.1+0x27/0xe0
[  309.661704]                          smpboot_create_threads+0x61/0x90
[  309.661709]                          cpuhp_invoke_callback+0x9c/0x8a0
[  309.661713]                          cpuhp_up_callbacks+0x31/0xb0
[  309.661718]                          _cpu_up+0x7a/0xc0
[  309.661723]                          do_cpu_up+0x5f/0x80
[  309.661727]                          cpu_up+0xe/0x10
[  309.661734]                          smp_init+0x71/0xb3
[  309.661738]                          kernel_init_freeable+0x94/0x19e
[  309.661743]                          kernel_init+0x9/0xf0
[  309.661748]                          ret_from_fork+0x2e/0x40
[  309.661752]     INITIAL USE at:
[  309.661757]                      __lock_acquire+0x234/0x1b50
[  309.661761]                      lock_acquire+0xc9/0x220
[  309.661766]                      __mutex_lock+0x6e/0x990
[  309.661771]                      mutex_lock_nested+0x16/0x20
[  309.661775]                      get_online_cpus+0x61/0x80
[  309.661780]                      __cpuhp_setup_state+0x44/0x170
[  309.661785]                      page_alloc_init+0x23/0x3a
[  309.661790]                      start_kernel+0x124/0x3fe
[  309.661794]                      x86_64_start_reservations+0x2a/0x2c
[  309.661799]                      x86_64_start_kernel+0x173/0x186
[  309.661804]                      verify_cpu+0x0/0xfc
[  309.661807]   }
[  309.661813]   ... key      at: [<ffffffff81e37690>] cpu_hotplug+0xb0/0x100
[  309.661817]   ... acquired at:
[  309.661821]    lock_acquire+0xc9/0x220
[  309.661825]    __mutex_lock+0x6e/0x990
[  309.661829]    mutex_lock_nested+0x16/0x20
[  309.661833]    get_online_cpus+0x61/0x80
[  309.661837]    _rcu_barrier+0x9f/0x160
[  309.661841]    rcu_barrier+0x10/0x20
[  309.661847]    netdev_run_todo+0x5f/0x310
[  309.661852]    rtnl_unlock+0x9/0x10
[  309.661856]    default_device_exit_batch+0x133/0x150
[  309.661862]    ops_exit_list.isra.0+0x4d/0x60
[  309.661866]    cleanup_net+0x1d8/0x2c0
[  309.661872]    process_one_work+0x1f4/0x6d0
[  309.661876]    worker_thread+0x49/0x4a0
[  309.661881]    kthread+0x107/0x140
[  309.661884]    ret_from_fork+0x2e/0x40

[  309.661890] -> (rcu_preempt_state.barrier_mutex){+.+.-.} ops: 179 {
[  309.661896]    HARDIRQ-ON-W at:
[  309.661901]                     __lock_acquire+0x5e5/0x1b50
[  309.661905]                     lock_acquire+0xc9/0x220
[  309.661910]                     __mutex_lock+0x6e/0x990
[  309.661914]                     mutex_lock_nested+0x16/0x20
[  309.661919]                     _rcu_barrier+0x31/0x160
[  309.661923]                     rcu_barrier+0x10/0x20
[  309.661928]                     netdev_run_todo+0x5f/0x310
[  309.661932]                     rtnl_unlock+0x9/0x10
[  309.661936]                     default_device_exit_batch+0x133/0x150
[  309.661941]                     ops_exit_list.isra.0+0x4d/0x60
[  309.661946]                     cleanup_net+0x1d8/0x2c0
[  309.661951]                     process_one_work+0x1f4/0x6d0
[  309.661955]                     worker_thread+0x49/0x4a0
[  309.661960]                     kthread+0x107/0x140
[  309.661964]                     ret_from_fork+0x2e/0x40
[  309.661968]    SOFTIRQ-ON-W at:
[  309.661972]                     __lock_acquire+0x611/0x1b50
[  309.661977]                     lock_acquire+0xc9/0x220
[  309.661981]                     __mutex_lock+0x6e/0x990
[  309.661986]                     mutex_lock_nested+0x16/0x20
[  309.661990]                     _rcu_barrier+0x31/0x160
[  309.661995]                     rcu_barrier+0x10/0x20
[  309.661999]                     netdev_run_todo+0x5f/0x310
[  309.662003]                     rtnl_unlock+0x9/0x10
[  309.662008]                     default_device_exit_batch+0x133/0x150
[  309.662013]                     ops_exit_list.isra.0+0x4d/0x60
[  309.662017]                     cleanup_net+0x1d8/0x2c0
[  309.662022]                     process_one_work+0x1f4/0x6d0
[  309.662027]                     worker_thread+0x49/0x4a0
[  309.662031]                     kthread+0x107/0x140
[  309.662035]                     ret_from_fork+0x2e/0x40
[  309.662039]    IN-RECLAIM_FS-W at:
[  309.662043]                        __lock_acquire+0x638/0x1b50
[  309.662048]                        lock_acquire+0xc9/0x220
[  309.662053]                        __mutex_lock+0x6e/0x990
[  309.662058]                        mutex_lock_nested+0x16/0x20
[  309.662062]                        _rcu_barrier+0x31/0x160
[  309.662067]                        rcu_barrier+0x10/0x20
[  309.662089]                        i915_gem_shrink_all+0x33/0x40 [i915]
[  309.662109]                        i915_drop_caches_set+0x141/0x150 [i915]
[  309.662114]                        simple_attr_write+0xc7/0xe0
[  309.662119]                        full_proxy_write+0x4f/0x70
[  309.662124]                        __vfs_write+0x23/0x120
[  309.662128]                        vfs_write+0xc6/0x1f0
[  309.662133]                        SyS_write+0x44/0xb0
[  309.662138]                        entry_SYSCALL_64_fastpath+0x1c/0xb1
[  309.662142]    INITIAL USE at:
[  309.662147]                    __lock_acquire+0x234/0x1b50
[  309.662151]                    lock_acquire+0xc9/0x220
[  309.662156]                    __mutex_lock+0x6e/0x990
[  309.662160]                    mutex_lock_nested+0x16/0x20
[  309.662165]                    _rcu_barrier+0x31/0x160
[  309.662169]                    rcu_barrier+0x10/0x20
[  309.662174]                    netdev_run_todo+0x5f/0x310
[  309.662178]                    rtnl_unlock+0x9/0x10
[  309.662183]                    default_device_exit_batch+0x133/0x150
[  309.662188]                    ops_exit_list.isra.0+0x4d/0x60
[  309.662192]                    cleanup_net+0x1d8/0x2c0
[  309.662197]                    process_one_work+0x1f4/0x6d0
[  309.662202]                    worker_thread+0x49/0x4a0
[  309.662206]                    kthread+0x107/0x140
[  309.662210]                    ret_from_fork+0x2e/0x40
[  309.662214]  }
[  309.662220]  ... key      at: [<ffffffff81e4e1c8>] rcu_preempt_state+0x508/0x780
[  309.662225]  ... acquired at:
[  309.662229]    check_usage_forwards+0x12b/0x130
[  309.662233]    mark_lock+0x360/0x6f0
[  309.662237]    __lock_acquire+0x638/0x1b50
[  309.662241]    lock_acquire+0xc9/0x220
[  309.662245]    __mutex_lock+0x6e/0x990
[  309.662249]    mutex_lock_nested+0x16/0x20
[  309.662253]    _rcu_barrier+0x31/0x160
[  309.662257]    rcu_barrier+0x10/0x20
[  309.662279]    i915_gem_shrink_all+0x33/0x40 [i915]
[  309.662298]    i915_drop_caches_set+0x141/0x150 [i915]
[  309.662303]    simple_attr_write+0xc7/0xe0
[  309.662307]    full_proxy_write+0x4f/0x70
[  309.662311]    __vfs_write+0x23/0x120
[  309.662315]    vfs_write+0xc6/0x1f0
[  309.662319]    SyS_write+0x44/0xb0
[  309.662323]    entry_SYSCALL_64_fastpath+0x1c/0xb1

[  309.662329]
               stack backtrace:
[  309.662335] CPU: 1 PID: 6435 Comm: gem_exec_gttfil Tainted: G        W       4.11.0-rc1-CI-CI_DRM_2333+ #1
[  309.662342] Hardware name: Hewlett-Packard HP Compaq 8100 Elite SFF PC/304Ah, BIOS 786H1 v01.13 07/14/2011
[  309.662348] Call Trace:
[  309.662354]  dump_stack+0x67/0x92
[  309.662359]  print_irq_inversion_bug.part.19+0x1a4/0x1b0
[  309.662365]  check_usage_forwards+0x12b/0x130
[  309.662369]  mark_lock+0x360/0x6f0
[  309.662374]  ? print_shortest_lock_dependencies+0x1a0/0x1a0
[  309.662379]  __lock_acquire+0x638/0x1b50
[  309.662383]  ? __mutex_unlock_slowpath+0x3e/0x2e0
[  309.662388]  ? trace_hardirqs_on+0xd/0x10
[  309.662392]  ? _rcu_barrier+0x31/0x160
[  309.662396]  lock_acquire+0xc9/0x220
[  309.662400]  ? _rcu_barrier+0x31/0x160
[  309.662404]  ? _rcu_barrier+0x31/0x160
[  309.662409]  __mutex_lock+0x6e/0x990
[  309.662412]  ? _rcu_barrier+0x31/0x160
[  309.662416]  ? _rcu_barrier+0x31/0x160
[  309.662421]  ? synchronize_rcu_expedited+0x35/0xb0
[  309.662426]  ? _raw_spin_unlock_irqrestore+0x52/0x60
[  309.662434]  mutex_lock_nested+0x16/0x20
[  309.662438]  _rcu_barrier+0x31/0x160
[  309.662442]  rcu_barrier+0x10/0x20
[  309.662464]  i915_gem_shrink_all+0x33/0x40 [i915]
[  309.662484]  i915_drop_caches_set+0x141/0x150 [i915]
[  309.662489]  simple_attr_write+0xc7/0xe0
[  309.662494]  full_proxy_write+0x4f/0x70
[  309.662498]  __vfs_write+0x23/0x120
[  309.662503]  ? rcu_read_lock_sched_held+0x75/0x80
[  309.662507]  ? rcu_sync_lockdep_assert+0x2a/0x50
[  309.662512]  ? __sb_start_write+0x102/0x210
[  309.662516]  ? vfs_write+0x17d/0x1f0
[  309.662520]  vfs_write+0xc6/0x1f0
[  309.662524]  ? trace_hardirqs_on_caller+0xe7/0x200
[  309.662529]  SyS_write+0x44/0xb0
[  309.662533]  entry_SYSCALL_64_fastpath+0x1c/0xb1
[  309.662537] RIP: 0033:0x7f507eac24a0
[  309.662541] RSP: 002b:00007fffda8720e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[  309.662548] RAX: ffffffffffffffda RBX: ffffffff81482bd3 RCX: 00007f507eac24a0
[  309.662552] RDX: 0000000000000005 RSI: 00007fffda8720f0 RDI: 0000000000000005
[  309.662557] RBP: ffffc9000048bf88 R08: 0000000000000000 R09: 000000000000002c
[  309.662561] R10: 0000000000000014 R11: 0000000000000246 R12: 00007fffda872230
[  309.662566] R13: 00007fffda872228 R14: 0000000000000201 R15: 00007fffda8720f0
[  309.662572]  ? __this_cpu_preempt_check+0x13/0x20

Fixes: 0eafec6d3244 ("drm/i915: Enable lockless lookup of request tracking via RCU")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100192
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: <stable@vger.kernel.org> # v4.9+
Link: http://patchwork.freedesktop.org/patch/msgid/20170314115019.18127-1-chris@chris-wilson.co.uk
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
(cherry picked from commit bd784b7cc41af7a19cfb705fa6d800e511c4ab02)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170321144531.12344-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 401006b4c6a3..d5d2b4c6ed38 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -263,7 +263,7 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv)
 				I915_SHRINK_BOUND |
 				I915_SHRINK_UNBOUND |
 				I915_SHRINK_ACTIVE);
-	rcu_barrier(); /* wait until our RCU delayed slab frees are completed */
+	synchronize_rcu(); /* wait for our earlier RCU delayed slab frees */
 
 	return freed;
 }
-- 
cgit v1.2.3


From 590379aef2e3fb7d00a093ed556c0a2714f86916 Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@intel.com>
Date: Tue, 21 Mar 2017 14:47:20 +0000
Subject: drm/i915: make context status notifier head be per engine

GVTg has introduced the context status notifier to schedule the GVTg
workload. At that time, the notifier is bound to GVTg context only,
so GVTg is not aware of host workloads.

Now we are going to improve GVTg's guest workload scheduler policy,
and add Guc emulation support for new Gen graphics. Both these two
features require acknowledgment for all contexts running on hardware.
(But will not alter host workload.) So here try to make some change.

The change is simple:
  1. Move the context status notifier head from i915_gem_context to
     intel_engine_cs. Which means there is a notifier head per engine
     instead of per context. Execlist driver still call notifier for
     each context sched-in/out events of current engine.
  2. At GVTg side, it binds a notifier_block for each physical engine
     at GVTg initialization period. Then GVTg can hear all context
     status events.

In this patch, GVTg do nothing for host context event, but later
will add a function there. But in any case, the notifier callback is
a noop if this is no active vGPU.

Since intel_gvt_init() is called at early initialization stage and
require the status notifier head has been initiated, I initiate it in
intel_engine_setup().

v2: remove a redundant newline. (chris)

Fixes: 3c7ba6359d70 ("drm/i915: Introduce execlist context status change notification")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100232
Signed-off-by: Changbin Du <changbin.du@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Zhi Wang <zhi.a.wang@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/20170313024711.28591-1-changbin.du@intel.com
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
(cherry picked from commit 3fc03069bc6e6c316f19bb526e3c8ce784677477)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170321144720.17020-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/gvt/gvt.h          |  2 +-
 drivers/gpu/drm/i915/gvt/scheduler.c    | 45 ++++++++++++++-------------------
 drivers/gpu/drm/i915/i915_gem_context.c |  1 -
 drivers/gpu/drm/i915/i915_gem_context.h |  3 ---
 drivers/gpu/drm/i915/intel_engine_cs.c  |  2 ++
 drivers/gpu/drm/i915/intel_lrc.c        |  3 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.h |  3 +++
 7 files changed, 27 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 23791920ced1..6dfc48b63b71 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -162,7 +162,6 @@ struct intel_vgpu {
 	atomic_t running_workload_num;
 	DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES);
 	struct i915_gem_context *shadow_ctx;
-	struct notifier_block shadow_ctx_notifier_block;
 
 #if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT)
 	struct {
@@ -233,6 +232,7 @@ struct intel_gvt {
 	struct intel_gvt_gtt gtt;
 	struct intel_gvt_opregion opregion;
 	struct intel_gvt_workload_scheduler scheduler;
+	struct notifier_block shadow_ctx_notifier_block[I915_NUM_ENGINES];
 	DECLARE_HASHTABLE(cmd_table, GVT_CMD_HASH_BITS);
 	struct intel_vgpu_type *types;
 	unsigned int num_types;
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 39a83eb7aecc..c4353ed86d4b 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -130,12 +130,10 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 static int shadow_context_status_change(struct notifier_block *nb,
 		unsigned long action, void *data)
 {
-	struct intel_vgpu *vgpu = container_of(nb,
-			struct intel_vgpu, shadow_ctx_notifier_block);
-	struct drm_i915_gem_request *req =
-		(struct drm_i915_gem_request *)data;
-	struct intel_gvt_workload_scheduler *scheduler =
-		&vgpu->gvt->scheduler;
+	struct drm_i915_gem_request *req = (struct drm_i915_gem_request *)data;
+	struct intel_gvt *gvt = container_of(nb, struct intel_gvt,
+				shadow_ctx_notifier_block[req->engine->id]);
+	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
 	struct intel_vgpu_workload *workload =
 		scheduler->current_workload[req->engine->id];
 
@@ -534,15 +532,16 @@ void intel_gvt_wait_vgpu_idle(struct intel_vgpu *vgpu)
 void intel_gvt_clean_workload_scheduler(struct intel_gvt *gvt)
 {
 	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
-	int i;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id i;
 
 	gvt_dbg_core("clean workload scheduler\n");
 
-	for (i = 0; i < I915_NUM_ENGINES; i++) {
-		if (scheduler->thread[i]) {
-			kthread_stop(scheduler->thread[i]);
-			scheduler->thread[i] = NULL;
-		}
+	for_each_engine(engine, gvt->dev_priv, i) {
+		atomic_notifier_chain_unregister(
+					&engine->context_status_notifier,
+					&gvt->shadow_ctx_notifier_block[i]);
+		kthread_stop(scheduler->thread[i]);
 	}
 }
 
@@ -550,18 +549,15 @@ int intel_gvt_init_workload_scheduler(struct intel_gvt *gvt)
 {
 	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
 	struct workload_thread_param *param = NULL;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id i;
 	int ret;
-	int i;
 
 	gvt_dbg_core("init workload scheduler\n");
 
 	init_waitqueue_head(&scheduler->workload_complete_wq);
 
-	for (i = 0; i < I915_NUM_ENGINES; i++) {
-		/* check ring mask at init time */
-		if (!HAS_ENGINE(gvt->dev_priv, i))
-			continue;
-
+	for_each_engine(engine, gvt->dev_priv, i) {
 		init_waitqueue_head(&scheduler->waitq[i]);
 
 		param = kzalloc(sizeof(*param), GFP_KERNEL);
@@ -580,6 +576,11 @@ int intel_gvt_init_workload_scheduler(struct intel_gvt *gvt)
 			ret = PTR_ERR(scheduler->thread[i]);
 			goto err;
 		}
+
+		gvt->shadow_ctx_notifier_block[i].notifier_call =
+					shadow_context_status_change;
+		atomic_notifier_chain_register(&engine->context_status_notifier,
+					&gvt->shadow_ctx_notifier_block[i]);
 	}
 	return 0;
 err:
@@ -591,9 +592,6 @@ err:
 
 void intel_vgpu_clean_gvt_context(struct intel_vgpu *vgpu)
 {
-	atomic_notifier_chain_unregister(&vgpu->shadow_ctx->status_notifier,
-			&vgpu->shadow_ctx_notifier_block);
-
 	i915_gem_context_put_unlocked(vgpu->shadow_ctx);
 }
 
@@ -608,10 +606,5 @@ int intel_vgpu_init_gvt_context(struct intel_vgpu *vgpu)
 
 	vgpu->shadow_ctx->engine[RCS].initialised = true;
 
-	vgpu->shadow_ctx_notifier_block.notifier_call =
-		shadow_context_status_change;
-
-	atomic_notifier_chain_register(&vgpu->shadow_ctx->status_notifier,
-				       &vgpu->shadow_ctx_notifier_block);
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 17f90c618208..e2d83b6d376b 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -311,7 +311,6 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	ctx->ring_size = 4 * PAGE_SIZE;
 	ctx->desc_template = GEN8_CTX_ADDRESSING_MODE(dev_priv) <<
 			     GEN8_CTX_ADDRESSING_MODE_SHIFT;
-	ATOMIC_INIT_NOTIFIER_HEAD(&ctx->status_notifier);
 
 	/* GuC requires the ring to be placed above GUC_WOPCM_TOP. If GuC is not
 	 * present or not in use we still need a small bias as ring wraparound
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 0ac750b90f3d..e9c008fe14b1 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -160,9 +160,6 @@ struct i915_gem_context {
 	/** desc_template: invariant fields for the HW context descriptor */
 	u32 desc_template;
 
-	/** status_notifier: list of callbacks for context-switch changes */
-	struct atomic_notifier_head status_notifier;
-
 	/** guilty_count: How many times this context has caused a GPU hang. */
 	unsigned int guilty_count;
 	/**
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 371acf109e34..ab1be5c80ea5 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -105,6 +105,8 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 	/* Nothing to do here, execute in order of dependencies */
 	engine->schedule = NULL;
 
+	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
+
 	dev_priv->engine[id] = engine;
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ebf8023d21e6..471af3b480ad 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -345,7 +345,8 @@ execlists_context_status_change(struct drm_i915_gem_request *rq,
 	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
 		return;
 
-	atomic_notifier_call_chain(&rq->ctx->status_notifier, status, rq);
+	atomic_notifier_call_chain(&rq->engine->context_status_notifier,
+				   status, rq);
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 79c2b8d72322..13dccb18cd43 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -403,6 +403,9 @@ struct intel_engine_cs {
 	 */
 	struct i915_gem_context *legacy_active_context;
 
+	/* status_notifier: list of callbacks for context-switch changes */
+	struct atomic_notifier_head context_status_notifier;
+
 	struct intel_engine_hangcheck hangcheck;
 
 	bool needs_cmd_parser;
-- 
cgit v1.2.3


From 5b52330bbfe63b3305765354d6046c9f7f89c011 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Tue, 21 Mar 2017 11:26:35 -0400
Subject: audit: fix auditd/kernel connection state tracking

What started as a rather straightforward race condition reported by
Dmitry using the syzkaller fuzzer ended up revealing some major
problems with how the audit subsystem managed its netlink sockets and
its connection with the userspace audit daemon.  Fixing this properly
had quite the cascading effect and what we are left with is this rather
large and complicated patch.  My initial goal was to try and decompose
this patch into multiple smaller patches, but the way these changes
are intertwined makes it difficult to split these changes into
meaningful pieces that don't break or somehow make things worse for
the intermediate states.

The patch makes a number of changes, but the most significant are
highlighted below:

* The auditd tracking variables, e.g. audit_sock, are now gone and
replaced by a RCU/spin_lock protected variable auditd_conn which is
a structure containing all of the auditd tracking information.

* We no longer track the auditd sock directly, instead we track it
via the network namespace in which it resides and we use the audit
socket associated with that namespace.  In spirit, this is what the
code was trying to do prior to this patch (at least I think that is
what the original authors intended), but it was done rather poorly
and added a layer of obfuscation that only masked the underlying
problems.

* Big backlog queue cleanup, again.  In v4.10 we made some pretty big
changes to how the audit backlog queues work, here we haven't changed
the queue design so much as cleaned up the implementation.  Brought
about by the locking changes, we've simplified kauditd_thread() quite
a bit by consolidating the queue handling into a new helper function,
kauditd_send_queue(), which allows us to eliminate a lot of very
similar code and makes the looping logic in kauditd_thread() clearer.

* All netlink messages sent to auditd are now sent via
auditd_send_unicast_skb().  Other than just making sense, this makes
the lock handling easier.

* Change the audit_log_start() sleep behavior so that we never sleep
on auditd events (unchanged) or if the caller is holding the
audit_cmd_mutex (changed).  Previously we didn't sleep if the caller
was auditd or if the message type fell between a certain range; the
type check was a poor effort of doing what the cmd_mutex check now
does.  Richard Guy Briggs originally proposed not sleeping the
cmd_mutex owner several years ago but his patch wasn't acceptable
at the time.  At least the idea lives on here.

* A problem with the lost record counter has been resolved.  Steve
Grubb and I both happened to notice this problem and according to
some quick testing by Steve, this problem goes back quite some time.
It's largely a harmless problem, although it may have left some
careful sysadmins quite puzzled.

Cc: <stable@vger.kernel.org> # 4.10.x-
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 kernel/audit.c   | 639 ++++++++++++++++++++++++++++++++++---------------------
 kernel/audit.h   |   9 +-
 kernel/auditsc.c |   6 +-
 3 files changed, 399 insertions(+), 255 deletions(-)

diff --git a/kernel/audit.c b/kernel/audit.c
index e794544f5e63..2f4964cfde0b 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -54,6 +54,10 @@
 #include <linux/kthread.h>
 #include <linux/kernel.h>
 #include <linux/syscalls.h>
+#include <linux/spinlock.h>
+#include <linux/rcupdate.h>
+#include <linux/mutex.h>
+#include <linux/gfp.h>
 
 #include <linux/audit.h>
 
@@ -90,13 +94,34 @@ static u32	audit_default;
 /* If auditing cannot proceed, audit_failure selects what happens. */
 static u32	audit_failure = AUDIT_FAIL_PRINTK;
 
-/*
- * If audit records are to be written to the netlink socket, audit_pid
- * contains the pid of the auditd process and audit_nlk_portid contains
- * the portid to use to send netlink messages to that process.
+/* private audit network namespace index */
+static unsigned int audit_net_id;
+
+/**
+ * struct audit_net - audit private network namespace data
+ * @sk: communication socket
+ */
+struct audit_net {
+	struct sock *sk;
+};
+
+/**
+ * struct auditd_connection - kernel/auditd connection state
+ * @pid: auditd PID
+ * @portid: netlink portid
+ * @net: the associated network namespace
+ * @lock: spinlock to protect write access
+ *
+ * Description:
+ * This struct is RCU protected; you must either hold the RCU lock for reading
+ * or the included spinlock for writing.
  */
-int		audit_pid;
-static __u32	audit_nlk_portid;
+static struct auditd_connection {
+	int pid;
+	u32 portid;
+	struct net *net;
+	spinlock_t lock;
+} auditd_conn;
 
 /* If audit_rate_limit is non-zero, limit the rate of sending audit records
  * to that number per second.  This prevents DoS attacks, but results in
@@ -123,10 +148,6 @@ u32		audit_sig_sid = 0;
 */
 static atomic_t	audit_lost = ATOMIC_INIT(0);
 
-/* The netlink socket. */
-static struct sock *audit_sock;
-static unsigned int audit_net_id;
-
 /* Hash for inode-based rules */
 struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
 
@@ -139,6 +160,7 @@ static LIST_HEAD(audit_freelist);
 
 /* queue msgs to send via kauditd_task */
 static struct sk_buff_head audit_queue;
+static void kauditd_hold_skb(struct sk_buff *skb);
 /* queue msgs due to temporary unicast send problems */
 static struct sk_buff_head audit_retry_queue;
 /* queue msgs waiting for new auditd connection */
@@ -192,6 +214,43 @@ struct audit_reply {
 	struct sk_buff *skb;
 };
 
+/**
+ * auditd_test_task - Check to see if a given task is an audit daemon
+ * @task: the task to check
+ *
+ * Description:
+ * Return 1 if the task is a registered audit daemon, 0 otherwise.
+ */
+int auditd_test_task(const struct task_struct *task)
+{
+	int rc;
+
+	rcu_read_lock();
+	rc = (auditd_conn.pid && task->tgid == auditd_conn.pid ? 1 : 0);
+	rcu_read_unlock();
+
+	return rc;
+}
+
+/**
+ * audit_get_sk - Return the audit socket for the given network namespace
+ * @net: the destination network namespace
+ *
+ * Description:
+ * Returns the sock pointer if valid, NULL otherwise.  The caller must ensure
+ * that a reference is held for the network namespace while the sock is in use.
+ */
+static struct sock *audit_get_sk(const struct net *net)
+{
+	struct audit_net *aunet;
+
+	if (!net)
+		return NULL;
+
+	aunet = net_generic(net, audit_net_id);
+	return aunet->sk;
+}
+
 static void audit_set_portid(struct audit_buffer *ab, __u32 portid)
 {
 	if (ab) {
@@ -210,9 +269,7 @@ void audit_panic(const char *message)
 			pr_err("%s\n", message);
 		break;
 	case AUDIT_FAIL_PANIC:
-		/* test audit_pid since printk is always losey, why bother? */
-		if (audit_pid)
-			panic("audit: %s\n", message);
+		panic("audit: %s\n", message);
 		break;
 	}
 }
@@ -370,21 +427,87 @@ static int audit_set_failure(u32 state)
 	return audit_do_config_change("audit_failure", &audit_failure, state);
 }
 
-/*
- * For one reason or another this nlh isn't getting delivered to the userspace
- * audit daemon, just send it to printk.
+/**
+ * auditd_set - Set/Reset the auditd connection state
+ * @pid: auditd PID
+ * @portid: auditd netlink portid
+ * @net: auditd network namespace pointer
+ *
+ * Description:
+ * This function will obtain and drop network namespace references as
+ * necessary.
+ */
+static void auditd_set(int pid, u32 portid, struct net *net)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&auditd_conn.lock, flags);
+	auditd_conn.pid = pid;
+	auditd_conn.portid = portid;
+	if (auditd_conn.net)
+		put_net(auditd_conn.net);
+	if (net)
+		auditd_conn.net = get_net(net);
+	else
+		auditd_conn.net = NULL;
+	spin_unlock_irqrestore(&auditd_conn.lock, flags);
+}
+
+/**
+ * auditd_reset - Disconnect the auditd connection
+ *
+ * Description:
+ * Break the auditd/kauditd connection and move all the queued records into the
+ * hold queue in case auditd reconnects.
+ */
+static void auditd_reset(void)
+{
+	struct sk_buff *skb;
+
+	/* if it isn't already broken, break the connection */
+	rcu_read_lock();
+	if (auditd_conn.pid)
+		auditd_set(0, 0, NULL);
+	rcu_read_unlock();
+
+	/* flush all of the main and retry queues to the hold queue */
+	while ((skb = skb_dequeue(&audit_retry_queue)))
+		kauditd_hold_skb(skb);
+	while ((skb = skb_dequeue(&audit_queue)))
+		kauditd_hold_skb(skb);
+}
+
+/**
+ * kauditd_print_skb - Print the audit record to the ring buffer
+ * @skb: audit record
+ *
+ * Whatever the reason, this packet may not make it to the auditd connection
+ * so write it via printk so the information isn't completely lost.
  */
 static void kauditd_printk_skb(struct sk_buff *skb)
 {
 	struct nlmsghdr *nlh = nlmsg_hdr(skb);
 	char *data = nlmsg_data(nlh);
 
-	if (nlh->nlmsg_type != AUDIT_EOE) {
-		if (printk_ratelimit())
-			pr_notice("type=%d %s\n", nlh->nlmsg_type, data);
-		else
-			audit_log_lost("printk limit exceeded");
-	}
+	if (nlh->nlmsg_type != AUDIT_EOE && printk_ratelimit())
+		pr_notice("type=%d %s\n", nlh->nlmsg_type, data);
+}
+
+/**
+ * kauditd_rehold_skb - Handle a audit record send failure in the hold queue
+ * @skb: audit record
+ *
+ * Description:
+ * This should only be used by the kauditd_thread when it fails to flush the
+ * hold queue.
+ */
+static void kauditd_rehold_skb(struct sk_buff *skb)
+{
+	/* put the record back in the queue at the same place */
+	skb_queue_head(&audit_hold_queue, skb);
+
+	/* fail the auditd connection */
+	auditd_reset();
 }
 
 /**
@@ -421,6 +544,9 @@ static void kauditd_hold_skb(struct sk_buff *skb)
 	/* we have no other options - drop the message */
 	audit_log_lost("kauditd hold queue overflow");
 	kfree_skb(skb);
+
+	/* fail the auditd connection */
+	auditd_reset();
 }
 
 /**
@@ -441,51 +567,122 @@ static void kauditd_retry_skb(struct sk_buff *skb)
 }
 
 /**
- * auditd_reset - Disconnect the auditd connection
+ * auditd_send_unicast_skb - Send a record via unicast to auditd
+ * @skb: audit record
  *
  * Description:
- * Break the auditd/kauditd connection and move all the records in the retry
- * queue into the hold queue in case auditd reconnects.  The audit_cmd_mutex
- * must be held when calling this function.
+ * Send a skb to the audit daemon, returns positive/zero values on success and
+ * negative values on failure; in all cases the skb will be consumed by this
+ * function.  If the send results in -ECONNREFUSED the connection with auditd
+ * will be reset.  This function may sleep so callers should not hold any locks
+ * where this would cause a problem.
  */
-static void auditd_reset(void)
+static int auditd_send_unicast_skb(struct sk_buff *skb)
 {
-	struct sk_buff *skb;
-
-	/* break the connection */
-	if (audit_sock) {
-		sock_put(audit_sock);
-		audit_sock = NULL;
+	int rc;
+	u32 portid;
+	struct net *net;
+	struct sock *sk;
+
+	/* NOTE: we can't call netlink_unicast while in the RCU section so
+	 *       take a reference to the network namespace and grab local
+	 *       copies of the namespace, the sock, and the portid; the
+	 *       namespace and sock aren't going to go away while we hold a
+	 *       reference and if the portid does become invalid after the RCU
+	 *       section netlink_unicast() should safely return an error */
+
+	rcu_read_lock();
+	if (!auditd_conn.pid) {
+		rcu_read_unlock();
+		rc = -ECONNREFUSED;
+		goto err;
 	}
-	audit_pid = 0;
-	audit_nlk_portid = 0;
+	net = auditd_conn.net;
+	get_net(net);
+	sk = audit_get_sk(net);
+	portid = auditd_conn.portid;
+	rcu_read_unlock();
 
-	/* flush all of the retry queue to the hold queue */
-	while ((skb = skb_dequeue(&audit_retry_queue)))
-		kauditd_hold_skb(skb);
+	rc = netlink_unicast(sk, skb, portid, 0);
+	put_net(net);
+	if (rc < 0)
+		goto err;
+
+	return rc;
+
+err:
+	if (rc == -ECONNREFUSED)
+		auditd_reset();
+	return rc;
 }
 
 /**
- * kauditd_send_unicast_skb - Send a record via unicast to auditd
- * @skb: audit record
+ * kauditd_send_queue - Helper for kauditd_thread to flush skb queues
+ * @sk: the sending sock
+ * @portid: the netlink destination
+ * @queue: the skb queue to process
+ * @retry_limit: limit on number of netlink unicast failures
+ * @skb_hook: per-skb hook for additional processing
+ * @err_hook: hook called if the skb fails the netlink unicast send
+ *
+ * Description:
+ * Run through the given queue and attempt to send the audit records to auditd,
+ * returns zero on success, negative values on failure.  It is up to the caller
+ * to ensure that the @sk is valid for the duration of this function.
+ *
  */
-static int kauditd_send_unicast_skb(struct sk_buff *skb)
+static int kauditd_send_queue(struct sock *sk, u32 portid,
+			      struct sk_buff_head *queue,
+			      unsigned int retry_limit,
+			      void (*skb_hook)(struct sk_buff *skb),
+			      void (*err_hook)(struct sk_buff *skb))
 {
-	int rc;
+	int rc = 0;
+	struct sk_buff *skb;
+	static unsigned int failed = 0;
 
-	/* if we know nothing is connected, don't even try the netlink call */
-	if (!audit_pid)
-		return -ECONNREFUSED;
+	/* NOTE: kauditd_thread takes care of all our locking, we just use
+	 *       the netlink info passed to us (e.g. sk and portid) */
+
+	while ((skb = skb_dequeue(queue))) {
+		/* call the skb_hook for each skb we touch */
+		if (skb_hook)
+			(*skb_hook)(skb);
+
+		/* can we send to anyone via unicast? */
+		if (!sk) {
+			if (err_hook)
+				(*err_hook)(skb);
+			continue;
+		}
 
-	/* get an extra skb reference in case we fail to send */
-	skb_get(skb);
-	rc = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0);
-	if (rc >= 0) {
-		consume_skb(skb);
-		rc = 0;
+		/* grab an extra skb reference in case of error */
+		skb_get(skb);
+		rc = netlink_unicast(sk, skb, portid, 0);
+		if (rc < 0) {
+			/* fatal failure for our queue flush attempt? */
+			if (++failed >= retry_limit ||
+			    rc == -ECONNREFUSED || rc == -EPERM) {
+				/* yes - error processing for the queue */
+				sk = NULL;
+				if (err_hook)
+					(*err_hook)(skb);
+				if (!skb_hook)
+					goto out;
+				/* keep processing with the skb_hook */
+				continue;
+			} else
+				/* no - requeue to preserve ordering */
+				skb_queue_head(queue, skb);
+		} else {
+			/* it worked - drop the extra reference and continue */
+			consume_skb(skb);
+			failed = 0;
+		}
 	}
 
-	return rc;
+out:
+	return (rc >= 0 ? 0 : rc);
 }
 
 /*
@@ -493,16 +690,19 @@ static int kauditd_send_unicast_skb(struct sk_buff *skb)
  * @skb: audit record
  *
  * Description:
- * This function doesn't consume an skb as might be expected since it has to
- * copy it anyways.
+ * Write a multicast message to anyone listening in the initial network
+ * namespace.  This function doesn't consume an skb as might be expected since
+ * it has to copy it anyways.
  */
 static void kauditd_send_multicast_skb(struct sk_buff *skb)
 {
 	struct sk_buff *copy;
-	struct audit_net *aunet = net_generic(&init_net, audit_net_id);
-	struct sock *sock = aunet->nlsk;
+	struct sock *sock = audit_get_sk(&init_net);
 	struct nlmsghdr *nlh;
 
+	/* NOTE: we are not taking an additional reference for init_net since
+	 *       we don't have to worry about it going away */
+
 	if (!netlink_has_listeners(sock, AUDIT_NLGRP_READLOG))
 		return;
 
@@ -526,149 +726,75 @@ static void kauditd_send_multicast_skb(struct sk_buff *skb)
 }
 
 /**
- * kauditd_wake_condition - Return true when it is time to wake kauditd_thread
- *
- * Description:
- * This function is for use by the wait_event_freezable() call in
- * kauditd_thread().
+ * kauditd_thread - Worker thread to send audit records to userspace
+ * @dummy: unused
  */
-static int kauditd_wake_condition(void)
-{
-	static int pid_last = 0;
-	int rc;
-	int pid = audit_pid;
-
-	/* wake on new messages or a change in the connected auditd */
-	rc = skb_queue_len(&audit_queue) || (pid && pid != pid_last);
-	if (rc)
-		pid_last = pid;
-
-	return rc;
-}
-
 static int kauditd_thread(void *dummy)
 {
 	int rc;
-	int auditd = 0;
-	int reschedule = 0;
-	struct sk_buff *skb;
-	struct nlmsghdr *nlh;
+	u32 portid = 0;
+	struct net *net = NULL;
+	struct sock *sk = NULL;
 
 #define UNICAST_RETRIES 5
-#define AUDITD_BAD(x,y) \
-	((x) == -ECONNREFUSED || (x) == -EPERM || ++(y) >= UNICAST_RETRIES)
-
-	/* NOTE: we do invalidate the auditd connection flag on any sending
-	 * errors, but we only "restore" the connection flag at specific places
-	 * in the loop in order to help ensure proper ordering of audit
-	 * records */
 
 	set_freezable();
 	while (!kthread_should_stop()) {
-		/* NOTE: possible area for future improvement is to look at
-		 *       the hold and retry queues, since only this thread
-		 *       has access to these queues we might be able to do
-		 *       our own queuing and skip some/all of the locking */
-
-		/* NOTE: it might be a fun experiment to split the hold and
-		 *       retry queue handling to another thread, but the
-		 *       synchronization issues and other overhead might kill
-		 *       any performance gains */
+		/* NOTE: see the lock comments in auditd_send_unicast_skb() */
+		rcu_read_lock();
+		if (!auditd_conn.pid) {
+			rcu_read_unlock();
+			goto main_queue;
+		}
+		net = auditd_conn.net;
+		get_net(net);
+		sk = audit_get_sk(net);
+		portid = auditd_conn.portid;
+		rcu_read_unlock();
 
 		/* attempt to flush the hold queue */
-		while (auditd && (skb = skb_dequeue(&audit_hold_queue))) {
-			rc = kauditd_send_unicast_skb(skb);
-			if (rc) {
-				/* requeue to the same spot */
-				skb_queue_head(&audit_hold_queue, skb);
-
-				auditd = 0;
-				if (AUDITD_BAD(rc, reschedule)) {
-					mutex_lock(&audit_cmd_mutex);
-					auditd_reset();
-					mutex_unlock(&audit_cmd_mutex);
-					reschedule = 0;
-				}
-			} else
-				/* we were able to send successfully */
-				reschedule = 0;
+		rc = kauditd_send_queue(sk, portid,
+					&audit_hold_queue, UNICAST_RETRIES,
+					NULL, kauditd_rehold_skb);
+		if (rc < 0) {
+			sk = NULL;
+			goto main_queue;
 		}
 
 		/* attempt to flush the retry queue */
-		while (auditd && (skb = skb_dequeue(&audit_retry_queue))) {
-			rc = kauditd_send_unicast_skb(skb);
-			if (rc) {
-				auditd = 0;
-				if (AUDITD_BAD(rc, reschedule)) {
-					kauditd_hold_skb(skb);
-					mutex_lock(&audit_cmd_mutex);
-					auditd_reset();
-					mutex_unlock(&audit_cmd_mutex);
-					reschedule = 0;
-				} else
-					/* temporary problem (we hope), queue
-					 * to the same spot and retry */
-					skb_queue_head(&audit_retry_queue, skb);
-			} else
-				/* we were able to send successfully */
-				reschedule = 0;
+		rc = kauditd_send_queue(sk, portid,
+					&audit_retry_queue, UNICAST_RETRIES,
+					NULL, kauditd_hold_skb);
+		if (rc < 0) {
+			sk = NULL;
+			goto main_queue;
 		}
 
-		/* standard queue processing, try to be as quick as possible */
-quick_loop:
-		skb = skb_dequeue(&audit_queue);
-		if (skb) {
-			/* setup the netlink header, see the comments in
-			 * kauditd_send_multicast_skb() for length quirks */
-			nlh = nlmsg_hdr(skb);
-			nlh->nlmsg_len = skb->len - NLMSG_HDRLEN;
-
-			/* attempt to send to any multicast listeners */
-			kauditd_send_multicast_skb(skb);
-
-			/* attempt to send to auditd, queue on failure */
-			if (auditd) {
-				rc = kauditd_send_unicast_skb(skb);
-				if (rc) {
-					auditd = 0;
-					if (AUDITD_BAD(rc, reschedule)) {
-						mutex_lock(&audit_cmd_mutex);
-						auditd_reset();
-						mutex_unlock(&audit_cmd_mutex);
-						reschedule = 0;
-					}
-
-					/* move to the retry queue */
-					kauditd_retry_skb(skb);
-				} else
-					/* everything is working so go fast! */
-					goto quick_loop;
-			} else if (reschedule)
-				/* we are currently having problems, move to
-				 * the retry queue */
-				kauditd_retry_skb(skb);
-			else
-				/* dump the message via printk and hold it */
-				kauditd_hold_skb(skb);
-		} else {
-			/* we have flushed the backlog so wake everyone */
-			wake_up(&audit_backlog_wait);
-
-			/* if everything is okay with auditd (if present), go
-			 * to sleep until there is something new in the queue
-			 * or we have a change in the connected auditd;
-			 * otherwise simply reschedule to give things a chance
-			 * to recover */
-			if (reschedule) {
-				set_current_state(TASK_INTERRUPTIBLE);
-				schedule();
-			} else
-				wait_event_freezable(kauditd_wait,
-						     kauditd_wake_condition());
-
-			/* update the auditd connection status */
-			auditd = (audit_pid ? 1 : 0);
+main_queue:
+		/* process the main queue - do the multicast send and attempt
+		 * unicast, dump failed record sends to the retry queue; if
+		 * sk == NULL due to previous failures we will just do the
+		 * multicast send and move the record to the retry queue */
+		kauditd_send_queue(sk, portid, &audit_queue, 1,
+				   kauditd_send_multicast_skb,
+				   kauditd_retry_skb);
+
+		/* drop our netns reference, no auditd sends past this line */
+		if (net) {
+			put_net(net);
+			net = NULL;
 		}
+		sk = NULL;
+
+		/* we have processed all the queues so wake everyone */
+		wake_up(&audit_backlog_wait);
+
+		/* NOTE: we want to wake up if there is anything on the queue,
+		 *       regardless of if an auditd is connected, as we need to
+		 *       do the multicast send and rotate records from the
+		 *       main queue to the retry/hold queues */
+		wait_event_freezable(kauditd_wait,
+				     (skb_queue_len(&audit_queue) ? 1 : 0));
 	}
 
 	return 0;
@@ -678,17 +804,16 @@ int audit_send_list(void *_dest)
 {
 	struct audit_netlink_list *dest = _dest;
 	struct sk_buff *skb;
-	struct net *net = dest->net;
-	struct audit_net *aunet = net_generic(net, audit_net_id);
+	struct sock *sk = audit_get_sk(dest->net);
 
 	/* wait for parent to finish and send an ACK */
 	mutex_lock(&audit_cmd_mutex);
 	mutex_unlock(&audit_cmd_mutex);
 
 	while ((skb = __skb_dequeue(&dest->q)) != NULL)
-		netlink_unicast(aunet->nlsk, skb, dest->portid, 0);
+		netlink_unicast(sk, skb, dest->portid, 0);
 
-	put_net(net);
+	put_net(dest->net);
 	kfree(dest);
 
 	return 0;
@@ -722,16 +847,15 @@ out_kfree_skb:
 static int audit_send_reply_thread(void *arg)
 {
 	struct audit_reply *reply = (struct audit_reply *)arg;
-	struct net *net = reply->net;
-	struct audit_net *aunet = net_generic(net, audit_net_id);
+	struct sock *sk = audit_get_sk(reply->net);
 
 	mutex_lock(&audit_cmd_mutex);
 	mutex_unlock(&audit_cmd_mutex);
 
 	/* Ignore failure. It'll only happen if the sender goes away,
 	   because our timeout is set to infinite. */
-	netlink_unicast(aunet->nlsk , reply->skb, reply->portid, 0);
-	put_net(net);
+	netlink_unicast(sk, reply->skb, reply->portid, 0);
+	put_net(reply->net);
 	kfree(reply);
 	return 0;
 }
@@ -949,12 +1073,12 @@ static int audit_set_feature(struct sk_buff *skb)
 
 static int audit_replace(pid_t pid)
 {
-	struct sk_buff *skb = audit_make_reply(0, 0, AUDIT_REPLACE, 0, 0,
-					       &pid, sizeof(pid));
+	struct sk_buff *skb;
 
+	skb = audit_make_reply(0, 0, AUDIT_REPLACE, 0, 0, &pid, sizeof(pid));
 	if (!skb)
 		return -ENOMEM;
-	return netlink_unicast(audit_sock, skb, audit_nlk_portid, 0);
+	return auditd_send_unicast_skb(skb);
 }
 
 static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
@@ -981,7 +1105,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		memset(&s, 0, sizeof(s));
 		s.enabled		= audit_enabled;
 		s.failure		= audit_failure;
-		s.pid			= audit_pid;
+		rcu_read_lock();
+		s.pid			= auditd_conn.pid;
+		rcu_read_unlock();
 		s.rate_limit		= audit_rate_limit;
 		s.backlog_limit		= audit_backlog_limit;
 		s.lost			= atomic_read(&audit_lost);
@@ -1014,30 +1140,44 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			 *       from the initial pid namespace, but something
 			 *       to keep in mind if this changes */
 			int new_pid = s.pid;
+			pid_t auditd_pid;
 			pid_t requesting_pid = task_tgid_vnr(current);
 
-			if ((!new_pid) && (requesting_pid != audit_pid)) {
-				audit_log_config_change("audit_pid", new_pid, audit_pid, 0);
+			/* test the auditd connection */
+			audit_replace(requesting_pid);
+
+			rcu_read_lock();
+			auditd_pid = auditd_conn.pid;
+			/* only the current auditd can unregister itself */
+			if ((!new_pid) && (requesting_pid != auditd_pid)) {
+				rcu_read_unlock();
+				audit_log_config_change("audit_pid", new_pid,
+							auditd_pid, 0);
 				return -EACCES;
 			}
-			if (audit_pid && new_pid &&
-			    audit_replace(requesting_pid) != -ECONNREFUSED) {
-				audit_log_config_change("audit_pid", new_pid, audit_pid, 0);
+			/* replacing a healthy auditd is not allowed */
+			if (auditd_pid && new_pid) {
+				rcu_read_unlock();
+				audit_log_config_change("audit_pid", new_pid,
+							auditd_pid, 0);
 				return -EEXIST;
 			}
+			rcu_read_unlock();
+
 			if (audit_enabled != AUDIT_OFF)
-				audit_log_config_change("audit_pid", new_pid, audit_pid, 1);
+				audit_log_config_change("audit_pid", new_pid,
+							auditd_pid, 1);
+
 			if (new_pid) {
-				if (audit_sock)
-					sock_put(audit_sock);
-				audit_pid = new_pid;
-				audit_nlk_portid = NETLINK_CB(skb).portid;
-				sock_hold(skb->sk);
-				audit_sock = skb->sk;
-			} else {
+				/* register a new auditd connection */
+				auditd_set(new_pid,
+					   NETLINK_CB(skb).portid,
+					   sock_net(NETLINK_CB(skb).sk));
+				/* try to process any backlog */
+				wake_up_interruptible(&kauditd_wait);
+			} else
+				/* unregister the auditd connection */
 				auditd_reset();
-			}
-			wake_up_interruptible(&kauditd_wait);
 		}
 		if (s.mask & AUDIT_STATUS_RATE_LIMIT) {
 			err = audit_set_rate_limit(s.rate_limit);
@@ -1090,7 +1230,6 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 				if (err)
 					break;
 			}
-			mutex_unlock(&audit_cmd_mutex);
 			audit_log_common_recv_msg(&ab, msg_type);
 			if (msg_type != AUDIT_USER_TTY)
 				audit_log_format(ab, " msg='%.*s'",
@@ -1108,7 +1247,6 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			}
 			audit_set_portid(ab, NETLINK_CB(skb).portid);
 			audit_log_end(ab);
-			mutex_lock(&audit_cmd_mutex);
 		}
 		break;
 	case AUDIT_ADD_RULE:
@@ -1298,26 +1436,26 @@ static int __net_init audit_net_init(struct net *net)
 
 	struct audit_net *aunet = net_generic(net, audit_net_id);
 
-	aunet->nlsk = netlink_kernel_create(net, NETLINK_AUDIT, &cfg);
-	if (aunet->nlsk == NULL) {
+	aunet->sk = netlink_kernel_create(net, NETLINK_AUDIT, &cfg);
+	if (aunet->sk == NULL) {
 		audit_panic("cannot initialize netlink socket in namespace");
 		return -ENOMEM;
 	}
-	aunet->nlsk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
+	aunet->sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
+
 	return 0;
 }
 
 static void __net_exit audit_net_exit(struct net *net)
 {
 	struct audit_net *aunet = net_generic(net, audit_net_id);
-	struct sock *sock = aunet->nlsk;
-	mutex_lock(&audit_cmd_mutex);
-	if (sock == audit_sock)
+
+	rcu_read_lock();
+	if (net == auditd_conn.net)
 		auditd_reset();
-	mutex_unlock(&audit_cmd_mutex);
+	rcu_read_unlock();
 
-	netlink_kernel_release(sock);
-	aunet->nlsk = NULL;
+	netlink_kernel_release(aunet->sk);
 }
 
 static struct pernet_operations audit_net_ops __net_initdata = {
@@ -1335,20 +1473,24 @@ static int __init audit_init(void)
 	if (audit_initialized == AUDIT_DISABLED)
 		return 0;
 
-	pr_info("initializing netlink subsys (%s)\n",
-		audit_default ? "enabled" : "disabled");
-	register_pernet_subsys(&audit_net_ops);
+	memset(&auditd_conn, 0, sizeof(auditd_conn));
+	spin_lock_init(&auditd_conn.lock);
 
 	skb_queue_head_init(&audit_queue);
 	skb_queue_head_init(&audit_retry_queue);
 	skb_queue_head_init(&audit_hold_queue);
-	audit_initialized = AUDIT_INITIALIZED;
-	audit_enabled = audit_default;
-	audit_ever_enabled |= !!audit_default;
 
 	for (i = 0; i < AUDIT_INODE_BUCKETS; i++)
 		INIT_LIST_HEAD(&audit_inode_hash[i]);
 
+	pr_info("initializing netlink subsys (%s)\n",
+		audit_default ? "enabled" : "disabled");
+	register_pernet_subsys(&audit_net_ops);
+
+	audit_initialized = AUDIT_INITIALIZED;
+	audit_enabled = audit_default;
+	audit_ever_enabled |= !!audit_default;
+
 	kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd");
 	if (IS_ERR(kauditd_task)) {
 		int err = PTR_ERR(kauditd_task);
@@ -1519,20 +1661,16 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
 	if (unlikely(!audit_filter(type, AUDIT_FILTER_TYPE)))
 		return NULL;
 
-	/* don't ever fail/sleep on these two conditions:
+	/* NOTE: don't ever fail/sleep on these two conditions:
 	 * 1. auditd generated record - since we need auditd to drain the
 	 *    queue; also, when we are checking for auditd, compare PIDs using
 	 *    task_tgid_vnr() since auditd_pid is set in audit_receive_msg()
 	 *    using a PID anchored in the caller's namespace
-	 * 2. audit command message - record types 1000 through 1099 inclusive
-	 *    are command messages/records used to manage the kernel subsystem
-	 *    and the audit userspace, blocking on these messages could cause
-	 *    problems under load so don't do it (note: not all of these
-	 *    command types are valid as record types, but it is quicker to
-	 *    just check two ints than a series of ints in a if/switch stmt) */
-	if (!((audit_pid && audit_pid == task_tgid_vnr(current)) ||
-	      (type >= 1000 && type <= 1099))) {
-		long sleep_time = audit_backlog_wait_time;
+	 * 2. generator holding the audit_cmd_mutex - we don't want to block
+	 *    while holding the mutex */
+	if (!(auditd_test_task(current) ||
+	      (current == __mutex_owner(&audit_cmd_mutex)))) {
+		long stime = audit_backlog_wait_time;
 
 		while (audit_backlog_limit &&
 		       (skb_queue_len(&audit_queue) > audit_backlog_limit)) {
@@ -1541,14 +1679,13 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
 
 			/* sleep if we are allowed and we haven't exhausted our
 			 * backlog wait limit */
-			if ((gfp_mask & __GFP_DIRECT_RECLAIM) &&
-			    (sleep_time > 0)) {
+			if (gfpflags_allow_blocking(gfp_mask) && (stime > 0)) {
 				DECLARE_WAITQUEUE(wait, current);
 
 				add_wait_queue_exclusive(&audit_backlog_wait,
 							 &wait);
 				set_current_state(TASK_UNINTERRUPTIBLE);
-				sleep_time = schedule_timeout(sleep_time);
+				stime = schedule_timeout(stime);
 				remove_wait_queue(&audit_backlog_wait, &wait);
 			} else {
 				if (audit_rate_check() && printk_ratelimit())
@@ -2127,15 +2264,27 @@ out:
  */
 void audit_log_end(struct audit_buffer *ab)
 {
+	struct sk_buff *skb;
+	struct nlmsghdr *nlh;
+
 	if (!ab)
 		return;
-	if (!audit_rate_check()) {
-		audit_log_lost("rate limit exceeded");
-	} else {
-		skb_queue_tail(&audit_queue, ab->skb);
-		wake_up_interruptible(&kauditd_wait);
+
+	if (audit_rate_check()) {
+		skb = ab->skb;
 		ab->skb = NULL;
-	}
+
+		/* setup the netlink header, see the comments in
+		 * kauditd_send_multicast_skb() for length quirks */
+		nlh = nlmsg_hdr(skb);
+		nlh->nlmsg_len = skb->len - NLMSG_HDRLEN;
+
+		/* queue the netlink packet and poke the kauditd thread */
+		skb_queue_tail(&audit_queue, skb);
+		wake_up_interruptible(&kauditd_wait);
+	} else
+		audit_log_lost("rate limit exceeded");
+
 	audit_buffer_free(ab);
 }
 
diff --git a/kernel/audit.h b/kernel/audit.h
index ca579880303a..0f1cf6d1878a 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -218,7 +218,7 @@ extern void audit_log_name(struct audit_context *context,
 			   struct audit_names *n, const struct path *path,
 			   int record_num, int *call_panic);
 
-extern int audit_pid;
+extern int auditd_test_task(const struct task_struct *task);
 
 #define AUDIT_INODE_BUCKETS	32
 extern struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
@@ -250,10 +250,6 @@ struct audit_netlink_list {
 
 int audit_send_list(void *);
 
-struct audit_net {
-	struct sock *nlsk;
-};
-
 extern int selinux_audit_rule_update(void);
 
 extern struct mutex audit_filter_mutex;
@@ -340,8 +336,7 @@ extern int audit_filter(int msgtype, unsigned int listtype);
 extern int __audit_signal_info(int sig, struct task_struct *t);
 static inline int audit_signal_info(int sig, struct task_struct *t)
 {
-	if (unlikely((audit_pid && t->tgid == audit_pid) ||
-		     (audit_signals && !audit_dummy_context())))
+	if (auditd_test_task(t) || (audit_signals && !audit_dummy_context()))
 		return __audit_signal_info(sig, t);
 	return 0;
 }
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index d6a8de5f8fa3..e59ffc7fc522 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -762,7 +762,7 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
 	struct audit_entry *e;
 	enum audit_state state;
 
-	if (audit_pid && tsk->tgid == audit_pid)
+	if (auditd_test_task(tsk))
 		return AUDIT_DISABLED;
 
 	rcu_read_lock();
@@ -816,7 +816,7 @@ void audit_filter_inodes(struct task_struct *tsk, struct audit_context *ctx)
 {
 	struct audit_names *n;
 
-	if (audit_pid && tsk->tgid == audit_pid)
+	if (auditd_test_task(tsk))
 		return;
 
 	rcu_read_lock();
@@ -2256,7 +2256,7 @@ int __audit_signal_info(int sig, struct task_struct *t)
 	struct audit_context *ctx = tsk->audit_context;
 	kuid_t uid = current_uid(), t_uid = task_uid(t);
 
-	if (audit_pid && t->tgid == audit_pid) {
+	if (auditd_test_task(t)) {
 		if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) {
 			audit_sig_pid = task_tgid_nr(tsk);
 			if (uid_valid(tsk->loginuid))
-- 
cgit v1.2.3


From c248c64387fac5a6b31b343d9acb78f478e8619c Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 9 Mar 2017 13:26:07 +0200
Subject: nvme-rdma: handle cpu unplug when re-establishing the controller

If a cpu unplug event has occured, we need to take the minimum
of the provided nr_io_queues and the number of online cpus,
otherwise we won't be able to connect them as blk-mq mapping
won't dispatch to those queues.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/rdma.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 779f516e7a4e..47a479f26e5d 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -343,8 +343,6 @@ static int __nvme_rdma_init_request(struct nvme_rdma_ctrl *ctrl,
 	struct ib_device *ibdev = dev->dev;
 	int ret;
 
-	BUG_ON(queue_idx >= ctrl->queue_count);
-
 	ret = nvme_rdma_alloc_qe(ibdev, &req->sqe, sizeof(struct nvme_command),
 			DMA_TO_DEVICE);
 	if (ret)
@@ -652,8 +650,22 @@ out_free_queues:
 
 static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl)
 {
+	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+	unsigned int nr_io_queues;
 	int i, ret;
 
+	nr_io_queues = min(opts->nr_io_queues, num_online_cpus());
+	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
+	if (ret)
+		return ret;
+
+	ctrl->queue_count = nr_io_queues + 1;
+	if (ctrl->queue_count < 2)
+		return 0;
+
+	dev_info(ctrl->ctrl.device,
+		"creating %d I/O queues.\n", nr_io_queues);
+
 	for (i = 1; i < ctrl->queue_count; i++) {
 		ret = nvme_rdma_init_queue(ctrl, i,
 					   ctrl->ctrl.opts->queue_size);
@@ -1791,20 +1803,8 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
 
 static int nvme_rdma_create_io_queues(struct nvme_rdma_ctrl *ctrl)
 {
-	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
 	int ret;
 
-	ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
-	if (ret)
-		return ret;
-
-	ctrl->queue_count = opts->nr_io_queues + 1;
-	if (ctrl->queue_count < 2)
-		return 0;
-
-	dev_info(ctrl->ctrl.device,
-		"creating %d I/O queues.\n", opts->nr_io_queues);
-
 	ret = nvme_rdma_init_io_queues(ctrl);
 	if (ret)
 		return ret;
-- 
cgit v1.2.3


From 945dd5bacc8978439af276976b5dcbbd42333dbc Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 13 Mar 2017 13:27:51 +0200
Subject: nvme-loop: handle cpu unplug when re-establishing the controller

If a cpu unplug event has occured, we need to take the minimum
of the provided nr_io_queues and the number of online cpus,
otherwise we won't be able to connect them as blk-mq mapping
won't dispatch to those queues.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/target/loop.c | 88 ++++++++++++++++++++++++++--------------------
 1 file changed, 50 insertions(+), 38 deletions(-)

diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 74e04a0855d4..22f7bc6bac7f 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -223,8 +223,6 @@ static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
 static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl,
 		struct nvme_loop_iod *iod, unsigned int queue_idx)
 {
-	BUG_ON(queue_idx >= ctrl->queue_count);
-
 	iod->req.cmd = &iod->cmd;
 	iod->req.rsp = &iod->rsp;
 	iod->queue = &ctrl->queues[queue_idx];
@@ -314,6 +312,43 @@ free_ctrl:
 	kfree(ctrl);
 }
 
+static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl)
+{
+	int i;
+
+	for (i = 1; i < ctrl->queue_count; i++)
+		nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
+}
+
+static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl)
+{
+	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+	unsigned int nr_io_queues;
+	int ret, i;
+
+	nr_io_queues = min(opts->nr_io_queues, num_online_cpus());
+	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
+	if (ret || !nr_io_queues)
+		return ret;
+
+	dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", nr_io_queues);
+
+	for (i = 1; i <= nr_io_queues; i++) {
+		ctrl->queues[i].ctrl = ctrl;
+		ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq);
+		if (ret)
+			goto out_destroy_queues;
+
+		ctrl->queue_count++;
+	}
+
+	return 0;
+
+out_destroy_queues:
+	nvme_loop_destroy_io_queues(ctrl);
+	return ret;
+}
+
 static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 {
 	int error;
@@ -385,17 +420,13 @@ out_free_sq:
 
 static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
 {
-	int i;
-
 	nvme_stop_keep_alive(&ctrl->ctrl);
 
 	if (ctrl->queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);
 		blk_mq_tagset_busy_iter(&ctrl->tag_set,
 					nvme_cancel_request, &ctrl->ctrl);
-
-		for (i = 1; i < ctrl->queue_count; i++)
-			nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
+		nvme_loop_destroy_io_queues(ctrl);
 	}
 
 	if (ctrl->ctrl.state == NVME_CTRL_LIVE)
@@ -467,19 +498,14 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
 	if (ret)
 		goto out_disable;
 
-	for (i = 1; i <= ctrl->ctrl.opts->nr_io_queues; i++) {
-		ctrl->queues[i].ctrl = ctrl;
-		ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq);
-		if (ret)
-			goto out_free_queues;
-
-		ctrl->queue_count++;
-	}
+	ret = nvme_loop_init_io_queues(ctrl);
+	if (ret)
+		goto out_destroy_admin;
 
-	for (i = 1; i <= ctrl->ctrl.opts->nr_io_queues; i++) {
+	for (i = 1; i < ctrl->queue_count; i++) {
 		ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
 		if (ret)
-			goto out_free_queues;
+			goto out_destroy_io;
 	}
 
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
@@ -492,9 +518,9 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
 
 	return;
 
-out_free_queues:
-	for (i = 1; i < ctrl->queue_count; i++)
-		nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
+out_destroy_io:
+	nvme_loop_destroy_io_queues(ctrl);
+out_destroy_admin:
 	nvme_loop_destroy_admin_queue(ctrl);
 out_disable:
 	dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
@@ -533,25 +559,12 @@ static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
 
 static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
 {
-	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
 	int ret, i;
 
-	ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
-	if (ret || !opts->nr_io_queues)
+	ret = nvme_loop_init_io_queues(ctrl);
+	if (ret)
 		return ret;
 
-	dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n",
-		opts->nr_io_queues);
-
-	for (i = 1; i <= opts->nr_io_queues; i++) {
-		ctrl->queues[i].ctrl = ctrl;
-		ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq);
-		if (ret)
-			goto out_destroy_queues;
-
-		ctrl->queue_count++;
-	}
-
 	memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
 	ctrl->tag_set.ops = &nvme_loop_mq_ops;
 	ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
@@ -575,7 +588,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
 		goto out_free_tagset;
 	}
 
-	for (i = 1; i <= opts->nr_io_queues; i++) {
+	for (i = 1; i < ctrl->queue_count; i++) {
 		ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
 		if (ret)
 			goto out_cleanup_connect_q;
@@ -588,8 +601,7 @@ out_cleanup_connect_q:
 out_free_tagset:
 	blk_mq_free_tag_set(&ctrl->tag_set);
 out_destroy_queues:
-	for (i = 1; i < ctrl->queue_count; i++)
-		nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
+	nvme_loop_destroy_io_queues(ctrl);
 	return ret;
 }
 
-- 
cgit v1.2.3


From 0ca10b60ceeb5372da01798ca68c116ae45a6eb6 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Mon, 20 Mar 2017 11:25:16 +0100
Subject: reset: fix optional reset_control_get stubs to return NULL

When RESET_CONTROLLER is not enabled, the optional reset_control_get
stubs should now also return NULL.

Since it is now valid for reset_control_assert/deassert/reset/status/put
to be called unconditionally, with NULL as an argument for optional
resets, the stubs are not allowed to warn anymore.

Fixes: bb475230b8e5 ("reset: make optional functions really optional")
Reported-by: Andrzej Hajda <a.hajda@samsung.com>
Tested-by: Andrzej Hajda <a.hajda@samsung.com>
Reviewed-by: Andrzej Hajda <a.hajda@samsung.com>
Cc: Ramiro Oliveira <Ramiro.Oliveira@synopsys.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 include/linux/reset.h | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/include/linux/reset.h b/include/linux/reset.h
index 86b4ed75359e..96fb139bdd08 100644
--- a/include/linux/reset.h
+++ b/include/linux/reset.h
@@ -31,31 +31,26 @@ static inline int device_reset_optional(struct device *dev)
 
 static inline int reset_control_reset(struct reset_control *rstc)
 {
-	WARN_ON(1);
 	return 0;
 }
 
 static inline int reset_control_assert(struct reset_control *rstc)
 {
-	WARN_ON(1);
 	return 0;
 }
 
 static inline int reset_control_deassert(struct reset_control *rstc)
 {
-	WARN_ON(1);
 	return 0;
 }
 
 static inline int reset_control_status(struct reset_control *rstc)
 {
-	WARN_ON(1);
 	return 0;
 }
 
 static inline void reset_control_put(struct reset_control *rstc)
 {
-	WARN_ON(1);
 }
 
 static inline int __must_check device_reset(struct device *dev)
@@ -74,14 +69,14 @@ static inline struct reset_control *__of_reset_control_get(
 					const char *id, int index, bool shared,
 					bool optional)
 {
-	return ERR_PTR(-ENOTSUPP);
+	return optional ? NULL : ERR_PTR(-ENOTSUPP);
 }
 
 static inline struct reset_control *__devm_reset_control_get(
 					struct device *dev, const char *id,
 					int index, bool shared, bool optional)
 {
-	return ERR_PTR(-ENOTSUPP);
+	return optional ? NULL : ERR_PTR(-ENOTSUPP);
 }
 
 #endif /* CONFIG_RESET_CONTROLLER */
-- 
cgit v1.2.3


From 713cc9df6473f0cc8d699987d990482d432c0679 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 21 Mar 2017 18:04:26 +0000
Subject: arm64: compat: Update compat syscalls

Hook up three pkey syscalls (which we don't implement) and the new statx
syscall, as has been done for arch/arm/.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/unistd.h   | 2 +-
 arch/arm64/include/asm/unistd32.h | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index e78ac26324bd..bdbeb06dc11e 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -44,7 +44,7 @@
 #define __ARM_NR_compat_cacheflush	(__ARM_NR_COMPAT_BASE+2)
 #define __ARM_NR_compat_set_tls		(__ARM_NR_COMPAT_BASE+5)
 
-#define __NR_compat_syscalls		394
+#define __NR_compat_syscalls		398
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index b7e8ef16ff0d..c66b51aab195 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -809,6 +809,14 @@ __SYSCALL(__NR_copy_file_range, sys_copy_file_range)
 __SYSCALL(__NR_preadv2, compat_sys_preadv2)
 #define __NR_pwritev2 393
 __SYSCALL(__NR_pwritev2, compat_sys_pwritev2)
+#define __NR_pkey_mprotect 394
+__SYSCALL(__NR_pkey_mprotect, sys_pkey_mprotect)
+#define __NR_pkey_alloc 395
+__SYSCALL(__NR_pkey_alloc, sys_pkey_alloc)
+#define __NR_pkey_free 396
+__SYSCALL(__NR_pkey_free, sys_pkey_free)
+#define __NR_statx 397
+__SYSCALL(__NR_statx, sys_statx)
 
 /*
  * Please add new compat syscalls above this comment and update
-- 
cgit v1.2.3


From 65b1adebfe43c642dfe3b109edb5d992db5fbe72 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Tue, 21 Mar 2017 13:19:09 -0600
Subject: vfio: Rework group release notifier warning

The intent of the original warning is make sure that the mdev vendor
driver has removed any group notifiers at the point where the group
is closed by the user.  Theoretically this would be through an
orderly shutdown where any devices are release prior to the group
release.  We can't always count on an orderly shutdown, the user can
close the group before the notifier can be removed or the user task
might be killed.  We'd like to add this sanity test when the group is
idle and the only references are from the devices within the group
themselves, but we don't have a good way to do that.  Instead check
both when the group itself is removed and when the group is opened.
A bit later than we'd prefer, but better than the current over
aggressive approach.

Fixes: ccd46dbae77d ("vfio: support notifier chain in vfio_group")
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Cc: <stable@vger.kernel.org> # v4.10
Cc: Jike Song <jike.song@intel.com>
---
 drivers/vfio/vfio.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 609f4f982c74..561084ab387f 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -403,6 +403,7 @@ static void vfio_group_release(struct kref *kref)
 	struct iommu_group *iommu_group = group->iommu_group;
 
 	WARN_ON(!list_empty(&group->device_list));
+	WARN_ON(group->notifier.head);
 
 	list_for_each_entry_safe(unbound, tmp,
 				 &group->unbound_list, unbound_next) {
@@ -1573,6 +1574,10 @@ static int vfio_group_fops_open(struct inode *inode, struct file *filep)
 		return -EBUSY;
 	}
 
+	/* Warn if previous user didn't cleanup and re-init to drop them */
+	if (WARN_ON(group->notifier.head))
+		BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
+
 	filep->private_data = group;
 
 	return 0;
@@ -1584,9 +1589,6 @@ static int vfio_group_fops_release(struct inode *inode, struct file *filep)
 
 	filep->private_data = NULL;
 
-	/* Any user didn't unregister? */
-	WARN_ON(group->notifier.head);
-
 	vfio_group_try_dissolve_container(group);
 
 	atomic_dec(&group->opened);
-- 
cgit v1.2.3


From 093b995e3b55a0ae0670226ddfcb05bfbf0099ae Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Mon, 20 Mar 2017 14:26:42 +0800
Subject: mm, swap: Remove WARN_ON_ONCE() in free_swap_slot()

Before commit 452b94b8c8c7 ("mm/swap: don't BUG_ON() due to
uninitialized swap slot cache"), the following bug is reported,

  ------------[ cut here ]------------
  kernel BUG at mm/swap_slots.c:270!
  invalid opcode: 0000 [#1] SMP
  CPU: 5 PID: 1745 Comm: (sd-pam) Not tainted 4.11.0-rc1-00243-g24c534bb161b #1
  Hardware name: System manufacturer System Product Name/Z170-K, BIOS 1803 05/06/2016
  RIP: 0010:free_swap_slot+0xba/0xd0
  Call Trace:
   swap_free+0x36/0x40
   do_swap_page+0x360/0x6d0
   __handle_mm_fault+0x880/0x1080
   handle_mm_fault+0xd0/0x240
   __do_page_fault+0x232/0x4d0
   do_page_fault+0x20/0x70
   page_fault+0x22/0x30
  ---[ end trace aefc9ede53e0ab21 ]---

This is raised by the BUG_ON(!swap_slot_cache_initialized) in
free_swap_slot().  This is incorrect, because even if the swap slots
cache fails to be initialized, the swap should operate properly without
the swap slots cache.  And the use_swap_slot_cache check later in the
function will protect the uninitialized swap slots cache case.

In commit 452b94b8c8c7, the BUG_ON() is replaced by WARN_ON_ONCE().  In
the patch, the WARN_ON_ONCE() is removed too.

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/swap_slots.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index 7ebb23836f68..b1ccb58ad397 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -267,8 +267,6 @@ int free_swap_slot(swp_entry_t entry)
 {
 	struct swap_slots_cache *cache;
 
-	WARN_ON_ONCE(!swap_slot_cache_initialized);
-
 	cache = &get_cpu_var(swp_slots);
 	if (use_swap_slot_cache && cache->slots_ret) {
 		spin_lock_irq(&cache->free_lock);
-- 
cgit v1.2.3


From 64897b20eed29cee2b998fb5ba362e65523dba3c Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 21 Mar 2017 22:19:07 +0100
Subject: cpufreq: intel_pstate: Fix policy data management in passive mode

The policy->cpuinfo.max_freq and policy->max updates in
intel_cpufreq_turbo_update() are excessive as they are done for no
good reason and may lead to problems in principle, so they should be
dropped.  However, after dropping them intel_cpufreq_turbo_update()
becomes almost entirely pointless, because the check made by it is
made again down the road in intel_pstate_prepare_request().  The
only thing in it that still needs to be done is the call to
update_turbo_state(), so drop intel_cpufreq_turbo_update() altogether
and make its callers invoke update_turbo_state() directly instead of
it.

In addition to that, fix intel_cpufreq_verify_policy() so that it
checks global.no_turbo in addition to global.turbo_disabled when
updating policy->cpuinfo.max_freq to make it consistent with
intel_pstate_verify_policy().

Fixes: 001c76f05b01 (cpufreq: intel_pstate: Generic governors support)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 29 ++++++-----------------------
 1 file changed, 6 insertions(+), 23 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 7b07803e7042..283491f742d3 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2257,7 +2257,7 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
 	struct cpudata *cpu = all_cpu_data[policy->cpu];
 
 	update_turbo_state();
-	policy->cpuinfo.max_freq = global.turbo_disabled ?
+	policy->cpuinfo.max_freq = global.no_turbo || global.turbo_disabled ?
 			cpu->pstate.max_freq : cpu->pstate.turbo_freq;
 
 	cpufreq_verify_within_cpu_limits(policy);
@@ -2265,26 +2265,6 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
 	return 0;
 }
 
-static unsigned int intel_cpufreq_turbo_update(struct cpudata *cpu,
-					       struct cpufreq_policy *policy,
-					       unsigned int target_freq)
-{
-	unsigned int max_freq;
-
-	update_turbo_state();
-
-	max_freq = global.no_turbo || global.turbo_disabled ?
-			cpu->pstate.max_freq : cpu->pstate.turbo_freq;
-	policy->cpuinfo.max_freq = max_freq;
-	if (policy->max > max_freq)
-		policy->max = max_freq;
-
-	if (target_freq > max_freq)
-		target_freq = max_freq;
-
-	return target_freq;
-}
-
 static int intel_cpufreq_target(struct cpufreq_policy *policy,
 				unsigned int target_freq,
 				unsigned int relation)
@@ -2293,8 +2273,10 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy,
 	struct cpufreq_freqs freqs;
 	int target_pstate;
 
+	update_turbo_state();
+
 	freqs.old = policy->cur;
-	freqs.new = intel_cpufreq_turbo_update(cpu, policy, target_freq);
+	freqs.new = target_freq;
 
 	cpufreq_freq_transition_begin(policy, &freqs);
 	switch (relation) {
@@ -2326,7 +2308,8 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
 	struct cpudata *cpu = all_cpu_data[policy->cpu];
 	int target_pstate;
 
-	target_freq = intel_cpufreq_turbo_update(cpu, policy, target_freq);
+	update_turbo_state();
+
 	target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling);
 	target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
 	intel_pstate_update_pstate(cpu, target_pstate);
-- 
cgit v1.2.3


From ad0a45fd9c14feebd000b6e84189d0edff265170 Mon Sep 17 00:00:00 2001
From: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Date: Sun, 19 Mar 2017 00:51:59 +0530
Subject: cpuidle: Validate cpu_dev in cpuidle_add_sysfs()

If a given cpu is not in cpu_present and cpu hotplug
is disabled, arch can skip setting up the cpu_dev.

Arch cpuidle driver should pass correct cpu mask
for registration, but failing to do so by the driver
causes error to propagate and crash like this:

[   30.076045] Unable to handle kernel paging request for data at address 0x00000048
[   30.076100] Faulting instruction address: 0xc0000000007b2f30
cpu 0x4d: Vector: 300 (Data Access) at [c000003feb18b670]
    pc: c0000000007b2f30: kobject_get+0x20/0x70
    lr: c0000000007b3c94: kobject_add_internal+0x54/0x3f0
    sp: c000003feb18b8f0
   msr: 9000000000009033
   dar: 48
 dsisr: 40000000
  current = 0xc000003fd2ed8300
  paca    = 0xc00000000fbab500   softe: 0        irq_happened: 0x01
    pid   = 1, comm = swapper/0
Linux version 4.11.0-rc2-svaidy+ (sv@sagarika) (gcc version 6.2.0
20161005 (Ubuntu 6.2.0-5ubuntu12) ) #10 SMP Sun Mar 19 00:08:09 IST 2017
enter ? for help
[c000003feb18b960] c0000000007b3c94 kobject_add_internal+0x54/0x3f0
[c000003feb18b9f0] c0000000007b43a4 kobject_init_and_add+0x64/0xa0
[c000003feb18ba70] c000000000e284f4 cpuidle_add_sysfs+0xb4/0x130
[c000003feb18baf0] c000000000e26038 cpuidle_register_device+0x118/0x1c0
[c000003feb18bb30] c000000000e26c48 cpuidle_register+0x78/0x120
[c000003feb18bbc0] c00000000168fd9c powernv_processor_idle_init+0x110/0x1c4
[c000003feb18bc40] c00000000000cff8 do_one_initcall+0x68/0x1d0
[c000003feb18bd00] c0000000016242f4 kernel_init_freeable+0x280/0x360
[c000003feb18bdc0] c00000000000d864 kernel_init+0x24/0x160
[c000003feb18be30] c00000000000b4e8 ret_from_kernel_thread+0x5c/0x74

Validating cpu_dev fixes the crash and reports correct error message like:

[   30.163506] Failed to register cpuidle device for cpu136
[   30.173329] Registration of powernv driver failed.

Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
[ rjw: Comment massage ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/sysfs.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
index c5adc8c9ac43..ae948b1da93a 100644
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -615,6 +615,18 @@ int cpuidle_add_sysfs(struct cpuidle_device *dev)
 	struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu);
 	int error;
 
+	/*
+	 * Return if cpu_device is not setup for this CPU.
+	 *
+	 * This could happen if the arch did not set up cpu_device
+	 * since this CPU is not in cpu_present mask and the
+	 * driver did not send a correct CPU mask during registration.
+	 * Without this check we would end up passing bogus
+	 * value for &cpu_dev->kobj in kobject_init_and_add()
+	 */
+	if (!cpu_dev)
+		return -ENODEV;
+
 	kdev = kzalloc(sizeof(*kdev), GFP_KERNEL);
 	if (!kdev)
 		return -ENOMEM;
-- 
cgit v1.2.3


From 98d068ab52b4b11d403995ed14154660797e7136 Mon Sep 17 00:00:00 2001
From: hayeswang <hayeswang@realtek.com>
Date: Tue, 14 Mar 2017 14:15:20 +0800
Subject: r8152: fix the list rx_done may be used without initialization

The list rx_done would be initialized when the linking on occurs.
Therefore, if a napi is scheduled without any linking on before,
the following kernel panic would happen.

	BUG: unable to handle kernel NULL pointer dereference at 000000000000008
	IP: [<ffffffffc085efde>] r8152_poll+0xe1e/0x1210 [r8152]
	PGD 0
	Oops: 0002 [#1] SMP

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 986243c932cc..bb3eedd07fbe 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -1362,6 +1362,7 @@ static int alloc_all_mem(struct r8152 *tp)
 	spin_lock_init(&tp->rx_lock);
 	spin_lock_init(&tp->tx_lock);
 	INIT_LIST_HEAD(&tp->tx_free);
+	INIT_LIST_HEAD(&tp->rx_done);
 	skb_queue_head_init(&tp->tx_queue);
 	skb_queue_head_init(&tp->rx_queue);
 
-- 
cgit v1.2.3


From 8a0f5ccfb33b0b8b51de65b7b3bf342ba10b4fb6 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 14 Mar 2017 18:25:57 +0800
Subject: crypto: deadlock between crypto_alg_sem/rtnl_mutex/genl_mutex

On Tue, Mar 14, 2017 at 10:44:10AM +0100, Dmitry Vyukov wrote:
>
> Yes, please.
> Disregarding some reports is not a good way long term.

Please try this patch.

---8<---
Subject: netlink: Annotate nlk cb_mutex by protocol

Currently all occurences of nlk->cb_mutex are annotated by lockdep
as a single class.  This causes a false lcokdep cycle involving
genl and crypto_user.

This patch fixes it by dividing cb_mutex into individual classes
based on the netlink protocol.  As genl and crypto_user do not
use the same netlink protocol this breaks the false dependency
loop.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 7b73c7c161a9..596eaff66649 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -96,6 +96,44 @@ EXPORT_SYMBOL_GPL(nl_table);
 
 static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
 
+static struct lock_class_key nlk_cb_mutex_keys[MAX_LINKS];
+
+static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = {
+	"nlk_cb_mutex-ROUTE",
+	"nlk_cb_mutex-1",
+	"nlk_cb_mutex-USERSOCK",
+	"nlk_cb_mutex-FIREWALL",
+	"nlk_cb_mutex-SOCK_DIAG",
+	"nlk_cb_mutex-NFLOG",
+	"nlk_cb_mutex-XFRM",
+	"nlk_cb_mutex-SELINUX",
+	"nlk_cb_mutex-ISCSI",
+	"nlk_cb_mutex-AUDIT",
+	"nlk_cb_mutex-FIB_LOOKUP",
+	"nlk_cb_mutex-CONNECTOR",
+	"nlk_cb_mutex-NETFILTER",
+	"nlk_cb_mutex-IP6_FW",
+	"nlk_cb_mutex-DNRTMSG",
+	"nlk_cb_mutex-KOBJECT_UEVENT",
+	"nlk_cb_mutex-GENERIC",
+	"nlk_cb_mutex-17",
+	"nlk_cb_mutex-SCSITRANSPORT",
+	"nlk_cb_mutex-ECRYPTFS",
+	"nlk_cb_mutex-RDMA",
+	"nlk_cb_mutex-CRYPTO",
+	"nlk_cb_mutex-SMC",
+	"nlk_cb_mutex-23",
+	"nlk_cb_mutex-24",
+	"nlk_cb_mutex-25",
+	"nlk_cb_mutex-26",
+	"nlk_cb_mutex-27",
+	"nlk_cb_mutex-28",
+	"nlk_cb_mutex-29",
+	"nlk_cb_mutex-30",
+	"nlk_cb_mutex-31",
+	"nlk_cb_mutex-MAX_LINKS"
+};
+
 static int netlink_dump(struct sock *sk);
 static void netlink_skb_destructor(struct sk_buff *skb);
 
@@ -585,6 +623,9 @@ static int __netlink_create(struct net *net, struct socket *sock,
 	} else {
 		nlk->cb_mutex = &nlk->cb_def_mutex;
 		mutex_init(nlk->cb_mutex);
+		lockdep_set_class_and_name(nlk->cb_mutex,
+					   nlk_cb_mutex_keys + protocol,
+					   nlk_cb_mutex_key_strings[protocol]);
 	}
 	init_waitqueue_head(&nlk->wait);
 
-- 
cgit v1.2.3


From 36d277bac8080202684e67162ebb157f16631581 Mon Sep 17 00:00:00 2001
From: Peng Tao <bergwolf@gmail.com>
Date: Wed, 15 Mar 2017 09:32:14 +0800
Subject: vsock: track pkt owner vsock

So that we can cancel a queued pkt later if necessary.

Signed-off-by: Peng Tao <bergwolf@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/virtio_vsock.h            | 3 +++
 net/vmw_vsock/virtio_transport_common.c | 7 +++++++
 2 files changed, 10 insertions(+)

diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index 9638bfeb0d1f..584f9a647ad4 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -48,6 +48,8 @@ struct virtio_vsock_pkt {
 	struct virtio_vsock_hdr	hdr;
 	struct work_struct work;
 	struct list_head list;
+	/* socket refcnt not held, only use for cancellation */
+	struct vsock_sock *vsk;
 	void *buf;
 	u32 len;
 	u32 off;
@@ -56,6 +58,7 @@ struct virtio_vsock_pkt {
 
 struct virtio_vsock_pkt_info {
 	u32 remote_cid, remote_port;
+	struct vsock_sock *vsk;
 	struct msghdr *msg;
 	u32 pkt_len;
 	u16 type;
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 8d592a45b597..af087b44ceea 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -58,6 +58,7 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
 	pkt->len		= len;
 	pkt->hdr.len		= cpu_to_le32(len);
 	pkt->reply		= info->reply;
+	pkt->vsk		= info->vsk;
 
 	if (info->msg && len > 0) {
 		pkt->buf = kmalloc(len, GFP_KERNEL);
@@ -180,6 +181,7 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk,
 	struct virtio_vsock_pkt_info info = {
 		.op = VIRTIO_VSOCK_OP_CREDIT_UPDATE,
 		.type = type,
+		.vsk = vsk,
 	};
 
 	return virtio_transport_send_pkt_info(vsk, &info);
@@ -519,6 +521,7 @@ int virtio_transport_connect(struct vsock_sock *vsk)
 	struct virtio_vsock_pkt_info info = {
 		.op = VIRTIO_VSOCK_OP_REQUEST,
 		.type = VIRTIO_VSOCK_TYPE_STREAM,
+		.vsk = vsk,
 	};
 
 	return virtio_transport_send_pkt_info(vsk, &info);
@@ -534,6 +537,7 @@ int virtio_transport_shutdown(struct vsock_sock *vsk, int mode)
 			  VIRTIO_VSOCK_SHUTDOWN_RCV : 0) |
 			 (mode & SEND_SHUTDOWN ?
 			  VIRTIO_VSOCK_SHUTDOWN_SEND : 0),
+		.vsk = vsk,
 	};
 
 	return virtio_transport_send_pkt_info(vsk, &info);
@@ -560,6 +564,7 @@ virtio_transport_stream_enqueue(struct vsock_sock *vsk,
 		.type = VIRTIO_VSOCK_TYPE_STREAM,
 		.msg = msg,
 		.pkt_len = len,
+		.vsk = vsk,
 	};
 
 	return virtio_transport_send_pkt_info(vsk, &info);
@@ -581,6 +586,7 @@ static int virtio_transport_reset(struct vsock_sock *vsk,
 		.op = VIRTIO_VSOCK_OP_RST,
 		.type = VIRTIO_VSOCK_TYPE_STREAM,
 		.reply = !!pkt,
+		.vsk = vsk,
 	};
 
 	/* Send RST only if the original pkt is not a RST pkt */
@@ -826,6 +832,7 @@ virtio_transport_send_response(struct vsock_sock *vsk,
 		.remote_cid = le64_to_cpu(pkt->hdr.src_cid),
 		.remote_port = le32_to_cpu(pkt->hdr.src_port),
 		.reply = true,
+		.vsk = vsk,
 	};
 
 	return virtio_transport_send_pkt_info(vsk, &info);
-- 
cgit v1.2.3


From 16320f363ae128d9b9c70e60f00f2a572f57c23d Mon Sep 17 00:00:00 2001
From: Peng Tao <bergwolf@gmail.com>
Date: Wed, 15 Mar 2017 09:32:15 +0800
Subject: vhost-vsock: add pkt cancel capability

To allow canceling all packets of a connection.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Jorgen Hansen <jhansen@vmware.com>
Signed-off-by: Peng Tao <bergwolf@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/vhost/vsock.c  | 41 +++++++++++++++++++++++++++++++++++++++++
 include/net/af_vsock.h |  3 +++
 2 files changed, 44 insertions(+)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index ce5e63d2c66a..44eed8eb0725 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -223,6 +223,46 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
 	return len;
 }
 
+static int
+vhost_transport_cancel_pkt(struct vsock_sock *vsk)
+{
+	struct vhost_vsock *vsock;
+	struct virtio_vsock_pkt *pkt, *n;
+	int cnt = 0;
+	LIST_HEAD(freeme);
+
+	/* Find the vhost_vsock according to guest context id  */
+	vsock = vhost_vsock_get(vsk->remote_addr.svm_cid);
+	if (!vsock)
+		return -ENODEV;
+
+	spin_lock_bh(&vsock->send_pkt_list_lock);
+	list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) {
+		if (pkt->vsk != vsk)
+			continue;
+		list_move(&pkt->list, &freeme);
+	}
+	spin_unlock_bh(&vsock->send_pkt_list_lock);
+
+	list_for_each_entry_safe(pkt, n, &freeme, list) {
+		if (pkt->reply)
+			cnt++;
+		list_del(&pkt->list);
+		virtio_transport_free_pkt(pkt);
+	}
+
+	if (cnt) {
+		struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
+		int new_cnt;
+
+		new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
+		if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num)
+			vhost_poll_queue(&tx_vq->poll);
+	}
+
+	return 0;
+}
+
 static struct virtio_vsock_pkt *
 vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
 		      unsigned int out, unsigned int in)
@@ -675,6 +715,7 @@ static struct virtio_transport vhost_transport = {
 		.release                  = virtio_transport_release,
 		.connect                  = virtio_transport_connect,
 		.shutdown                 = virtio_transport_shutdown,
+		.cancel_pkt               = vhost_transport_cancel_pkt,
 
 		.dgram_enqueue            = virtio_transport_dgram_enqueue,
 		.dgram_dequeue            = virtio_transport_dgram_dequeue,
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index f2758964ce6f..f32ed9ac181a 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -100,6 +100,9 @@ struct vsock_transport {
 	void (*destruct)(struct vsock_sock *);
 	void (*release)(struct vsock_sock *);
 
+	/* Cancel all pending packets sent on vsock. */
+	int (*cancel_pkt)(struct vsock_sock *vsk);
+
 	/* Connections. */
 	int (*connect)(struct vsock_sock *);
 
-- 
cgit v1.2.3


From 073b4f2c50fe67c7c66a059a4d6db52bb1465490 Mon Sep 17 00:00:00 2001
From: Peng Tao <bergwolf@gmail.com>
Date: Wed, 15 Mar 2017 09:32:16 +0800
Subject: vsock: add pkt cancel capability

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Peng Tao <bergwolf@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/virtio_transport.c | 42 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 9d24c0e958b1..68675a151f22 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -213,6 +213,47 @@ virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt)
 	return len;
 }
 
+static int
+virtio_transport_cancel_pkt(struct vsock_sock *vsk)
+{
+	struct virtio_vsock *vsock;
+	struct virtio_vsock_pkt *pkt, *n;
+	int cnt = 0;
+	LIST_HEAD(freeme);
+
+	vsock = virtio_vsock_get();
+	if (!vsock) {
+		return -ENODEV;
+	}
+
+	spin_lock_bh(&vsock->send_pkt_list_lock);
+	list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) {
+		if (pkt->vsk != vsk)
+			continue;
+		list_move(&pkt->list, &freeme);
+	}
+	spin_unlock_bh(&vsock->send_pkt_list_lock);
+
+	list_for_each_entry_safe(pkt, n, &freeme, list) {
+		if (pkt->reply)
+			cnt++;
+		list_del(&pkt->list);
+		virtio_transport_free_pkt(pkt);
+	}
+
+	if (cnt) {
+		struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX];
+		int new_cnt;
+
+		new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
+		if (new_cnt + cnt >= virtqueue_get_vring_size(rx_vq) &&
+		    new_cnt < virtqueue_get_vring_size(rx_vq))
+			queue_work(virtio_vsock_workqueue, &vsock->rx_work);
+	}
+
+	return 0;
+}
+
 static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
 {
 	int buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
@@ -462,6 +503,7 @@ static struct virtio_transport virtio_transport = {
 		.release                  = virtio_transport_release,
 		.connect                  = virtio_transport_connect,
 		.shutdown                 = virtio_transport_shutdown,
+		.cancel_pkt               = virtio_transport_cancel_pkt,
 
 		.dgram_bind               = virtio_transport_dgram_bind,
 		.dgram_dequeue            = virtio_transport_dgram_dequeue,
-- 
cgit v1.2.3


From 380feae0def7e6a115124a3219c3ec9b654dca32 Mon Sep 17 00:00:00 2001
From: Peng Tao <bergwolf@gmail.com>
Date: Wed, 15 Mar 2017 09:32:17 +0800
Subject: vsock: cancel packets when failing to connect

Otherwise we'll leave the packets queued until releasing vsock device.
E.g., if guest is slow to start up, resulting ETIMEDOUT on connect, guest
will get the connect requests from failed host sockets.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Jorgen Hansen <jhansen@vmware.com>
Signed-off-by: Peng Tao <bergwolf@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/af_vsock.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 9f770f33c100..6f7f6757ceef 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1102,10 +1102,19 @@ static const struct proto_ops vsock_dgram_ops = {
 	.sendpage = sock_no_sendpage,
 };
 
+static int vsock_transport_cancel_pkt(struct vsock_sock *vsk)
+{
+	if (!transport->cancel_pkt)
+		return -EOPNOTSUPP;
+
+	return transport->cancel_pkt(vsk);
+}
+
 static void vsock_connect_timeout(struct work_struct *work)
 {
 	struct sock *sk;
 	struct vsock_sock *vsk;
+	int cancel = 0;
 
 	vsk = container_of(work, struct vsock_sock, dwork.work);
 	sk = sk_vsock(vsk);
@@ -1116,8 +1125,11 @@ static void vsock_connect_timeout(struct work_struct *work)
 		sk->sk_state = SS_UNCONNECTED;
 		sk->sk_err = ETIMEDOUT;
 		sk->sk_error_report(sk);
+		cancel = 1;
 	}
 	release_sock(sk);
+	if (cancel)
+		vsock_transport_cancel_pkt(vsk);
 
 	sock_put(sk);
 }
@@ -1224,11 +1236,13 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
 			err = sock_intr_errno(timeout);
 			sk->sk_state = SS_UNCONNECTED;
 			sock->state = SS_UNCONNECTED;
+			vsock_transport_cancel_pkt(vsk);
 			goto out_wait;
 		} else if (timeout == 0) {
 			err = -ETIMEDOUT;
 			sk->sk_state = SS_UNCONNECTED;
 			sock->state = SS_UNCONNECTED;
+			vsock_transport_cancel_pkt(vsk);
 			goto out_wait;
 		}
 
-- 
cgit v1.2.3


From 7df9c24625b9981779afb8fcdbe2bb4765e61147 Mon Sep 17 00:00:00 2001
From: Andrey Ulanov <andreyu@google.com>
Date: Tue, 14 Mar 2017 20:16:42 -0700
Subject: net: unix: properly re-increment inflight counter of GC discarded
 candidates

Dmitry has reported that a BUG_ON() condition in unix_notinflight()
may be triggered by a simple code that forwards unix socket in an
SCM_RIGHTS message.
That is caused by incorrect unix socket GC implementation in unix_gc().

The GC first collects list of candidates, then (a) decrements their
"children's" inflight counter, (b) checks which inflight counters are
now 0, and then (c) increments all inflight counters back.
(a) and (c) are done by calling scan_children() with inc_inflight or
dec_inflight as the second argument.

Commit 6209344f5a37 ("net: unix: fix inflight counting bug in garbage
collector") changed scan_children() such that it no longer considers
sockets that do not have UNIX_GC_CANDIDATE flag. It also added a block
of code that that unsets this flag _before_ invoking
scan_children(, dec_iflight, ). This may lead to incorrect inflight
counters for some sockets.

This change fixes this bug by changing order of operations:
UNIX_GC_CANDIDATE is now unset only after all inflight counters are
restored to the original state.

  kernel BUG at net/unix/garbage.c:149!
  RIP: 0010:[<ffffffff8717ebf4>]  [<ffffffff8717ebf4>]
  unix_notinflight+0x3b4/0x490 net/unix/garbage.c:149
  Call Trace:
   [<ffffffff8716cfbf>] unix_detach_fds.isra.19+0xff/0x170 net/unix/af_unix.c:1487
   [<ffffffff8716f6a9>] unix_destruct_scm+0xf9/0x210 net/unix/af_unix.c:1496
   [<ffffffff86a90a01>] skb_release_head_state+0x101/0x200 net/core/skbuff.c:655
   [<ffffffff86a9808a>] skb_release_all+0x1a/0x60 net/core/skbuff.c:668
   [<ffffffff86a980ea>] __kfree_skb+0x1a/0x30 net/core/skbuff.c:684
   [<ffffffff86a98284>] kfree_skb+0x184/0x570 net/core/skbuff.c:705
   [<ffffffff871789d5>] unix_release_sock+0x5b5/0xbd0 net/unix/af_unix.c:559
   [<ffffffff87179039>] unix_release+0x49/0x90 net/unix/af_unix.c:836
   [<ffffffff86a694b2>] sock_release+0x92/0x1f0 net/socket.c:570
   [<ffffffff86a6962b>] sock_close+0x1b/0x20 net/socket.c:1017
   [<ffffffff81a76b8e>] __fput+0x34e/0x910 fs/file_table.c:208
   [<ffffffff81a771da>] ____fput+0x1a/0x20 fs/file_table.c:244
   [<ffffffff81483ab0>] task_work_run+0x1a0/0x280 kernel/task_work.c:116
   [<     inline     >] exit_task_work include/linux/task_work.h:21
   [<ffffffff8141287a>] do_exit+0x183a/0x2640 kernel/exit.c:828
   [<ffffffff8141383e>] do_group_exit+0x14e/0x420 kernel/exit.c:931
   [<ffffffff814429d3>] get_signal+0x663/0x1880 kernel/signal.c:2307
   [<ffffffff81239b45>] do_signal+0xc5/0x2190 arch/x86/kernel/signal.c:807
   [<ffffffff8100666a>] exit_to_usermode_loop+0x1ea/0x2d0
  arch/x86/entry/common.c:156
   [<     inline     >] prepare_exit_to_usermode arch/x86/entry/common.c:190
   [<ffffffff81009693>] syscall_return_slowpath+0x4d3/0x570
  arch/x86/entry/common.c:259
   [<ffffffff881478e6>] entry_SYSCALL_64_fastpath+0xc4/0xc6

Link: https://lkml.org/lkml/2017/3/6/252
Signed-off-by: Andrey Ulanov <andreyu@google.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Fixes: 6209344 ("net: unix: fix inflight counting bug in garbage collector")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/garbage.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 6a0d48525fcf..c36757e72844 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -146,6 +146,7 @@ void unix_notinflight(struct user_struct *user, struct file *fp)
 	if (s) {
 		struct unix_sock *u = unix_sk(s);
 
+		BUG_ON(!atomic_long_read(&u->inflight));
 		BUG_ON(list_empty(&u->link));
 
 		if (atomic_long_dec_and_test(&u->inflight))
@@ -341,6 +342,14 @@ void unix_gc(void)
 	}
 	list_del(&cursor);
 
+	/* Now gc_candidates contains only garbage.  Restore original
+	 * inflight counters for these as well, and remove the skbuffs
+	 * which are creating the cycle(s).
+	 */
+	skb_queue_head_init(&hitlist);
+	list_for_each_entry(u, &gc_candidates, link)
+		scan_children(&u->sk, inc_inflight, &hitlist);
+
 	/* not_cycle_list contains those sockets which do not make up a
 	 * cycle.  Restore these to the inflight list.
 	 */
@@ -350,14 +359,6 @@ void unix_gc(void)
 		list_move_tail(&u->link, &gc_inflight_list);
 	}
 
-	/* Now gc_candidates contains only garbage.  Restore original
-	 * inflight counters for these as well, and remove the skbuffs
-	 * which are creating the cycle(s).
-	 */
-	skb_queue_head_init(&hitlist);
-	list_for_each_entry(u, &gc_candidates, link)
-	scan_children(&u->sk, inc_inflight, &hitlist);
-
 	spin_unlock(&unix_gc_lock);
 
 	/* Here we are. Hitlist is filled. Die. */
-- 
cgit v1.2.3


From 09050957fae896e001498af1aa35c446a11cb47d Mon Sep 17 00:00:00 2001
From: Yaroslav Isakov <yaroslav.isakov@gmail.com>
Date: Thu, 16 Mar 2017 22:44:10 +0300
Subject: tun: fix inability to set offloads after disabling them via ethtool

Added missing logic in tun driver, which prevents apps to set
offloads using tun ioctl, if offloads were previously disabled via ethtool

Signed-off-by: Yaroslav Isakov <yaroslav.isakov@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 34cc3c590aa5..cc88cd7856f5 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1931,6 +1931,8 @@ static int set_offload(struct tun_struct *tun, unsigned long arg)
 		return -EINVAL;
 
 	tun->set_features = features;
+	tun->dev->wanted_features &= ~TUN_USER_FEATURES;
+	tun->dev->wanted_features |= features;
 	netdev_update_features(tun->dev);
 
 	return 0;
-- 
cgit v1.2.3


From aea92fb2e09e29653b023d4254ac9fbf94221538 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 17 Mar 2017 08:05:28 -0700
Subject: sch_dsmark: fix invalid skb_cow() usage

skb_cow(skb, sizeof(ip header)) is not very helpful in this context.

First we need to use pskb_may_pull() to make sure the ip header
is in skb linear part, then use skb_try_make_writable() to
address clones issues.

Fixes: 4c30719f4f55 ("[PKT_SCHED] dsmark: handle cloned and non-linear skb's")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_dsmark.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 802ac7c2e5e8..5334e309f17f 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -201,9 +201,13 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p);
 
 	if (p->set_tc_index) {
+		int wlen = skb_network_offset(skb);
+
 		switch (tc_skb_protocol(skb)) {
 		case htons(ETH_P_IP):
-			if (skb_cow_head(skb, sizeof(struct iphdr)))
+			wlen += sizeof(struct iphdr);
+			if (!pskb_may_pull(skb, wlen) ||
+			    skb_try_make_writable(skb, wlen))
 				goto drop;
 
 			skb->tc_index = ipv4_get_dsfield(ip_hdr(skb))
@@ -211,7 +215,9 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			break;
 
 		case htons(ETH_P_IPV6):
-			if (skb_cow_head(skb, sizeof(struct ipv6hdr)))
+			wlen += sizeof(struct ipv6hdr);
+			if (!pskb_may_pull(skb, wlen) ||
+			    skb_try_make_writable(skb, wlen))
 				goto drop;
 
 			skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb))
-- 
cgit v1.2.3


From 6bd845d1cf98b45c634baacb8381436dad3c2dd0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Fri, 17 Mar 2017 17:20:48 +0100
Subject: qmi_wwan: add Dell DW5811e
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is a Dell branded Sierra Wireless EM7455. It is operating in
MBIM mode by default, but can be configured to provide two QMI/RMNET
functions.

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/qmi_wwan.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 805674550683..f8d55aa058ec 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -925,6 +925,8 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x413c, 0x81a9, 8)},	/* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */
 	{QMI_FIXED_INTF(0x413c, 0x81b1, 8)},	/* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */
 	{QMI_FIXED_INTF(0x413c, 0x81b3, 8)},	/* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */
+	{QMI_FIXED_INTF(0x413c, 0x81b6, 8)},	/* Dell Wireless 5811e */
+	{QMI_FIXED_INTF(0x413c, 0x81b6, 10)},	/* Dell Wireless 5811e */
 	{QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)},	/* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */
 	{QMI_FIXED_INTF(0x22de, 0x9061, 3)},	/* WeTelecom WPD-600N */
 	{QMI_FIXED_INTF(0x1e0e, 0x9001, 5)},	/* SIMCom 7230E */
-- 
cgit v1.2.3


From 13e2d5187f6b965ba3556caedb914baf81b98ed2 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 17 Mar 2017 23:52:35 +0300
Subject: bna: integer overflow bug in debugfs

We could allocate less memory than intended because we do:

	bnad->regdata = kzalloc(len << 2, GFP_KERNEL);

The shift can overflow leading to a crash.  This is debugfs code so the
impact is very small.

Fixes: 7afc5dbde091 ("bna: Add debugfs interface.")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Rasesh Mody <rasesh.mody@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/brocade/bna/bnad_debugfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
index 05c1c1dd7751..cebfe3bd086e 100644
--- a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
+++ b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
@@ -325,7 +325,7 @@ bnad_debugfs_write_regrd(struct file *file, const char __user *buf,
 		return PTR_ERR(kern_buf);
 
 	rc = sscanf(kern_buf, "%x:%x", &addr, &len);
-	if (rc < 2) {
+	if (rc < 2 || len > UINT_MAX >> 2) {
 		netdev_warn(bnad->netdev, "failed to read user buffer\n");
 		kfree(kern_buf);
 		return -EINVAL;
-- 
cgit v1.2.3


From 3dc857f0e8fc22610a59cbb346ba62c6e921863f Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Fri, 17 Mar 2017 16:07:11 -0700
Subject: net: vrf: Reset rt6i_idev in local dst after put

The VRF driver takes a reference to the inet6_dev on the VRF device for
its rt6_local dst when handling local traffic through the VRF device as
a loopback. When the device is deleted the driver does a put on the idev
but does not reset rt6i_idev in the rt6_info struct. When the dst is
destroyed, dst_destroy calls ip6_dst_destroy which does a second put for
what is essentially the same reference causing it to be prematurely freed.
Reset rt6i_idev after the put in the vrf driver.

Fixes: b4869aa2f881e ("net: vrf: ipv6 support for local traffic to
                       local addresses")
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vrf.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index fea687f35b5a..d6988db1930d 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -462,8 +462,10 @@ static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
 	}
 
 	if (rt6_local) {
-		if (rt6_local->rt6i_idev)
+		if (rt6_local->rt6i_idev) {
 			in6_dev_put(rt6_local->rt6i_idev);
+			rt6_local->rt6i_idev = NULL;
+		}
 
 		dst = &rt6_local->dst;
 		dev_put(dst->dev);
-- 
cgit v1.2.3


From 486a43db2e26b87125b5629e1ade516f90833934 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 18 Mar 2017 19:12:22 +0800
Subject: sctp: remove temporary variable confirm from sctp_packet_transmit

Commit c86a773c7802 ("sctp: add dst_pending_confirm flag") introduced
a temporary variable "confirm" in sctp_packet_transmit.

But it broke the rule that longer lines should be above shorter ones.
Besides, this variable is not necessary, so this patch is to just
remove it and use tp->dst_pending_confirm directly.

Fixes: c86a773c7802 ("sctp: add dst_pending_confirm flag")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/output.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/net/sctp/output.c b/net/sctp/output.c
index 71ce6b945dcb..1224421036b3 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -546,7 +546,6 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
 	struct sctp_association *asoc = tp->asoc;
 	struct sctp_chunk *chunk, *tmp;
 	int pkt_count, gso = 0;
-	int confirm;
 	struct dst_entry *dst;
 	struct sk_buff *head;
 	struct sctphdr *sh;
@@ -625,13 +624,13 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
 			asoc->peer.last_sent_to = tp;
 	}
 	head->ignore_df = packet->ipfragok;
-	confirm = tp->dst_pending_confirm;
-	if (confirm)
+	if (tp->dst_pending_confirm)
 		skb_set_dst_pending_confirm(head, 1);
 	/* neighbour should be confirmed on successful transmission or
 	 * positive error
 	 */
-	if (tp->af_specific->sctp_xmit(head, tp) >= 0 && confirm)
+	if (tp->af_specific->sctp_xmit(head, tp) >= 0 &&
+	    tp->dst_pending_confirm)
 		tp->dst_pending_confirm = 0;
 
 out:
-- 
cgit v1.2.3


From 1f904495b79003cd3d881de8731377d48fcbc7e3 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 18 Mar 2017 19:27:23 +0800
Subject: sctp: define dst_pending_confirm as a bit in sctp_transport

As tp->dst_pending_confirm's value can only be set 0 or 1, this
patch is to change to define it as a bit instead of __u32.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 07a0b128625a..4f645198e9bd 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -753,6 +753,8 @@ struct sctp_transport {
 		/* Is the Path MTU update pending on this tranport */
 		pmtu_pending:1,
 
+		dst_pending_confirm:1,	/* need to confirm neighbour */
+
 		/* Has this transport moved the ctsn since we last sacked */
 		sack_generation:1;
 	u32 dst_cookie;
@@ -806,8 +808,6 @@ struct sctp_transport {
 
 	__u32 burst_limited;	/* Holds old cwnd when max.burst is applied */
 
-	__u32 dst_pending_confirm;	/* need to confirm neighbour */
-
 	/* Destination */
 	struct dst_entry *dst;
 	/* Source address. */
-- 
cgit v1.2.3


From 23bb09cfbe04076ef647da3889a5a5ab6cbe6f15 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 18 Mar 2017 20:03:59 +0800
Subject: sctp: out_qlen should be updated when pruning unsent queue

This patch is to fix the issue that sctp_prsctp_prune_sent forgot
to update q->out_qlen when removing a chunk from unsent queue.

Fixes: 8dbdf1f5b09c ("sctp: implement prsctp PRIO policy")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/outqueue.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index db352e5d61f8..025ccff67072 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -382,17 +382,18 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
 }
 
 static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
-				    struct sctp_sndrcvinfo *sinfo,
-				    struct list_head *queue, int msg_len)
+				    struct sctp_sndrcvinfo *sinfo, int msg_len)
 {
+	struct sctp_outq *q = &asoc->outqueue;
 	struct sctp_chunk *chk, *temp;
 
-	list_for_each_entry_safe(chk, temp, queue, list) {
+	list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) {
 		if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
 		    chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
 			continue;
 
 		list_del_init(&chk->list);
+		q->out_qlen -= chk->skb->len;
 		asoc->sent_cnt_removable--;
 		asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
 
@@ -431,9 +432,7 @@ void sctp_prsctp_prune(struct sctp_association *asoc,
 			return;
 	}
 
-	sctp_prsctp_prune_unsent(asoc, sinfo,
-				 &asoc->outqueue.out_chunk_list,
-				 msg_len);
+	sctp_prsctp_prune_unsent(asoc, sinfo, msg_len);
 }
 
 /* Mark all the eligible packets on a transport for retransmission.  */
-- 
cgit v1.2.3


From ff010472fb75670cb5c08671e820eeea3af59c87 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 21 Mar 2017 11:36:06 +0530
Subject: cpufreq: Restore policy min/max limits on CPU online

On CPU online the cpufreq core restores the previous governor (or
the previous "policy" setting for ->setpolicy drivers), but it does
not restore the min/max limits at the same time, which is confusing,
inconsistent and real pain for users who set the limits and then
suspend/resume the system (using full suspend), in which case the
limits are reset on all CPUs except for the boot one.

Fix this by making cpufreq_online() restore the limits when an inactive
policy is brought online.

The commit log and patch are inspired from Rafael's earlier work.

Reported-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Cc: 4.3+ <stable@vger.kernel.org> # 4.3+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index b8ff617d449d..5dbdd261aa73 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1184,6 +1184,9 @@ static int cpufreq_online(unsigned int cpu)
 		for_each_cpu(j, policy->related_cpus)
 			per_cpu(cpufreq_cpu_data, j) = policy;
 		write_unlock_irqrestore(&cpufreq_driver_lock, flags);
+	} else {
+		policy->min = policy->user_policy.min;
+		policy->max = policy->user_policy.max;
 	}
 
 	if (cpufreq_driver->get && !cpufreq_driver->setpolicy) {
-- 
cgit v1.2.3


From 8605330aac5a5785630aec8f64378a54891937cc Mon Sep 17 00:00:00 2001
From: Soheil Hassas Yeganeh <soheil@google.com>
Date: Sat, 18 Mar 2017 17:02:59 -0400
Subject: tcp: fix SCM_TIMESTAMPING_OPT_STATS for normal skbs

__sock_recv_timestamp can be called for both normal skbs (for
receive timestamps) and for skbs on the error queue (for transmit
timestamps).

Commit 1c885808e456
(tcp: SOF_TIMESTAMPING_OPT_STATS option for SO_TIMESTAMPING)
assumes any skb passed to __sock_recv_timestamp are from
the error queue, containing OPT_STATS in the content of the skb.
This results in accessing invalid memory or generating junk
data.

To fix this, set skb->pkt_type to PACKET_OUTGOING for packets
on the error queue. This is safe because on the receive path
on local sockets skb->pkt_type is never set to PACKET_OUTGOING.
With that, copy OPT_STATS from a packet, only if its pkt_type
is PACKET_OUTGOING.

Fixes: 1c885808e456 ("tcp: SOF_TIMESTAMPING_OPT_STATS option for SO_TIMESTAMPING")
Reported-by: JongHwan Kim <zzoru007@gmail.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 10 ++++++++++
 net/socket.c      | 13 ++++++++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index cd4ba8c6b609..b1fbd1958eb6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3694,6 +3694,15 @@ static void sock_rmem_free(struct sk_buff *skb)
 	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
 }
 
+static void skb_set_err_queue(struct sk_buff *skb)
+{
+	/* pkt_type of skbs received on local sockets is never PACKET_OUTGOING.
+	 * So, it is safe to (mis)use it to mark skbs on the error queue.
+	 */
+	skb->pkt_type = PACKET_OUTGOING;
+	BUILD_BUG_ON(PACKET_OUTGOING == 0);
+}
+
 /*
  * Note: We dont mem charge error packets (no sk_forward_alloc changes)
  */
@@ -3707,6 +3716,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
 	skb->sk = sk;
 	skb->destructor = sock_rmem_free;
 	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+	skb_set_err_queue(skb);
 
 	/* before exiting rcu section, make sure dst is refcounted */
 	skb_dst_force(skb);
diff --git a/net/socket.c b/net/socket.c
index e034fe4164be..692d6989d2c2 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -652,6 +652,16 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 }
 EXPORT_SYMBOL(kernel_sendmsg);
 
+static bool skb_is_err_queue(const struct sk_buff *skb)
+{
+	/* pkt_type of skbs enqueued on the error queue are set to
+	 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
+	 * in recvmsg, since skbs received on a local socket will never
+	 * have a pkt_type of PACKET_OUTGOING.
+	 */
+	return skb->pkt_type == PACKET_OUTGOING;
+}
+
 /*
  * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
  */
@@ -695,7 +705,8 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 		put_cmsg(msg, SOL_SOCKET,
 			 SCM_TIMESTAMPING, sizeof(tss), &tss);
 
-		if (skb->len && (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS))
+		if (skb_is_err_queue(skb) && skb->len &&
+		    (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS))
 			put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
 				 skb->len, skb->data);
 	}
-- 
cgit v1.2.3


From 4ef1b2869447411ad3ef91ad7d4891a83c1a509a Mon Sep 17 00:00:00 2001
From: Soheil Hassas Yeganeh <soheil@google.com>
Date: Sat, 18 Mar 2017 17:03:00 -0400
Subject: tcp: mark skbs with SCM_TIMESTAMPING_OPT_STATS

SOF_TIMESTAMPING_OPT_STATS can be enabled and disabled
while packets are collected on the error queue.
So, checking SOF_TIMESTAMPING_OPT_STATS in sk->sk_tsflags
is not enough to safely assume that the skb contains
OPT_STATS data.

Add a bit in sock_exterr_skb to indicate whether the
skb contains opt_stats data.

Fixes: 1c885808e456 ("tcp: SOF_TIMESTAMPING_OPT_STATS option for SO_TIMESTAMPING")
Reported-by: JongHwan Kim <zzoru007@gmail.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/errqueue.h |  2 ++
 net/core/skbuff.c        | 17 +++++++++++------
 net/socket.c             |  2 +-
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/include/linux/errqueue.h b/include/linux/errqueue.h
index 9ca23fcfb5d7..6fdfc884fdeb 100644
--- a/include/linux/errqueue.h
+++ b/include/linux/errqueue.h
@@ -20,6 +20,8 @@ struct sock_exterr_skb {
 	struct sock_extended_err	ee;
 	u16				addr_offset;
 	__be16				port;
+	u8				opt_stats:1,
+					unused:7;
 };
 
 #endif
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b1fbd1958eb6..9f781092fda9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3793,16 +3793,20 @@ EXPORT_SYMBOL(skb_clone_sk);
 
 static void __skb_complete_tx_timestamp(struct sk_buff *skb,
 					struct sock *sk,
-					int tstype)
+					int tstype,
+					bool opt_stats)
 {
 	struct sock_exterr_skb *serr;
 	int err;
 
+	BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb));
+
 	serr = SKB_EXT_ERR(skb);
 	memset(serr, 0, sizeof(*serr));
 	serr->ee.ee_errno = ENOMSG;
 	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
 	serr->ee.ee_info = tstype;
+	serr->opt_stats = opt_stats;
 	if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
 		serr->ee.ee_data = skb_shinfo(skb)->tskey;
 		if (sk->sk_protocol == IPPROTO_TCP &&
@@ -3843,7 +3847,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
 	 */
 	if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
 		*skb_hwtstamps(skb) = *hwtstamps;
-		__skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
+		__skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false);
 		sock_put(sk);
 	}
 }
@@ -3854,7 +3858,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
 		     struct sock *sk, int tstype)
 {
 	struct sk_buff *skb;
-	bool tsonly;
+	bool tsonly, opt_stats = false;
 
 	if (!sk)
 		return;
@@ -3867,9 +3871,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
 #ifdef CONFIG_INET
 		if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
 		    sk->sk_protocol == IPPROTO_TCP &&
-		    sk->sk_type == SOCK_STREAM)
+		    sk->sk_type == SOCK_STREAM) {
 			skb = tcp_get_timestamping_opt_stats(sk);
-		else
+			opt_stats = true;
+		} else
 #endif
 			skb = alloc_skb(0, GFP_ATOMIC);
 	} else {
@@ -3888,7 +3893,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
 	else
 		skb->tstamp = ktime_get_real();
 
-	__skb_complete_tx_timestamp(skb, sk, tstype);
+	__skb_complete_tx_timestamp(skb, sk, tstype, opt_stats);
 }
 EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
 
diff --git a/net/socket.c b/net/socket.c
index 692d6989d2c2..985ef06792d6 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -706,7 +706,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 			 SCM_TIMESTAMPING, sizeof(tss), &tss);
 
 		if (skb_is_err_queue(skb) && skb->len &&
-		    (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS))
+		    SKB_EXT_ERR(skb)->opt_stats)
 			put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
 				 skb->len, skb->data);
 	}
-- 
cgit v1.2.3


From e8f1f34a344d060eaf1918089369c4c1172a153b Mon Sep 17 00:00:00 2001
From: Zi Shen Lim <zlim.lnx@gmail.com>
Date: Sun, 19 Mar 2017 23:03:14 -0700
Subject: selftests/bpf: fix broken build, take 2

Merge of 'linux-kselftest-4.11-rc1':

1. Partially removed use of 'test_objs' target, breaking force rebuild of
BPFOBJ, introduced in commit d498f8719a09 ("bpf: Rebuild bpf.o for any
dependency update").

  Update target so dependency on BPFOBJ is restored.

2. Introduced commit 2047f1d8ba28 ("selftests: Fix the .c linking rule")
which fixes order of LDLIBS.

  Commit d02d8986a768 ("bpf: Always test unprivileged programs") added
libcap dependency into CFLAGS. Use LDLIBS instead to fix linking of
test_verifier.

3. Introduced commit d83c3ba0b926 ("selftests: Fix selftests build to
just build, not run tests").

  Reordering the Makefile allows us to remove the 'all' target.

Tested both:
    selftests/bpf$ make
and
    selftests$ make TARGETS=bpf
on Ubuntu 16.04.2.

Signed-off-by: Zi Shen Lim <zlim.lnx@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Shuah Khan <shuahkh@osg.samsung.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/Makefile | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 67531f47781b..6a1ad58cb66f 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -1,22 +1,23 @@
 LIBDIR := ../../../lib
-BPFOBJ := $(LIBDIR)/bpf/bpf.o
+BPFDIR := $(LIBDIR)/bpf
 
-CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR) $(BPFOBJ)
+CFLAGS += -Wall -O2 -I../../../include/uapi -I$(LIBDIR)
+LDLIBS += -lcap
 
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map
 
 TEST_PROGS := test_kmod.sh
 
-all: $(TEST_GEN_PROGS)
+include ../lib.mk
+
+BPFOBJ := $(OUTPUT)/bpf.o
+
+$(TEST_GEN_PROGS): $(BPFOBJ)
 
-.PHONY: all clean force
+.PHONY: force
 
 # force a rebuild of BPFOBJ when its dependencies are updated
 force:
 
 $(BPFOBJ): force
-	$(MAKE) -C $(dir $(BPFOBJ))
-
-$(test_objs): $(BPFOBJ)
-
-include ../lib.mk
+	$(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/
-- 
cgit v1.2.3


From 4071898bf0f4d79ff353db327af2a15123272548 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Sun, 19 Mar 2017 09:19:57 -0700
Subject: net: qmi_wwan: Add USB IDs for MDM6600 modem on Motorola Droid 4
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This gets qmicli working with the MDM6600 modem.

Cc: Bjørn Mork <bjorn@mork.no>
Reviewed-by: Sebastian Reichel <sre@kernel.org>
Tested-by: Sebastian Reichel <sre@kernel.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Acked-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/qmi_wwan.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index f8d55aa058ec..156f7f85e486 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -580,6 +580,10 @@ static const struct usb_device_id products[] = {
 		USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, USB_CLASS_VENDOR_SPEC, 0x01, 0x69),
 		.driver_info        = (unsigned long)&qmi_wwan_info,
 	},
+	{	/* Motorola Mapphone devices with MDM6600 */
+		USB_VENDOR_AND_INTERFACE_INFO(0x22b8, USB_CLASS_VENDOR_SPEC, 0xfb, 0xff),
+		.driver_info        = (unsigned long)&qmi_wwan_info,
+	},
 
 	/* 2. Combined interface devices matching on class+protocol */
 	{	/* Huawei E367 and possibly others in "Windows mode" */
-- 
cgit v1.2.3


From dd7406dd334a98ada3ff5371847a3eeb4ba16313 Mon Sep 17 00:00:00 2001
From: Alex Hemme <ahemme@cisco.com>
Date: Tue, 7 Mar 2017 14:38:29 -0500
Subject: hwmon: (max31790) Set correct PWM value

Traced fans not spinning to incorrect PWM value being written.
The passed in value was written instead of the calulated value.

Fixes: 54187ff9d766 ("hwmon: (max31790) Convert to use new hwmon registration API")
Signed-off-by: Alex Hemme <ahemme@cisco.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/max31790.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/max31790.c b/drivers/hwmon/max31790.c
index c1b9275978f9..281491cca510 100644
--- a/drivers/hwmon/max31790.c
+++ b/drivers/hwmon/max31790.c
@@ -311,7 +311,7 @@ static int max31790_write_pwm(struct device *dev, u32 attr, int channel,
 		data->pwm[channel] = val << 8;
 		err = i2c_smbus_write_word_swapped(client,
 						   MAX31790_REG_PWMOUT(channel),
-						   val);
+						   data->pwm[channel]);
 		break;
 	case hwmon_pwm_enable:
 		fan_config = data->fan_config[channel];
-- 
cgit v1.2.3


From 8358378b22518d92424597503d3c1cd302a490b6 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sun, 12 Mar 2017 06:18:58 -0700
Subject: hwmon: (it87) Avoid registering the same chip on both SIO addresses

IT8705F is known to respond on both SIO addresses. Registering it twice
may result in system lockups.

Reported-by: Russell King <linux@armlinux.org.uk>
Fixes: e84bd9535e2b ("hwmon: (it87) Add support for second Super-IO chip")
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/it87.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c
index efb01c247e2d..4dfc7238313e 100644
--- a/drivers/hwmon/it87.c
+++ b/drivers/hwmon/it87.c
@@ -3198,7 +3198,7 @@ static int __init sm_it87_init(void)
 {
 	int sioaddr[2] = { REG_2E, REG_4E };
 	struct it87_sio_data sio_data;
-	unsigned short isa_address;
+	unsigned short isa_address[2];
 	bool found = false;
 	int i, err;
 
@@ -3208,15 +3208,29 @@ static int __init sm_it87_init(void)
 
 	for (i = 0; i < ARRAY_SIZE(sioaddr); i++) {
 		memset(&sio_data, 0, sizeof(struct it87_sio_data));
-		isa_address = 0;
-		err = it87_find(sioaddr[i], &isa_address, &sio_data);
-		if (err || isa_address == 0)
+		isa_address[i] = 0;
+		err = it87_find(sioaddr[i], &isa_address[i], &sio_data);
+		if (err || isa_address[i] == 0)
 			continue;
+		/*
+		 * Don't register second chip if its ISA address matches
+		 * the first chip's ISA address.
+		 */
+		if (i && isa_address[i] == isa_address[0])
+			break;
 
-		err = it87_device_add(i, isa_address, &sio_data);
+		err = it87_device_add(i, isa_address[i], &sio_data);
 		if (err)
 			goto exit_dev_unregister;
+
 		found = true;
+
+		/*
+		 * IT8705F may respond on both SIO addresses.
+		 * Stop probing after finding one.
+		 */
+		if (sio_data.type == it87)
+			break;
 	}
 
 	if (!found) {
-- 
cgit v1.2.3


From a5023a99393dab276069cd60dad3e61d57720fda Mon Sep 17 00:00:00 2001
From: Peter Huewe <PeterHuewe@gmx.de>
Date: Fri, 17 Mar 2017 00:28:56 +0100
Subject: hwmon: Add missing HWMON_T_ALARM

Unfortunately the HWMON_T_ALARM define was missing,
although the associated entry was present in hwmon_temp_attributes.
This is needed to convert drivers to the new interface which use channel
based alarms.

Signed-off-by: Peter Huewe <peterhuewe@gmx.de>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 include/linux/hwmon.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index 78d59dba563e..88b673749121 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -88,6 +88,7 @@ enum hwmon_temp_attributes {
 #define HWMON_T_CRIT_HYST	BIT(hwmon_temp_crit_hyst)
 #define HWMON_T_EMERGENCY	BIT(hwmon_temp_emergency)
 #define HWMON_T_EMERGENCY_HYST	BIT(hwmon_temp_emergency_hyst)
+#define HWMON_T_ALARM		BIT(hwmon_temp_alarm)
 #define HWMON_T_MIN_ALARM	BIT(hwmon_temp_min_alarm)
 #define HWMON_T_MAX_ALARM	BIT(hwmon_temp_max_alarm)
 #define HWMON_T_CRIT_ALARM	BIT(hwmon_temp_crit_alarm)
-- 
cgit v1.2.3


From de288e36fe33f7e06fa272bc8e2f85aa386d99aa Mon Sep 17 00:00:00 2001
From: Janusz Dziedzic <januszx.dziedzic@intel.com>
Date: Mon, 13 Mar 2017 14:11:32 +0200
Subject: usb: dwc3: gadget: delay unmap of bounced requests

In the case of bounced ep0 requests, we must delay DMA operation until
after ->complete() otherwise we might overwrite contents of req->buf.

This caused problems with RNDIS gadget.

Signed-off-by: Janusz Dziedzic <januszx.dziedzic@intel.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/dwc3/gadget.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 0d75158e43fe..79e7a3480d51 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -171,6 +171,7 @@ void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req,
 		int status)
 {
 	struct dwc3			*dwc = dep->dwc;
+	unsigned int			unmap_after_complete = false;
 
 	req->started = false;
 	list_del(&req->list);
@@ -180,11 +181,19 @@ void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req,
 	if (req->request.status == -EINPROGRESS)
 		req->request.status = status;
 
-	if (dwc->ep0_bounced && dep->number <= 1)
+	/*
+	 * NOTICE we don't want to unmap before calling ->complete() if we're
+	 * dealing with a bounced ep0 request. If we unmap it here, we would end
+	 * up overwritting the contents of req->buf and this could confuse the
+	 * gadget driver.
+	 */
+	if (dwc->ep0_bounced && dep->number <= 1) {
 		dwc->ep0_bounced = false;
-
-	usb_gadget_unmap_request_by_dev(dwc->sysdev,
-			&req->request, req->direction);
+		unmap_after_complete = true;
+	} else {
+		usb_gadget_unmap_request_by_dev(dwc->sysdev,
+				&req->request, req->direction);
+	}
 
 	trace_dwc3_gadget_giveback(req);
 
@@ -192,6 +201,10 @@ void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req,
 	usb_gadget_giveback_request(&dep->endpoint, &req->request);
 	spin_lock(&dwc->lock);
 
+	if (unmap_after_complete)
+		usb_gadget_unmap_request_by_dev(dwc->sysdev,
+				&req->request, req->direction);
+
 	if (dep->number > 1)
 		pm_runtime_put(dwc->dev);
 }
-- 
cgit v1.2.3


From 74098c4ac782afd71b35ac56f9536ae2f5cd7da7 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Tue, 14 Mar 2017 12:09:56 +0100
Subject: usb: gadget: acm: fix endianness in notifications
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The gadget code exports the bitfield for serial status changes
over the wire in its internal endianness. The fix is to convert
to little endian before sending it over the wire.

Signed-off-by: Oliver Neukum <oneukum@suse.com>
Tested-by: 家瑋 <momo1208@gmail.com>
CC: <stable@vger.kernel.org>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/function/f_acm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/function/f_acm.c b/drivers/usb/gadget/function/f_acm.c
index a30766ca4226..5e3828d9dac7 100644
--- a/drivers/usb/gadget/function/f_acm.c
+++ b/drivers/usb/gadget/function/f_acm.c
@@ -535,13 +535,15 @@ static int acm_notify_serial_state(struct f_acm *acm)
 {
 	struct usb_composite_dev *cdev = acm->port.func.config->cdev;
 	int			status;
+	__le16			serial_state;
 
 	spin_lock(&acm->lock);
 	if (acm->notify_req) {
 		dev_dbg(&cdev->gadget->dev, "acm ttyGS%d serial state %04x\n",
 			acm->port_num, acm->serial_state);
+		serial_state = cpu_to_le16(acm->serial_state);
 		status = acm_cdc_notify(acm, USB_CDC_NOTIFY_SERIAL_STATE,
-				0, &acm->serial_state, sizeof(acm->serial_state));
+				0, &serial_state, sizeof(acm->serial_state));
 	} else {
 		acm->pending = true;
 		status = 0;
-- 
cgit v1.2.3


From 09424c50b7dff40cb30011c09114404a4656e023 Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Wed, 8 Mar 2017 16:05:43 +0200
Subject: usb: gadget: f_uvc: Fix SuperSpeed companion descriptor's
 wBytesPerInterval

The streaming_maxburst module parameter is 0 offset (0..15)
so we must add 1 while using it for wBytesPerInterval
calculation for the SuperSpeed companion descriptor.

Without this host uvcvideo driver will always see the wrong
wBytesPerInterval for SuperSpeed uvc gadget and may not find
a suitable video interface endpoint.
e.g. for streaming_maxburst = 0 case it will always
fail as wBytePerInterval was evaluating to 0.

Cc: stable@vger.kernel.org
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/function/f_uvc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c
index 29b41b5dee04..c7689d05356c 100644
--- a/drivers/usb/gadget/function/f_uvc.c
+++ b/drivers/usb/gadget/function/f_uvc.c
@@ -625,7 +625,7 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f)
 	uvc_ss_streaming_comp.bMaxBurst = opts->streaming_maxburst;
 	uvc_ss_streaming_comp.wBytesPerInterval =
 		cpu_to_le16(max_packet_size * max_packet_mult *
-			    opts->streaming_maxburst);
+			    (opts->streaming_maxburst + 1));
 
 	/* Allocate endpoints. */
 	ep = usb_ep_autoconfig(cdev->gadget, &uvc_control_ep);
-- 
cgit v1.2.3


From 16bb05d98c904a4f6c5ce7e2d992299f794acbf2 Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Wed, 8 Mar 2017 16:05:44 +0200
Subject: usb: gadget: f_uvc: Sanity check wMaxPacketSize for SuperSpeed

As per USB3.0 Specification "Table 9-20. Standard Endpoint Descriptor",
for interrupt and isochronous endpoints, wMaxPacketSize must be set to
1024 if the endpoint defines bMaxBurst to be greater than zero.

Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/function/f_uvc.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c
index c7689d05356c..f8a1881609a2 100644
--- a/drivers/usb/gadget/function/f_uvc.c
+++ b/drivers/usb/gadget/function/f_uvc.c
@@ -594,6 +594,14 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f)
 	opts->streaming_maxpacket = clamp(opts->streaming_maxpacket, 1U, 3072U);
 	opts->streaming_maxburst = min(opts->streaming_maxburst, 15U);
 
+	/* For SS, wMaxPacketSize has to be 1024 if bMaxBurst is not 0 */
+	if (opts->streaming_maxburst &&
+	    (opts->streaming_maxpacket % 1024) != 0) {
+		opts->streaming_maxpacket = roundup(opts->streaming_maxpacket, 1024);
+		INFO(cdev, "overriding streaming_maxpacket to %d\n",
+		     opts->streaming_maxpacket);
+	}
+
 	/* Fill in the FS/HS/SS Video Streaming specific descriptors from the
 	 * module parameters.
 	 *
-- 
cgit v1.2.3


From 1f459262b0e1649a1e5ad12fa4c66eb76c2220ce Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Fri, 10 Mar 2017 15:39:32 -0600
Subject: usb: gadget: udc: remove pointer dereference after free

Remove pointer dereference after free.

Addresses-Coverity-ID: 1091173
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/udc/pch_udc.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/usb/gadget/udc/pch_udc.c b/drivers/usb/gadget/udc/pch_udc.c
index a97da645c1b9..8a365aad66fe 100644
--- a/drivers/usb/gadget/udc/pch_udc.c
+++ b/drivers/usb/gadget/udc/pch_udc.c
@@ -1523,7 +1523,6 @@ static void pch_udc_free_dma_chain(struct pch_udc_dev *dev,
 		td = phys_to_virt(addr);
 		addr2 = (dma_addr_t)td->next;
 		pci_pool_free(dev->data_requests, td, addr);
-		td->next = 0x00;
 		addr = addr2;
 	}
 	req->chain_len = 1;
-- 
cgit v1.2.3


From 25cd9721c2b16ee0d775e36ec3af31f392003f80 Mon Sep 17 00:00:00 2001
From: Krzysztof Opasiak <k.opasiak@samsung.com>
Date: Tue, 31 Jan 2017 18:12:31 +0100
Subject: usb: gadget: f_hid: fix: Don't access hidg->req without spinlock held

hidg->req should be accessed only with write_spinlock held as it is
set to NULL when we get disabled by host.

Signed-off-by: Krzysztof Opasiak <k.opasiak@samsung.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/function/f_hid.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c
index 89b48bcc377a..5eea44823ca0 100644
--- a/drivers/usb/gadget/function/f_hid.c
+++ b/drivers/usb/gadget/function/f_hid.c
@@ -367,7 +367,7 @@ try_again:
 	count  = min_t(unsigned, count, hidg->report_length);
 
 	spin_unlock_irqrestore(&hidg->write_spinlock, flags);
-	status = copy_from_user(hidg->req->buf, buffer, count);
+	status = copy_from_user(req->buf, buffer, count);
 
 	if (status != 0) {
 		ERROR(hidg->func.config->cdev,
@@ -378,9 +378,9 @@ try_again:
 
 	spin_lock_irqsave(&hidg->write_spinlock, flags);
 
-	/* we our function has been disabled by host */
+	/* when our function has been disabled by host */
 	if (!hidg->req) {
-		free_ep_req(hidg->in_ep, hidg->req);
+		free_ep_req(hidg->in_ep, req);
 		/*
 		 * TODO
 		 * Should we fail with error here?
@@ -394,7 +394,7 @@ try_again:
 	req->complete = f_hidg_req_complete;
 	req->context  = hidg;
 
-	status = usb_ep_queue(hidg->in_ep, hidg->req, GFP_ATOMIC);
+	status = usb_ep_queue(hidg->in_ep, req, GFP_ATOMIC);
 	if (status < 0) {
 		ERROR(hidg->func.config->cdev,
 			"usb_ep_queue error on int endpoint %zd\n", status);
-- 
cgit v1.2.3


From 90400c5884b89a5db232049721ef4dc1ab2f1f1c Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 24 Feb 2017 14:35:54 +0200
Subject: extcon: int3496: Rename GPIO pins in accordance with binding

Update GPIO pin names in extcon-intel-int3496.c driver to follow
the existing extcon binding.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/extcon/extcon-intel-int3496.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/extcon/extcon-intel-int3496.c b/drivers/extcon/extcon-intel-int3496.c
index 38eb6cab938f..81713bf7487e 100644
--- a/drivers/extcon/extcon-intel-int3496.c
+++ b/drivers/extcon/extcon-intel-int3496.c
@@ -105,13 +105,13 @@ static int int3496_probe(struct platform_device *pdev)
 		return data->usb_id_irq;
 	}
 
-	data->gpio_vbus_en = devm_gpiod_get_index(dev, "vbus en",
+	data->gpio_vbus_en = devm_gpiod_get_index(dev, "vbus",
 						 INT3496_GPIO_VBUS_EN,
 						 GPIOD_ASIS);
 	if (IS_ERR(data->gpio_vbus_en))
 		dev_info(dev, "can't request VBUS EN GPIO\n");
 
-	data->gpio_usb_mux = devm_gpiod_get_index(dev, "usb mux",
+	data->gpio_usb_mux = devm_gpiod_get_index(dev, "mux",
 						 INT3496_GPIO_USB_MUX,
 						 GPIOD_ASIS);
 	if (IS_ERR(data->gpio_usb_mux))
-- 
cgit v1.2.3


From 8cb2cbae45ae46b1e1be16950670d5c5d4408474 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 24 Feb 2017 14:35:55 +0200
Subject: extcon: int3496: Add GPIO ACPI mapping table

In order to make GPIO ACPI library stricter prepare users of
gpiod_get_index() to correctly behave when there no mapping is
provided by firmware.

Here we add explicit mapping between _CRS GpioIo() resources and
their names used in the driver.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 Documentation/extcon/intel-int3496.txt |  5 +++++
 drivers/extcon/extcon-intel-int3496.c  | 20 ++++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/Documentation/extcon/intel-int3496.txt b/Documentation/extcon/intel-int3496.txt
index af0b366c25b7..8155dbc7fad3 100644
--- a/Documentation/extcon/intel-int3496.txt
+++ b/Documentation/extcon/intel-int3496.txt
@@ -20,3 +20,8 @@ Index 1: The output gpio for enabling Vbus output from the device to the otg
 Index 2: The output gpio for muxing of the data pins between the USB host and
          the USB peripheral controller, write 1 to mux to the peripheral
          controller
+
+There is a mapping between indices and GPIO connection IDs as follows
+	id	index 0
+	vbus	index 1
+	mux	index 2
diff --git a/drivers/extcon/extcon-intel-int3496.c b/drivers/extcon/extcon-intel-int3496.c
index 81713bf7487e..22a529eb1b1a 100644
--- a/drivers/extcon/extcon-intel-int3496.c
+++ b/drivers/extcon/extcon-intel-int3496.c
@@ -45,6 +45,17 @@ static const unsigned int int3496_cable[] = {
 	EXTCON_NONE,
 };
 
+static const struct acpi_gpio_params id_gpios = { INT3496_GPIO_USB_ID, 0, false };
+static const struct acpi_gpio_params vbus_gpios = { INT3496_GPIO_VBUS_EN, 0, false };
+static const struct acpi_gpio_params mux_gpios = { INT3496_GPIO_USB_MUX, 0, false };
+
+static const struct acpi_gpio_mapping acpi_int3496_default_gpios[] = {
+	{ "id-gpios", &id_gpios, 1 },
+	{ "vbus-gpios", &vbus_gpios, 1 },
+	{ "mux-gpios", &mux_gpios, 1 },
+	{ },
+};
+
 static void int3496_do_usb_id(struct work_struct *work)
 {
 	struct int3496_data *data =
@@ -83,6 +94,13 @@ static int int3496_probe(struct platform_device *pdev)
 	struct int3496_data *data;
 	int ret;
 
+	ret = acpi_dev_add_driver_gpios(ACPI_COMPANION(dev),
+					acpi_int3496_default_gpios);
+	if (ret) {
+		dev_err(dev, "can't add GPIO ACPI mapping\n");
+		return ret;
+	}
+
 	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
@@ -154,6 +172,8 @@ static int int3496_remove(struct platform_device *pdev)
 	devm_free_irq(&pdev->dev, data->usb_id_irq, data);
 	cancel_delayed_work_sync(&data->work);
 
+	acpi_dev_remove_driver_gpios(ACPI_COMPANION(&pdev->dev));
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 059c7874b87575e48d2c7702849ff56017059195 Mon Sep 17 00:00:00 2001
From: Peter Robinson <pbrobinson@gmail.com>
Date: Thu, 2 Mar 2017 18:10:10 +0000
Subject: extcon: int3496: Add dependency on X86 as it's Intel specific

Add dependency on X86 so it doesn't show up on other arches and
add a option for compile test so it still gets build coverage.

Signed-off-by: Peter Robinson <pbrobinson@gmail.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/extcon/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/extcon/Kconfig b/drivers/extcon/Kconfig
index 96bbae579c0b..fc09c76248b4 100644
--- a/drivers/extcon/Kconfig
+++ b/drivers/extcon/Kconfig
@@ -44,7 +44,7 @@ config EXTCON_GPIO
 
 config EXTCON_INTEL_INT3496
 	tristate "Intel INT3496 ACPI device extcon driver"
-	depends on GPIOLIB && ACPI
+	depends on GPIOLIB && ACPI && (X86 || COMPILE_TEST)
 	help
 	  Say Y here to enable extcon support for USB OTG ports controlled by
 	  an Intel INT3496 ACPI device.
-- 
cgit v1.2.3


From 408c5b41d215b317ab58c98b959454407ee4a53d Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 10 Mar 2017 21:52:08 +0100
Subject: extcon: int3496: Use gpiod_get instead of gpiod_get_index

Now that we've an acpi mapping table we should be using gpiod_get
instead of gpiod_get_index.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/extcon/extcon-intel-int3496.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/extcon/extcon-intel-int3496.c b/drivers/extcon/extcon-intel-int3496.c
index 22a529eb1b1a..00f0e60a00ce 100644
--- a/drivers/extcon/extcon-intel-int3496.c
+++ b/drivers/extcon/extcon-intel-int3496.c
@@ -108,9 +108,7 @@ static int int3496_probe(struct platform_device *pdev)
 	data->dev = dev;
 	INIT_DELAYED_WORK(&data->work, int3496_do_usb_id);
 
-	data->gpio_usb_id = devm_gpiod_get_index(dev, "id",
-						INT3496_GPIO_USB_ID,
-						GPIOD_IN);
+	data->gpio_usb_id = devm_gpiod_get(dev, "id", GPIOD_IN);
 	if (IS_ERR(data->gpio_usb_id)) {
 		ret = PTR_ERR(data->gpio_usb_id);
 		dev_err(dev, "can't request USB ID GPIO: %d\n", ret);
@@ -123,15 +121,11 @@ static int int3496_probe(struct platform_device *pdev)
 		return data->usb_id_irq;
 	}
 
-	data->gpio_vbus_en = devm_gpiod_get_index(dev, "vbus",
-						 INT3496_GPIO_VBUS_EN,
-						 GPIOD_ASIS);
+	data->gpio_vbus_en = devm_gpiod_get(dev, "vbus", GPIOD_ASIS);
 	if (IS_ERR(data->gpio_vbus_en))
 		dev_info(dev, "can't request VBUS EN GPIO\n");
 
-	data->gpio_usb_mux = devm_gpiod_get_index(dev, "mux",
-						 INT3496_GPIO_USB_MUX,
-						 GPIOD_ASIS);
+	data->gpio_usb_mux = devm_gpiod_get(dev, "mux", GPIOD_ASIS);
 	if (IS_ERR(data->gpio_usb_mux))
 		dev_info(dev, "can't request USB MUX GPIO\n");
 
-- 
cgit v1.2.3


From 70216fd937fea79c20705400540fa48f86ddf1c5 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Mon, 13 Mar 2017 12:28:34 +0100
Subject: extcon: int3496: Set the id pin to direction-input if necessary

With the new more strict ACPI gpio code the dsdt's IoRestriction
flags are honored on gpiod_get, but in some dsdt's it is wrong,
so explicitly call gpiod_direction_input on the id gpio if
necessary.

This fixes the following errors when the int3496 code is used
together with the new more strict ACPI gpio code:

[ 2382.484415] gpio gpiochip1: (INT33FF:01): gpiochip_lock_as_irq: tried to flag a GPIO set as output for IRQ
[ 2382.484425] gpio gpiochip1: (INT33FF:01): unable to lock HW IRQ 3 for IRQ
[ 2382.484429] genirq: Failed to request resources for INT3496:00 (irq 174) on irqchip chv-gpio
[ 2382.484518] intel-int3496 INT3496:00: can't request IRQ for USB ID GPIO: -22
[ 2382.500359] intel-int3496: probe of INT3496:00 failed with error -22

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/extcon/extcon-intel-int3496.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/extcon/extcon-intel-int3496.c b/drivers/extcon/extcon-intel-int3496.c
index 00f0e60a00ce..9d17984bbbd4 100644
--- a/drivers/extcon/extcon-intel-int3496.c
+++ b/drivers/extcon/extcon-intel-int3496.c
@@ -113,6 +113,9 @@ static int int3496_probe(struct platform_device *pdev)
 		ret = PTR_ERR(data->gpio_usb_id);
 		dev_err(dev, "can't request USB ID GPIO: %d\n", ret);
 		return ret;
+	} else if (gpiod_get_direction(data->gpio_usb_id) != GPIOF_DIR_IN) {
+		dev_warn(dev, FW_BUG "USB ID GPIO not in input mode, fixing\n");
+		gpiod_direction_input(data->gpio_usb_id);
 	}
 
 	data->usb_id_irq = gpiod_to_irq(data->gpio_usb_id);
-- 
cgit v1.2.3


From 5003ae1e735e6bfe4679d9bed6846274f322e77e Mon Sep 17 00:00:00 2001
From: Koos Vriezen <koos.vriezen@gmail.com>
Date: Wed, 1 Mar 2017 21:02:50 +0100
Subject: iommu/vt-d: Fix NULL pointer dereference in device_to_iommu

The function device_to_iommu() in the Intel VT-d driver
lacks a NULL-ptr check, resulting in this oops at boot on
some platforms:

 BUG: unable to handle kernel NULL pointer dereference at 00000000000007ab
 IP: [<ffffffff8132234a>] device_to_iommu+0x11a/0x1a0
 PGD 0

 [...]

 Call Trace:
   ? find_or_alloc_domain.constprop.29+0x1a/0x300
   ? dw_dma_probe+0x561/0x580 [dw_dmac_core]
   ? __get_valid_domain_for_dev+0x39/0x120
   ? __intel_map_single+0x138/0x180
   ? intel_alloc_coherent+0xb6/0x120
   ? sst_hsw_dsp_init+0x173/0x420 [snd_soc_sst_haswell_pcm]
   ? mutex_lock+0x9/0x30
   ? kernfs_add_one+0xdb/0x130
   ? devres_add+0x19/0x60
   ? hsw_pcm_dev_probe+0x46/0xd0 [snd_soc_sst_haswell_pcm]
   ? platform_drv_probe+0x30/0x90
   ? driver_probe_device+0x1ed/0x2b0
   ? __driver_attach+0x8f/0xa0
   ? driver_probe_device+0x2b0/0x2b0
   ? bus_for_each_dev+0x55/0x90
   ? bus_add_driver+0x110/0x210
   ? 0xffffffffa11ea000
   ? driver_register+0x52/0xc0
   ? 0xffffffffa11ea000
   ? do_one_initcall+0x32/0x130
   ? free_vmap_area_noflush+0x37/0x70
   ? kmem_cache_alloc+0x88/0xd0
   ? do_init_module+0x51/0x1c4
   ? load_module+0x1ee9/0x2430
   ? show_taint+0x20/0x20
   ? kernel_read_file+0xfd/0x190
   ? SyS_finit_module+0xa3/0xb0
   ? do_syscall_64+0x4a/0xb0
   ? entry_SYSCALL64_slow_path+0x25/0x25
 Code: 78 ff ff ff 4d 85 c0 74 ee 49 8b 5a 10 0f b6 9b e0 00 00 00 41 38 98 e0 00 00 00 77 da 0f b6 eb 49 39 a8 88 00 00 00 72 ce eb 8f <41> f6 82 ab 07 00 00 04 0f 85 76 ff ff ff 0f b6 4d 08 88 0e 49
 RIP  [<ffffffff8132234a>] device_to_iommu+0x11a/0x1a0
  RSP <ffffc90001457a78>
 CR2: 00000000000007ab
 ---[ end trace 16f974b6d58d0aad ]---

Add the missing pointer check.

Fixes: 1c387188c60f53b338c20eee32db055dfe022a9b ("iommu/vt-d: Fix IOMMU lookup for SR-IOV Virtual Functions")
Signed-off-by: Koos Vriezen <koos.vriezen@gmail.com>
Cc: stable@vger.kernel.org # 4.8.15+
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 238ad3447712..91d60493b57c 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -916,7 +916,7 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf
 				 * which we used for the IOMMU lookup. Strictly speaking
 				 * we could do this for all PCI devices; we only need to
 				 * get the BDF# from the scope table for ACPI matches. */
-				if (pdev->is_virtfn)
+				if (pdev && pdev->is_virtfn)
 					goto got_pdev;
 
 				*bus = drhd->devices[i].bus;
-- 
cgit v1.2.3


From 6e2e0eea072f6f82ac0afbcfa8dc38dcc3a29fa4 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 9 Feb 2017 13:09:08 -0200
Subject: [media] coda/imx-vdoa: platform_driver should not be const

The device driver platform is actually written to during registration,
for setting the owner field, so platform_driver_register() does not
take a const pointer:

drivers/media/platform/coda/imx-vdoa.c: In function 'vdoa_driver_init':
drivers/media/platform/coda/imx-vdoa.c:333:213: error: passing argument 1 of '__platform_driver_register' discards 'const' qualifier from pointer target type [-Werror=discarded-qualifiers]
 module_platform_driver(vdoa_driver);
In file included from drivers/media/platform/coda/imx-vdoa.c:22:0:
include/linux/platform_device.h:199:12: note: expected 'struct platform_driver *' but argument is of type 'const struct platform_driver *'
 extern int __platform_driver_register(struct platform_driver *,
            ^~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/media/platform/coda/imx-vdoa.c: In function 'vdoa_driver_exit':
drivers/media/platform/coda/imx-vdoa.c:333:626: error: passing argument 1 of 'platform_driver_unregister' discards 'const' qualifier from pointer target type [-Werror=discarded-qualifiers]

Remove the modifier again.

Fixes: d2fe28feaebb ("[media] coda/imx-vdoa: constify structs")

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 drivers/media/platform/coda/imx-vdoa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/platform/coda/imx-vdoa.c b/drivers/media/platform/coda/imx-vdoa.c
index 67fd8ffa60a4..669a4c82f1ff 100644
--- a/drivers/media/platform/coda/imx-vdoa.c
+++ b/drivers/media/platform/coda/imx-vdoa.c
@@ -321,7 +321,7 @@ static const struct of_device_id vdoa_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, vdoa_dt_ids);
 
-static const struct platform_driver vdoa_driver = {
+static struct platform_driver vdoa_driver = {
 	.probe		= vdoa_probe,
 	.remove		= vdoa_remove,
 	.driver		= {
-- 
cgit v1.2.3


From 389e3f0d57e6ab50c207bee5ea2c4a5982a029c2 Mon Sep 17 00:00:00 2001
From: Shailendra Verma <shailendra.v@samsung.com>
Date: Fri, 2 Dec 2016 02:48:01 -0200
Subject: [media] bdisp: Clean up file handle in open() error path

The File handle is not yet added in the vdev list.So no need to call
v4l2_fh_del(&ctx->fh)if it fails to create control.

Signed-off-by: Shailendra Verma <shailendra.v@samsung.com>
Reviewed-by: Fabien Dessenne <fabien.dessenne@st.com>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 drivers/media/platform/sti/bdisp/bdisp-v4l2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/platform/sti/bdisp/bdisp-v4l2.c b/drivers/media/platform/sti/bdisp/bdisp-v4l2.c
index 823608112d89..7918b928f058 100644
--- a/drivers/media/platform/sti/bdisp/bdisp-v4l2.c
+++ b/drivers/media/platform/sti/bdisp/bdisp-v4l2.c
@@ -632,8 +632,8 @@ static int bdisp_open(struct file *file)
 
 error_ctrls:
 	bdisp_ctrls_delete(ctx);
-error_fh:
 	v4l2_fh_del(&ctx->fh);
+error_fh:
 	v4l2_fh_exit(&ctx->fh);
 	bdisp_hw_free_nodes(ctx);
 mem_ctx:
-- 
cgit v1.2.3


From 24a47426066c8ba16a4db1b20a41d63187281195 Mon Sep 17 00:00:00 2001
From: Thibault Saunier <thibault.saunier@osg.samsung.com>
Date: Wed, 1 Feb 2017 18:05:21 -0200
Subject: [media] exynos-gsc: Do not swap cb/cr for semi planar formats

In the case of semi planar formats cb and cr are in the same plane
in memory, meaning that will be set to 'cb' whatever the format is,
and whatever the (packed) order of those components are.

Suggested-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
Signed-off-by: Thibault Saunier <thibault.saunier@osg.samsung.com>
Signed-off-by: Javier Martinez Canillas <javier@osg.samsung.com>
Acked-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 drivers/media/platform/exynos-gsc/gsc-core.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/media/platform/exynos-gsc/gsc-core.c b/drivers/media/platform/exynos-gsc/gsc-core.c
index cbb03768f5d7..0f0c389f8897 100644
--- a/drivers/media/platform/exynos-gsc/gsc-core.c
+++ b/drivers/media/platform/exynos-gsc/gsc-core.c
@@ -861,9 +861,7 @@ int gsc_prepare_addr(struct gsc_ctx *ctx, struct vb2_buffer *vb,
 
 	if ((frame->fmt->pixelformat == V4L2_PIX_FMT_VYUY) ||
 		(frame->fmt->pixelformat == V4L2_PIX_FMT_YVYU) ||
-		(frame->fmt->pixelformat == V4L2_PIX_FMT_NV61) ||
 		(frame->fmt->pixelformat == V4L2_PIX_FMT_YVU420) ||
-		(frame->fmt->pixelformat == V4L2_PIX_FMT_NV21) ||
 		(frame->fmt->pixelformat == V4L2_PIX_FMT_YVU420M))
 		swap(addr->cb, addr->cr);
 
-- 
cgit v1.2.3


From 95a49603707d982b25d17c5b70e220a05556a2f9 Mon Sep 17 00:00:00 2001
From: Ming Lei <tom.leiming@gmail.com>
Date: Wed, 22 Mar 2017 10:14:43 +0800
Subject: blk-mq: don't complete un-started request in timeout handler

When iterating busy requests in timeout handler,
if the STARTED flag of one request isn't set, that means
the request is being processed in block layer or driver, and
isn't submitted to hardware yet.

In current implementation of blk_mq_check_expired(),
if the request queue becomes dying, un-started requests are
handled as being completed/freed immediately. This way is
wrong, and can cause rq corruption or double allocation[1][2],
when doing I/O and removing&resetting NVMe device at the sametime.

This patch fixes several issues reported by Yi Zhang.

[1]. oops log 1
[  581.789754] ------------[ cut here ]------------
[  581.789758] kernel BUG at block/blk-mq.c:374!
[  581.789760] invalid opcode: 0000 [#1] SMP
[  581.789761] Modules linked in: vfat fat ipmi_ssif intel_rapl sb_edac
edac_core x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm nvme
irqbypass crct10dif_pclmul nvme_core crc32_pclmul ghash_clmulni_intel
intel_cstate ipmi_si mei_me ipmi_devintf intel_uncore sg ipmi_msghandler
intel_rapl_perf iTCO_wdt mei iTCO_vendor_support mxm_wmi lpc_ich dcdbas shpchp
pcspkr acpi_power_meter wmi nfsd auth_rpcgss nfs_acl lockd dm_multipath grace
sunrpc ip_tables xfs libcrc32c sd_mod mgag200 i2c_algo_bit drm_kms_helper
syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm ahci libahci
crc32c_intel tg3 libata megaraid_sas i2c_core ptp fjes pps_core dm_mirror
dm_region_hash dm_log dm_mod
[  581.789796] CPU: 1 PID: 1617 Comm: kworker/1:1H Not tainted 4.10.0.bz1420297+ #4
[  581.789797] Hardware name: Dell Inc. PowerEdge R730xd/072T6D, BIOS 2.2.5 09/06/2016
[  581.789804] Workqueue: kblockd blk_mq_timeout_work
[  581.789806] task: ffff8804721c8000 task.stack: ffffc90006ee4000
[  581.789809] RIP: 0010:blk_mq_end_request+0x58/0x70
[  581.789810] RSP: 0018:ffffc90006ee7d50 EFLAGS: 00010202
[  581.789811] RAX: 0000000000000001 RBX: ffff8802e4195340 RCX: ffff88028e2f4b88
[  581.789812] RDX: 0000000000001000 RSI: 0000000000001000 RDI: 0000000000000000
[  581.789813] RBP: ffffc90006ee7d60 R08: 0000000000000003 R09: ffff88028e2f4b00
[  581.789814] R10: 0000000000001000 R11: 0000000000000001 R12: 00000000fffffffb
[  581.789815] R13: ffff88042abe5780 R14: 000000000000002d R15: ffff88046fbdff80
[  581.789817] FS:  0000000000000000(0000) GS:ffff88047fc00000(0000) knlGS:0000000000000000
[  581.789818] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  581.789819] CR2: 00007f64f403a008 CR3: 000000014d078000 CR4: 00000000001406e0
[  581.789820] Call Trace:
[  581.789825]  blk_mq_check_expired+0x76/0x80
[  581.789828]  bt_iter+0x45/0x50
[  581.789830]  blk_mq_queue_tag_busy_iter+0xdd/0x1f0
[  581.789832]  ? blk_mq_rq_timed_out+0x70/0x70
[  581.789833]  ? blk_mq_rq_timed_out+0x70/0x70
[  581.789840]  ? __switch_to+0x140/0x450
[  581.789841]  blk_mq_timeout_work+0x88/0x170
[  581.789845]  process_one_work+0x165/0x410
[  581.789847]  worker_thread+0x137/0x4c0
[  581.789851]  kthread+0x101/0x140
[  581.789853]  ? rescuer_thread+0x3b0/0x3b0
[  581.789855]  ? kthread_park+0x90/0x90
[  581.789860]  ret_from_fork+0x2c/0x40
[  581.789861] Code: 48 85 c0 74 0d 44 89 e6 48 89 df ff d0 5b 41 5c 5d c3 48
8b bb 70 01 00 00 48 85 ff 75 0f 48 89 df e8 7d f0 ff ff 5b 41 5c 5d c3 <0f>
0b e8 71 f0 ff ff 90 eb e9 0f 1f 40 00 66 2e 0f 1f 84 00 00
[  581.789882] RIP: blk_mq_end_request+0x58/0x70 RSP: ffffc90006ee7d50
[  581.789889] ---[ end trace bcaf03d9a14a0a70 ]---

[2]. oops log2
[ 6984.857362] BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
[ 6984.857372] IP: nvme_queue_rq+0x6e6/0x8cd [nvme]
[ 6984.857373] PGD 0
[ 6984.857374]
[ 6984.857376] Oops: 0000 [#1] SMP
[ 6984.857379] Modules linked in: ipmi_ssif vfat fat intel_rapl sb_edac
edac_core x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm
irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel ipmi_si iTCO_wdt
iTCO_vendor_support mxm_wmi ipmi_devintf intel_cstate sg dcdbas intel_uncore
mei_me intel_rapl_perf mei pcspkr lpc_ich ipmi_msghandler shpchp
acpi_power_meter wmi nfsd auth_rpcgss dm_multipath nfs_acl lockd grace sunrpc
ip_tables xfs libcrc32c sd_mod mgag200 i2c_algo_bit drm_kms_helper syscopyarea
sysfillrect crc32c_intel sysimgblt fb_sys_fops ttm nvme drm nvme_core ahci
libahci i2c_core tg3 libata ptp megaraid_sas pps_core fjes dm_mirror
dm_region_hash dm_log dm_mod
[ 6984.857416] CPU: 7 PID: 1635 Comm: kworker/7:1H Not tainted
4.10.0-2.el7.bz1420297.x86_64 #1
[ 6984.857417] Hardware name: Dell Inc. PowerEdge R730xd/072T6D, BIOS 2.2.5 09/06/2016
[ 6984.857427] Workqueue: kblockd blk_mq_run_work_fn
[ 6984.857429] task: ffff880476e3da00 task.stack: ffffc90002e90000
[ 6984.857432] RIP: 0010:nvme_queue_rq+0x6e6/0x8cd [nvme]
[ 6984.857433] RSP: 0018:ffffc90002e93c50 EFLAGS: 00010246
[ 6984.857434] RAX: 0000000000000000 RBX: ffff880275646600 RCX: 0000000000001000
[ 6984.857435] RDX: 0000000000000fff RSI: 00000002fba2a000 RDI: ffff8804734e6950
[ 6984.857436] RBP: ffffc90002e93d30 R08: 0000000000002000 R09: 0000000000001000
[ 6984.857437] R10: 0000000000001000 R11: 0000000000000000 R12: ffff8804741d8000
[ 6984.857438] R13: 0000000000000040 R14: ffff880475649f80 R15: ffff8804734e6780
[ 6984.857439] FS:  0000000000000000(0000) GS:ffff88047fcc0000(0000) knlGS:0000000000000000
[ 6984.857440] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 6984.857442] CR2: 0000000000000010 CR3: 0000000001c09000 CR4: 00000000001406e0
[ 6984.857443] Call Trace:
[ 6984.857451]  ? mempool_free+0x2b/0x80
[ 6984.857455]  ? bio_free+0x4e/0x60
[ 6984.857459]  blk_mq_dispatch_rq_list+0xf5/0x230
[ 6984.857462]  blk_mq_process_rq_list+0x133/0x170
[ 6984.857465]  __blk_mq_run_hw_queue+0x8c/0xa0
[ 6984.857467]  blk_mq_run_work_fn+0x12/0x20
[ 6984.857473]  process_one_work+0x165/0x410
[ 6984.857475]  worker_thread+0x137/0x4c0
[ 6984.857478]  kthread+0x101/0x140
[ 6984.857480]  ? rescuer_thread+0x3b0/0x3b0
[ 6984.857481]  ? kthread_park+0x90/0x90
[ 6984.857489]  ret_from_fork+0x2c/0x40
[ 6984.857490] Code: 8b bd 70 ff ff ff 89 95 50 ff ff ff 89 8d 58 ff ff ff 44
89 95 60 ff ff ff e8 b7 dd 12 e1 8b 95 50 ff ff ff 48 89 85 68 ff ff ff <4c>
8b 48 10 44 8b 58 18 8b 8d 58 ff ff ff 44 8b 95 60 ff ff ff
[ 6984.857511] RIP: nvme_queue_rq+0x6e6/0x8cd [nvme] RSP: ffffc90002e93c50
[ 6984.857512] CR2: 0000000000000010
[ 6984.895359] ---[ end trace 2d7ceb528432bf83 ]---

Cc: stable@vger.kernel.org
Reported-by: Yi Zhang <yizhan@redhat.com>
Tested-by: Yi Zhang <yizhan@redhat.com>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index a4546f060e80..08a49c69738b 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -697,17 +697,8 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
 {
 	struct blk_mq_timeout_data *data = priv;
 
-	if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
-		/*
-		 * If a request wasn't started before the queue was
-		 * marked dying, kill it here or it'll go unnoticed.
-		 */
-		if (unlikely(blk_queue_dying(rq->q))) {
-			rq->errors = -EIO;
-			blk_mq_end_request(rq, rq->errors);
-		}
+	if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
 		return;
-	}
 
 	if (time_after_eq(jiffies, rq->deadline)) {
 		if (!blk_mark_rq_complete(rq))
-- 
cgit v1.2.3


From 7d2aa6b814476a2e2794960f844344519246df72 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Mon, 20 Mar 2017 10:17:56 +0100
Subject: iommu/exynos: Block SYSMMU while invalidating FLPD cache

Documentation specifies that SYSMMU should be in blocked state while
performing TLB/FLPD cache invalidation, so add needed calls to
sysmmu_block/unblock.

Fixes: 66a7ed84b345d ("iommu/exynos: Apply workaround of caching fault page table entries")
CC: stable@vger.kernel.org # v4.10+
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/exynos-iommu.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index a7e0821c9967..32d43f1994e4 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -512,7 +512,10 @@ static void sysmmu_tlb_invalidate_flpdcache(struct sysmmu_drvdata *data,
 	spin_lock_irqsave(&data->lock, flags);
 	if (data->active && data->version >= MAKE_MMU_VER(3, 3)) {
 		clk_enable(data->clk_master);
-		__sysmmu_tlb_invalidate_entry(data, iova, 1);
+		if (sysmmu_block(data)) {
+			__sysmmu_tlb_invalidate_entry(data, iova, 1);
+			sysmmu_unblock(data);
+		}
 		clk_disable(data->clk_master);
 	}
 	spin_unlock_irqrestore(&data->lock, flags);
-- 
cgit v1.2.3


From cd37a296a9f890586665bb8974a8b17ee2f17d6d Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Mon, 20 Mar 2017 10:17:57 +0100
Subject: iommu/exynos: Workaround FLPD cache flush issues for SYSMMU v5

For some unknown reasons, in some cases, FLPD cache invalidation doesn't
work properly with SYSMMU v5 controllers found in Exynos5433 SoCs. This
can be observed by a firmware crash during initialization phase of MFC
video decoder available in the mentioned SoCs when IOMMU support is
enabled. To workaround this issue perform a full TLB/FLPD invalidation
in case of replacing any first level page descriptors in case of SYSMMU v5.

Fixes: 740a01eee9ada ("iommu/exynos: Add support for v5 SYSMMU")
CC: stable@vger.kernel.org # v4.10+
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Tested-by: Andrzej Hajda <a.hajda@samsung.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/exynos-iommu.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 32d43f1994e4..c01bfcdb2383 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -513,7 +513,10 @@ static void sysmmu_tlb_invalidate_flpdcache(struct sysmmu_drvdata *data,
 	if (data->active && data->version >= MAKE_MMU_VER(3, 3)) {
 		clk_enable(data->clk_master);
 		if (sysmmu_block(data)) {
-			__sysmmu_tlb_invalidate_entry(data, iova, 1);
+			if (data->version >= MAKE_MMU_VER(5, 0))
+				__sysmmu_tlb_invalidate(data);
+			else
+				__sysmmu_tlb_invalidate_entry(data, iova, 1);
 			sysmmu_unblock(data);
 		}
 		clk_disable(data->clk_master);
-- 
cgit v1.2.3


From 9d3a4de4cb8db8e71730e36736272ef041836f68 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 16 Mar 2017 17:00:16 +0000
Subject: iommu: Disambiguate MSI region types

The introduction of reserved regions has left a couple of rough edges
which we could do with sorting out sooner rather than later. Since we
are not yet addressing the potential dynamic aspect of software-managed
reservations and presenting them at arbitrary fixed addresses, it is
incongruous that we end up displaying hardware vs. software-managed MSI
regions to userspace differently, especially since ARM-based systems may
actually require one or the other, or even potentially both at once,
(which iommu-dma currently has no hope of dealing with at all). Let's
resolve the former user-visible inconsistency ASAP before the ABI has
been baked into a kernel release, in a way that also lays the groundwork
for the latter shortcoming to be addressed by follow-up patches.

For clarity, rename the software-managed type to IOMMU_RESV_SW_MSI, use
IOMMU_RESV_MSI to describe the hardware type, and document everything a
little bit. Since the x86 MSI remapping hardware falls squarely under
this meaning of IOMMU_RESV_MSI, apply that type to their regions as well,
so that we tell the same story to userspace across all platforms.

Secondly, as the various region types require quite different handling,
and it really makes little sense to ever try combining them, convert the
bitfield-esque #defines to a plain enum in the process before anyone
gets the wrong impression.

Fixes: d30ddcaa7b02 ("iommu: Add a new type field in iommu_resv_region")
Reviewed-by: Eric Auger <eric.auger@redhat.com>
CC: Alex Williamson <alex.williamson@redhat.com>
CC: David Woodhouse <dwmw2@infradead.org>
CC: kvm@vger.kernel.org
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu.c       |  2 +-
 drivers/iommu/arm-smmu-v3.c     |  2 +-
 drivers/iommu/arm-smmu.c        |  2 +-
 drivers/iommu/intel-iommu.c     |  2 +-
 drivers/iommu/iommu.c           |  5 +++--
 drivers/vfio/vfio_iommu_type1.c |  7 +++----
 include/linux/iommu.h           | 18 +++++++++++++-----
 7 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 98940d1392cb..b17536d6e69b 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3202,7 +3202,7 @@ static void amd_iommu_get_resv_regions(struct device *dev,
 
 	region = iommu_alloc_resv_region(MSI_RANGE_START,
 					 MSI_RANGE_END - MSI_RANGE_START + 1,
-					 0, IOMMU_RESV_RESERVED);
+					 0, IOMMU_RESV_MSI);
 	if (!region)
 		return;
 	list_add_tail(&region->list, head);
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 5806a6acc94e..591bb96047c9 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1888,7 +1888,7 @@ static void arm_smmu_get_resv_regions(struct device *dev,
 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
 
 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
-					 prot, IOMMU_RESV_MSI);
+					 prot, IOMMU_RESV_SW_MSI);
 	if (!region)
 		return;
 
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index abf6496843a6..b493c99e17f7 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1608,7 +1608,7 @@ static void arm_smmu_get_resv_regions(struct device *dev,
 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
 
 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
-					 prot, IOMMU_RESV_MSI);
+					 prot, IOMMU_RESV_SW_MSI);
 	if (!region)
 		return;
 
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 91d60493b57c..d412a313a372 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -5249,7 +5249,7 @@ static void intel_iommu_get_resv_regions(struct device *device,
 
 	reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
 				      IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
-				      0, IOMMU_RESV_RESERVED);
+				      0, IOMMU_RESV_MSI);
 	if (!reg)
 		return;
 	list_add_tail(&reg->list, head);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 8ea14f41a979..3b67144dead2 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -72,6 +72,7 @@ static const char * const iommu_group_resv_type_string[] = {
 	[IOMMU_RESV_DIRECT]	= "direct",
 	[IOMMU_RESV_RESERVED]	= "reserved",
 	[IOMMU_RESV_MSI]	= "msi",
+	[IOMMU_RESV_SW_MSI]	= "msi",
 };
 
 #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store)		\
@@ -1743,8 +1744,8 @@ void iommu_put_resv_regions(struct device *dev, struct list_head *list)
 }
 
 struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start,
-						  size_t length,
-						  int prot, int type)
+						  size_t length, int prot,
+						  enum iommu_resv_type type)
 {
 	struct iommu_resv_region *region;
 
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index c26fa1f3ed86..32d2633092a3 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -1182,8 +1182,7 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain,
 	return NULL;
 }
 
-static bool vfio_iommu_has_resv_msi(struct iommu_group *group,
-				    phys_addr_t *base)
+static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base)
 {
 	struct list_head group_resv_regions;
 	struct iommu_resv_region *region, *next;
@@ -1192,7 +1191,7 @@ static bool vfio_iommu_has_resv_msi(struct iommu_group *group,
 	INIT_LIST_HEAD(&group_resv_regions);
 	iommu_get_group_resv_regions(group, &group_resv_regions);
 	list_for_each_entry(region, &group_resv_regions, list) {
-		if (region->type & IOMMU_RESV_MSI) {
+		if (region->type == IOMMU_RESV_SW_MSI) {
 			*base = region->start;
 			ret = true;
 			goto out;
@@ -1283,7 +1282,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 	if (ret)
 		goto out_domain;
 
-	resv_msi = vfio_iommu_has_resv_msi(iommu_group, &resv_msi_base);
+	resv_msi = vfio_iommu_has_sw_msi(iommu_group, &resv_msi_base);
 
 	INIT_LIST_HEAD(&domain->group_list);
 	list_add(&group->next, &domain->group_list);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 6a6de187ddc0..2e4de0deee53 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -125,9 +125,16 @@ enum iommu_attr {
 };
 
 /* These are the possible reserved region types */
-#define IOMMU_RESV_DIRECT	(1 << 0)
-#define IOMMU_RESV_RESERVED	(1 << 1)
-#define IOMMU_RESV_MSI		(1 << 2)
+enum iommu_resv_type {
+	/* Memory regions which must be mapped 1:1 at all times */
+	IOMMU_RESV_DIRECT,
+	/* Arbitrary "never map this or give it to a device" address ranges */
+	IOMMU_RESV_RESERVED,
+	/* Hardware MSI region (untranslated) */
+	IOMMU_RESV_MSI,
+	/* Software-managed MSI translation window */
+	IOMMU_RESV_SW_MSI,
+};
 
 /**
  * struct iommu_resv_region - descriptor for a reserved memory region
@@ -142,7 +149,7 @@ struct iommu_resv_region {
 	phys_addr_t		start;
 	size_t			length;
 	int			prot;
-	int			type;
+	enum iommu_resv_type	type;
 };
 
 #ifdef CONFIG_IOMMU_API
@@ -288,7 +295,8 @@ extern void iommu_get_resv_regions(struct device *dev, struct list_head *list);
 extern void iommu_put_resv_regions(struct device *dev, struct list_head *list);
 extern int iommu_request_dm_for_dev(struct device *dev);
 extern struct iommu_resv_region *
-iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, int type);
+iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot,
+			enum iommu_resv_type type);
 extern int iommu_get_group_resv_regions(struct iommu_group *group,
 					struct list_head *head);
 
-- 
cgit v1.2.3


From afd0e5a876703accb95894f23317a13e2c49b523 Mon Sep 17 00:00:00 2001
From: Neeraj Upadhyay <neeraju@codeaurora.org>
Date: Wed, 22 Mar 2017 17:08:25 +0530
Subject: arm64: kaslr: Fix up the kernel image alignment

If kernel image extends across alignment boundary, existing
code increases the KASLR offset by size of kernel image. The
offset is masked after resizing. There are cases, where after
masking, we may still have kernel image extending across
boundary. This eventually results in only 2MB block getting
mapped while creating the page tables. This results in data aborts
while accessing unmapped regions during second relocation (with
kaslr offset) in __primary_switch. To fix this problem, round up the
kernel image size, by swapper block size, before adding it for
correction.

For example consider below case, where kernel image still crosses
1GB alignment boundary, after masking the offset, which is fixed
by rounding up kernel image size.

SWAPPER_TABLE_SHIFT = 30
Swapper using section maps with section size 2MB.
CONFIG_PGTABLE_LEVELS = 3
VA_BITS = 39

_text  : 0xffffff8008080000
_end   : 0xffffff800aa1b000
offset : 0x1f35600000
mask = ((1UL << (VA_BITS - 2)) - 1) & ~(SZ_2M - 1)

(_text + offset) >> SWAPPER_TABLE_SHIFT = 0x3fffffe7c
(_end + offset) >> SWAPPER_TABLE_SHIFT  = 0x3fffffe7d

offset after existing correction (before mask) = 0x1f37f9b000
(_text + offset) >> SWAPPER_TABLE_SHIFT = 0x3fffffe7d
(_end + offset) >> SWAPPER_TABLE_SHIFT  = 0x3fffffe7d

offset (after mask) = 0x1f37e00000
(_text + offset) >> SWAPPER_TABLE_SHIFT = 0x3fffffe7c
(_end + offset) >> SWAPPER_TABLE_SHIFT  = 0x3fffffe7d

new offset w/ rounding up = 0x1f38000000
(_text + offset) >> SWAPPER_TABLE_SHIFT = 0x3fffffe7d
(_end + offset) >> SWAPPER_TABLE_SHIFT  = 0x3fffffe7d

Fixes: f80fb3a3d508 ("arm64: add support for kernel ASLR")
Cc: <stable@vger.kernel.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Neeraj Upadhyay <neeraju@codeaurora.org>
Signed-off-by: Srinivas Ramana <sramana@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/kernel/kaslr.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 769f24ef628c..d7e90d97f5c4 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -131,11 +131,15 @@ u64 __init kaslr_early_init(u64 dt_phys, u64 modulo_offset)
 	/*
 	 * The kernel Image should not extend across a 1GB/32MB/512MB alignment
 	 * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this
-	 * happens, increase the KASLR offset by the size of the kernel image.
+	 * happens, increase the KASLR offset by the size of the kernel image
+	 * rounded up by SWAPPER_BLOCK_SIZE.
 	 */
 	if ((((u64)_text + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT) !=
-	    (((u64)_end + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT))
-		offset = (offset + (u64)(_end - _text)) & mask;
+	    (((u64)_end + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT)) {
+		u64 kimg_sz = _end - _text;
+		offset = (offset + round_up(kimg_sz, SWAPPER_BLOCK_SIZE))
+				& mask;
+	}
 
 	if (IS_ENABLED(CONFIG_KASAN))
 		/*
-- 
cgit v1.2.3


From f0c0cb99f74c03e2407ea553f6d46eb611e262b5 Mon Sep 17 00:00:00 2001
From: Jon Mason <jon.mason@broadcom.com>
Date: Tue, 21 Mar 2017 16:51:19 -0400
Subject: arm64: dts: NS2: Add dma-coherent to relevant DT entries

Cache related issues with DMA rings and performance issues related to
caching are being caused by not properly setting the "dma-coherent" flag
in the device tree entries.  Adding it here to correct the issue.

Signed-off-by: Jon Mason <jon.mason@broadcom.com>
Fixes: fd5e5dd56 ("arm64: dts: Add PCIe0 and PCIe4 DT nodes for NS2")
Fixes: dddc3c9d7 ("arm64: dts: NS2: add AMAC ethernet support")
Fixes: e79249143 ("arm64: dts: Add Broadcom Northstar2 device tree entries for PDC driver")
Fixes: ac9aae00f ("arm64: dts: Add SATA3 AHCI and SATA3 PHY DT nodes for NS2")
Fixes: efc877676 ("arm64: dts: Add SDHCI DT node for NS2")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 arch/arm64/boot/dts/broadcom/ns2.dtsi | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/arm64/boot/dts/broadcom/ns2.dtsi b/arch/arm64/boot/dts/broadcom/ns2.dtsi
index 9f9e203c09c5..bcb03fc32665 100644
--- a/arch/arm64/boot/dts/broadcom/ns2.dtsi
+++ b/arch/arm64/boot/dts/broadcom/ns2.dtsi
@@ -114,6 +114,7 @@
 	pcie0: pcie@20020000 {
 		compatible = "brcm,iproc-pcie";
 		reg = <0 0x20020000 0 0x1000>;
+		dma-coherent;
 
 		#interrupt-cells = <1>;
 		interrupt-map-mask = <0 0 0 0>;
@@ -144,6 +145,7 @@
 	pcie4: pcie@50020000 {
 		compatible = "brcm,iproc-pcie";
 		reg = <0 0x50020000 0 0x1000>;
+		dma-coherent;
 
 		#interrupt-cells = <1>;
 		interrupt-map-mask = <0 0 0 0>;
@@ -174,6 +176,7 @@
 	pcie8: pcie@60c00000 {
 		compatible = "brcm,iproc-pcie-paxc";
 		reg = <0 0x60c00000 0 0x1000>;
+		dma-coherent;
 		linux,pci-domain = <8>;
 
 		bus-range = <0x0 0x1>;
@@ -203,6 +206,7 @@
 			      <0x61030000 0x100>;
 			reg-names = "amac_base", "idm_base", "nicpm_base";
 			interrupts = <GIC_SPI 341 IRQ_TYPE_LEVEL_HIGH>;
+			dma-coherent;
 			phy-handle = <&gphy0>;
 			phy-mode = "rgmii";
 			status = "disabled";
@@ -213,6 +217,7 @@
 			reg = <0x612c0000 0x445>;  /* PDC FS0 regs */
 			interrupts = <GIC_SPI 187 IRQ_TYPE_LEVEL_HIGH>;
 			#mbox-cells = <1>;
+			dma-coherent;
 			brcm,rx-status-len = <32>;
 			brcm,use-bcm-hdr;
 		};
@@ -222,6 +227,7 @@
 			reg = <0x612e0000 0x445>;  /* PDC FS1 regs */
 			interrupts = <GIC_SPI 189 IRQ_TYPE_LEVEL_HIGH>;
 			#mbox-cells = <1>;
+			dma-coherent;
 			brcm,rx-status-len = <32>;
 			brcm,use-bcm-hdr;
 		};
@@ -231,6 +237,7 @@
 			reg = <0x61300000 0x445>;  /* PDC FS2 regs */
 			interrupts = <GIC_SPI 191 IRQ_TYPE_LEVEL_HIGH>;
 			#mbox-cells = <1>;
+			dma-coherent;
 			brcm,rx-status-len = <32>;
 			brcm,use-bcm-hdr;
 		};
@@ -240,6 +247,7 @@
 			reg = <0x61320000 0x445>;  /* PDC FS3 regs */
 			interrupts = <GIC_SPI 193 IRQ_TYPE_LEVEL_HIGH>;
 			#mbox-cells = <1>;
+			dma-coherent;
 			brcm,rx-status-len = <32>;
 			brcm,use-bcm-hdr;
 		};
@@ -644,6 +652,7 @@
 		sata: ahci@663f2000 {
 			compatible = "brcm,iproc-ahci", "generic-ahci";
 			reg = <0x663f2000 0x1000>;
+			dma-coherent;
 			reg-names = "ahci";
 			interrupts = <GIC_SPI 438 IRQ_TYPE_LEVEL_HIGH>;
 			#address-cells = <1>;
@@ -667,6 +676,7 @@
 			compatible = "brcm,sdhci-iproc-cygnus";
 			reg = <0x66420000 0x100>;
 			interrupts = <GIC_SPI 421 IRQ_TYPE_LEVEL_HIGH>;
+			dma-coherent;
 			bus-width = <8>;
 			clocks = <&genpll_sw BCM_NS2_GENPLL_SW_SDIO_CLK>;
 			status = "disabled";
@@ -676,6 +686,7 @@
 			compatible = "brcm,sdhci-iproc-cygnus";
 			reg = <0x66430000 0x100>;
 			interrupts = <GIC_SPI 422 IRQ_TYPE_LEVEL_HIGH>;
+			dma-coherent;
 			bus-width = <8>;
 			clocks = <&genpll_sw BCM_NS2_GENPLL_SW_SDIO_CLK>;
 			status = "disabled";
-- 
cgit v1.2.3


From a05d4fd9176003e0c1f9c3d083f4dac19fd346ab Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 14 Mar 2017 19:25:56 -0400
Subject: cgroup, net_cls: iterate the fds of only the tasks which are being
 migrated

The net_cls controller controls the classid field of each socket which
is associated with the cgroup.  Because the classid is per-socket
attribute, when a task migrates to another cgroup or the configured
classid of the cgroup changes, the controller needs to walk all
sockets and update the classid value, which was implemented by
3b13758f51de ("cgroups: Allow dynamically changing net_classid").

While the approach is not scalable, migrating tasks which have a lot
of fds attached to them is rare and the cost is born by the ones
initiating the operations.  However, for simplicity, both the
migration and classid config change paths call update_classid() which
scans all fds of all tasks in the target css.  This is an overkill for
the migration path which only needs to cover a much smaller subset of
tasks which are actually getting migrated in.

On cgroup v1, this can lead to unexpected scalability issues when one
tries to migrate a task or process into a net_cls cgroup which already
contains a lot of fds.  Even if the migration traget doesn't have many
to get scanned, update_classid() ends up scanning all fds in the
target cgroup which can be extremely numerous.

Unfortunately, on cgroup v2 which doesn't use net_cls, the problem is
even worse.  Before bfc2cf6f61fc ("cgroup: call subsys->*attach() only
for subsystems which are actually affected by migration"), cgroup core
would call the ->css_attach callback even for controllers which don't
see actual migration to a different css.

As net_cls is always disabled but still mounted on cgroup v2, whenever
a process is migrated on the cgroup v2 hierarchy, net_cls sees
identity migration from root to root and cgroup core used to call
->css_attach callback for those.  The net_cls ->css_attach ends up
calling update_classid() on the root net_cls css to which all
processes on the system belong to as the controller isn't used.  This
makes any cgroup v2 migration O(total_number_of_fds_on_the_system)
which is horrible and easily leads to noticeable stalls triggering RCU
stall warnings and so on.

The worst symptom is already fixed in upstream by bfc2cf6f61fc
("cgroup: call subsys->*attach() only for subsystems which are
actually affected by migration"); however, backporting that commit is
too invasive and we want to avoid other cases too.

This patch updates net_cls's cgrp_attach() to iterate fds of only the
processes which are actually getting migrated.  This removes the
surprising migration cost which is dependent on the total number of
fds in the target cgroup.  As this leaves write_classid() the only
user of update_classid(), open-code the helper into write_classid().

Reported-by: David Goode <dgoode@fb.com>
Fixes: 3b13758f51de ("cgroups: Allow dynamically changing net_classid")
Cc: stable@vger.kernel.org # v4.4+
Cc: Nina Schiff <ninasc@fb.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netclassid_cgroup.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 6ae56037bb13..029a61ac6cdd 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -71,27 +71,17 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n)
 	return 0;
 }
 
-static void update_classid(struct cgroup_subsys_state *css, void *v)
+static void cgrp_attach(struct cgroup_taskset *tset)
 {
-	struct css_task_iter it;
+	struct cgroup_subsys_state *css;
 	struct task_struct *p;
 
-	css_task_iter_start(css, &it);
-	while ((p = css_task_iter_next(&it))) {
+	cgroup_taskset_for_each(p, css, tset) {
 		task_lock(p);
-		iterate_fd(p->files, 0, update_classid_sock, v);
+		iterate_fd(p->files, 0, update_classid_sock,
+			   (void *)(unsigned long)css_cls_state(css)->classid);
 		task_unlock(p);
 	}
-	css_task_iter_end(&it);
-}
-
-static void cgrp_attach(struct cgroup_taskset *tset)
-{
-	struct cgroup_subsys_state *css;
-
-	cgroup_taskset_first(tset, &css);
-	update_classid(css,
-		       (void *)(unsigned long)css_cls_state(css)->classid);
 }
 
 static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
@@ -103,12 +93,22 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
 			 u64 value)
 {
 	struct cgroup_cls_state *cs = css_cls_state(css);
+	struct css_task_iter it;
+	struct task_struct *p;
 
 	cgroup_sk_alloc_disable();
 
 	cs->classid = (u32)value;
 
-	update_classid(css, (void *)(unsigned long)cs->classid);
+	css_task_iter_start(css, &it);
+	while ((p = css_task_iter_next(&it))) {
+		task_lock(p);
+		iterate_fd(p->files, 0, update_classid_sock,
+			   (void *)(unsigned long)cs->classid);
+		task_unlock(p);
+	}
+	css_task_iter_end(&it);
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 210c4f70b4c630b27f0840c8043c138c955edc9e Mon Sep 17 00:00:00 2001
From: hayeswang <hayeswang@realtek.com>
Date: Mon, 20 Mar 2017 16:13:44 +0800
Subject: r8152: set the RMS of RTL8153 according to the mtu

Set the received maximum size (RMS) according to the mtu size. It is
unnecessary to receive a packet which is more than the size we could
transmit. Besides, this could let the rx buffer be used effectively.

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index bb3eedd07fbe..525c25817013 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -2899,7 +2899,8 @@ static void r8153_first_init(struct r8152 *tp)
 
 	rtl_rx_vlan_en(tp, tp->netdev->features & NETIF_F_HW_VLAN_CTAG_RX);
 
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, RTL8153_RMS);
+	ocp_data = tp->netdev->mtu + VLAN_ETH_HLEN + CRC_SIZE;
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, ocp_data);
 	ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_JUMBO);
 
 	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TCR0);
@@ -2951,7 +2952,8 @@ static void r8153_enter_oob(struct r8152 *tp)
 		usleep_range(1000, 2000);
 	}
 
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, RTL8153_RMS);
+	ocp_data = tp->netdev->mtu + VLAN_ETH_HLEN + CRC_SIZE;
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, ocp_data);
 
 	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TEREDO_CFG);
 	ocp_data &= ~TEREDO_WAKE_MASK;
@@ -4201,8 +4203,14 @@ static int rtl8152_change_mtu(struct net_device *dev, int new_mtu)
 
 	dev->mtu = new_mtu;
 
-	if (netif_running(dev) && netif_carrier_ok(dev))
-		r8153_set_rx_early_size(tp);
+	if (netif_running(dev)) {
+		u32 rms = new_mtu + VLAN_ETH_HLEN + CRC_SIZE;
+
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, rms);
+
+		if (netif_carrier_ok(dev))
+			r8153_set_rx_early_size(tp);
+	}
 
 	mutex_unlock(&tp->control);
 
-- 
cgit v1.2.3


From b20cb60e2b865638459e6ec82ad3536d3734e555 Mon Sep 17 00:00:00 2001
From: hayeswang <hayeswang@realtek.com>
Date: Mon, 20 Mar 2017 16:13:45 +0800
Subject: r8152: fix the rx early size of RTL8153

revert commit a59e6d815226 ("r8152: correct the rx early size") and
fix the rx early size as

	(rx buffer size - rx packet size - rx desc size - alignment) / 4

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 525c25817013..0b1b9188625d 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -32,7 +32,7 @@
 #define NETNEXT_VERSION		"08"
 
 /* Information for net */
-#define NET_VERSION		"8"
+#define NET_VERSION		"9"
 
 #define DRIVER_VERSION		"v1." NETNEXT_VERSION "." NET_VERSION
 #define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
@@ -501,6 +501,8 @@ enum rtl_register_content {
 #define RTL8153_RMS		RTL8153_MAX_PACKET
 #define RTL8152_TX_TIMEOUT	(5 * HZ)
 #define RTL8152_NAPI_WEIGHT	64
+#define rx_reserved_size(x)	((x) + VLAN_ETH_HLEN + CRC_SIZE + \
+				 sizeof(struct rx_desc) + RX_ALIGN)
 
 /* rtl8152 flags */
 enum rtl8152_flags {
@@ -2253,8 +2255,7 @@ static void r8153_set_rx_early_timeout(struct r8152 *tp)
 
 static void r8153_set_rx_early_size(struct r8152 *tp)
 {
-	u32 mtu = tp->netdev->mtu;
-	u32 ocp_data = (agg_buf_sz - mtu - VLAN_ETH_HLEN - VLAN_HLEN) / 8;
+	u32 ocp_data = (agg_buf_sz - rx_reserved_size(tp->netdev->mtu)) / 4;
 
 	ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EARLY_SIZE, ocp_data);
 }
-- 
cgit v1.2.3


From be9ca0d33c850192198c22518eeb1f41401268e8 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 20 Mar 2017 09:52:50 +0100
Subject: cpsw/netcp: work around reverse cpts dependency

The dependency is reversed: cpsw and netcp call into cpts,
but cpts depends on the other two in Kconfig. This can lead
to cpts being a loadable module and its callers built-in:

drivers/net/ethernet/ti/cpsw.o: In function `cpsw_remove':
cpsw.c:(.text.cpsw_remove+0xd0): undefined reference to `cpts_release'
drivers/net/ethernet/ti/cpsw.o: In function `cpsw_rx_handler':
cpsw.c:(.text.cpsw_rx_handler+0x2dc): undefined reference to `cpts_rx_timestamp'
drivers/net/ethernet/ti/cpsw.o: In function `cpsw_tx_handler':
cpsw.c:(.text.cpsw_tx_handler+0x7c): undefined reference to `cpts_tx_timestamp'
drivers/net/ethernet/ti/cpsw.o: In function `cpsw_ndo_stop':

As a workaround, I'm introducing another Kconfig symbol to
control the compilation of cpts, while making the actual
module controlled by a silent symbol that is =y when necessary.

Fixes: 6246168b4a38 ("net: ethernet: ti: netcp: add support of cpts")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/Kconfig  | 8 +++++++-
 drivers/net/ethernet/ti/Makefile | 2 +-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index 296c8efd0038..d923890a9fda 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -74,7 +74,7 @@ config TI_CPSW
 	  will be called cpsw.
 
 config TI_CPTS
-	tristate "TI Common Platform Time Sync (CPTS) Support"
+	bool "TI Common Platform Time Sync (CPTS) Support"
 	depends on TI_CPSW || TI_KEYSTONE_NETCP
 	imply PTP_1588_CLOCK
 	---help---
@@ -83,6 +83,12 @@ config TI_CPTS
 	  The unit can time stamp PTP UDP/IPv4 and Layer 2 packets, and the
 	  driver offers a PTP Hardware Clock.
 
+config TI_CPTS_MOD
+	tristate
+	depends on TI_CPTS
+	default y if TI_CPSW=y || TI_KEYSTONE_NETCP=y
+	default m
+
 config TI_KEYSTONE_NETCP
 	tristate "TI Keystone NETCP Core Support"
 	select TI_CPSW_ALE
diff --git a/drivers/net/ethernet/ti/Makefile b/drivers/net/ethernet/ti/Makefile
index 1e7c10bf8713..10e6b0ce51ba 100644
--- a/drivers/net/ethernet/ti/Makefile
+++ b/drivers/net/ethernet/ti/Makefile
@@ -12,7 +12,7 @@ obj-$(CONFIG_TI_DAVINCI_MDIO) += davinci_mdio.o
 obj-$(CONFIG_TI_DAVINCI_CPDMA) += davinci_cpdma.o
 obj-$(CONFIG_TI_CPSW_PHY_SEL) += cpsw-phy-sel.o
 obj-$(CONFIG_TI_CPSW_ALE) += cpsw_ale.o
-obj-$(CONFIG_TI_CPTS) += cpts.o
+obj-$(CONFIG_TI_CPTS_MOD) += cpts.o
 obj-$(CONFIG_TI_CPSW) += ti_cpsw.o
 ti_cpsw-y := cpsw.o
 
-- 
cgit v1.2.3


From 07fef3623407444e51c12ea57cd91df38c1069e0 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 20 Mar 2017 09:58:33 +0100
Subject: cpsw/netcp: cpts depends on posix_timers

With posix timers having become optional, we get a build error with
the cpts time sync option of the CPSW driver:

drivers/net/ethernet/ti/cpts.c: In function 'cpts_find_ts':
drivers/net/ethernet/ti/cpts.c:291:23: error: implicit declaration of function 'ptp_classify_raw';did you mean 'ptp_classifier_init'? [-Werror=implicit-function-declaration]

This adds a hard dependency on PTP_CLOCK to avoid the problem, as
building it without PTP support makes no sense anyway.

Fixes: baa73d9e478f ("posix-timers: Make them configurable")
Cc: Nicolas Pitre <nicolas.pitre@linaro.org>
Cc: stable@vger.kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index d923890a9fda..9e631952b86f 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -76,7 +76,7 @@ config TI_CPSW
 config TI_CPTS
 	bool "TI Common Platform Time Sync (CPTS) Support"
 	depends on TI_CPSW || TI_KEYSTONE_NETCP
-	imply PTP_1588_CLOCK
+	depends on PTP_1588_CLOCK
 	---help---
 	  This driver supports the Common Platform Time Sync unit of
 	  the CPSW Ethernet Switch and Keystone 2 1g/10g Switch Subsystem.
-- 
cgit v1.2.3


From 1511949c61ec63e4b646c34d602ac6990b38ce30 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 20 Mar 2017 17:46:27 +0800
Subject: sctp: declare struct sctp_stream before using it

sctp_stream_free uses struct sctp_stream as a param, but struct sctp_stream
is defined after it's declaration.

This patch is to declare struct sctp_stream before sctp_stream_free.

Fixes: a83863174a61 ("sctp: prepare asoc stream for stream reconf")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 4f645198e9bd..592decebac75 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -83,6 +83,7 @@ struct sctp_bind_addr;
 struct sctp_ulpq;
 struct sctp_ep_common;
 struct crypto_shash;
+struct sctp_stream;
 
 
 #include <net/sctp/tsnmap.h>
-- 
cgit v1.2.3


From 581947787eaf1ad801959d00b42b9d0131aacb6a Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 20 Mar 2017 18:00:28 +0800
Subject: sctp: remove useless err from sctp_association_init

This patch is to remove the unnecessary temporary variable 'err' from
sctp_association_init.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/associola.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 2a6835b4562b..0439a1a68367 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -71,9 +71,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 {
 	struct net *net = sock_net(sk);
 	struct sctp_sock *sp;
-	int i;
 	sctp_paramhdr_t *p;
-	int err;
+	int i;
 
 	/* Retrieve the SCTP per socket area.  */
 	sp = sctp_sk((struct sock *)sk);
@@ -264,8 +263,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 
 	/* AUTH related initializations */
 	INIT_LIST_HEAD(&asoc->endpoint_shared_keys);
-	err = sctp_auth_asoc_copy_shkeys(ep, asoc, gfp);
-	if (err)
+	if (sctp_auth_asoc_copy_shkeys(ep, asoc, gfp))
 		goto fail_init;
 
 	asoc->active_key_id = ep->active_key_id;
-- 
cgit v1.2.3


From 557d054c01da0337ca81de9e9d9206d57245b57e Mon Sep 17 00:00:00 2001
From: Ying Xue <ying.xue@windriver.com>
Date: Tue, 21 Mar 2017 10:47:49 +0100
Subject: tipc: fix nametbl deadlock at tipc_nametbl_unsubscribe

Until now, tipc_nametbl_unsubscribe() is called at subscriptions
reference count cleanup. Usually the subscriptions cleanup is
called at subscription timeout or at subscription cancel or at
subscriber delete.

We have ignored the possibility of this being called from other
locations, which causes deadlock as we try to grab the
tn->nametbl_lock while holding it already.

   CPU1:                             CPU2:
----------                     ----------------
tipc_nametbl_publish
spin_lock_bh(&tn->nametbl_lock)
tipc_nametbl_insert_publ
tipc_nameseq_insert_publ
tipc_subscrp_report_overlap
tipc_subscrp_get
tipc_subscrp_send_event
                             tipc_close_conn
                             tipc_subscrb_release_cb
                             tipc_subscrb_delete
                             tipc_subscrp_put
tipc_subscrp_put
tipc_subscrp_kref_release
tipc_nametbl_unsubscribe
spin_lock_bh(&tn->nametbl_lock)
<<grab nametbl_lock again>>

   CPU1:                              CPU2:
----------                     ----------------
tipc_nametbl_stop
spin_lock_bh(&tn->nametbl_lock)
tipc_purge_publications
tipc_nameseq_remove_publ
tipc_subscrp_report_overlap
tipc_subscrp_get
tipc_subscrp_send_event
                             tipc_close_conn
                             tipc_subscrb_release_cb
                             tipc_subscrb_delete
                             tipc_subscrp_put
tipc_subscrp_put
tipc_subscrp_kref_release
tipc_nametbl_unsubscribe
spin_lock_bh(&tn->nametbl_lock)
<<grab nametbl_lock again>>

In this commit, we advance the calling of tipc_nametbl_unsubscribe()
from the refcount cleanup to the intended callers.

Fixes: d094c4d5f5c7 ("tipc: add subscription refcount to avoid invalid delete")
Reported-by: John Thompson <thompa.atl@gmail.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/subscr.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 9d94e65d0894..271cd66e4b3b 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -141,6 +141,11 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
 static void tipc_subscrp_timeout(unsigned long data)
 {
 	struct tipc_subscription *sub = (struct tipc_subscription *)data;
+	struct tipc_subscriber *subscriber = sub->subscriber;
+
+	spin_lock_bh(&subscriber->lock);
+	tipc_nametbl_unsubscribe(sub);
+	spin_unlock_bh(&subscriber->lock);
 
 	/* Notify subscriber of timeout */
 	tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper,
@@ -173,7 +178,6 @@ static void tipc_subscrp_kref_release(struct kref *kref)
 	struct tipc_subscriber *subscriber = sub->subscriber;
 
 	spin_lock_bh(&subscriber->lock);
-	tipc_nametbl_unsubscribe(sub);
 	list_del(&sub->subscrp_list);
 	atomic_dec(&tn->subscription_count);
 	spin_unlock_bh(&subscriber->lock);
@@ -205,6 +209,7 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber,
 		if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr)))
 			continue;
 
+		tipc_nametbl_unsubscribe(sub);
 		tipc_subscrp_get(sub);
 		spin_unlock_bh(&subscriber->lock);
 		tipc_subscrp_delete(sub);
-- 
cgit v1.2.3


From 1f30a86c58093046dc3e49c23d2618894e098f7a Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 21 Mar 2017 15:59:12 +0200
Subject: net/mlx5: Add missing entries for set/query rate limit commands

The switch cases for the rate limit set and query commands were
missing, which could get us wrong under fw error or driver reset
flow, fix that.

Fixes: 1466cc5b23d1 ('net/mlx5: Rate limit tables support')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index caa837e5e2b9..a380353a78c2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -361,6 +361,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
 	case MLX5_CMD_OP_ALLOC_Q_COUNTER:
 	case MLX5_CMD_OP_QUERY_Q_COUNTER:
+	case MLX5_CMD_OP_SET_RATE_LIMIT:
+	case MLX5_CMD_OP_QUERY_RATE_LIMIT:
 	case MLX5_CMD_OP_ALLOC_PD:
 	case MLX5_CMD_OP_ALLOC_UAR:
 	case MLX5_CMD_OP_CONFIG_INT_MODERATION:
@@ -497,6 +499,8 @@ const char *mlx5_command_str(int command)
 	MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
 	MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
 	MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
+	MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT);
+	MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
 	MLX5_COMMAND_STR_CASE(ALLOC_PD);
 	MLX5_COMMAND_STR_CASE(DEALLOC_PD);
 	MLX5_COMMAND_STR_CASE(ALLOC_UAR);
-- 
cgit v1.2.3


From d85cdccbb3fe9a632ec9d0f4e4526c8c84fc3523 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 21 Mar 2017 15:59:13 +0200
Subject: net/mlx5e: Change the TC offload rule add/del code path to be per NIC
 or E-Switch

Refactor the code to deal with add/del TC rules to have handler per NIC/E-switch
offloading use case, and push the latter into the e-switch code. This provides
better separation and is to be used in down-stream patch for applying a fix.

Fixes: bffaa916588e ("net/mlx5: E-Switch, Add control for inline mode")
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    | 59 ++++++++++++++--------
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  5 ++
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 14 +++++
 3 files changed, 58 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 79481f4cf264..2825b5665456 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -133,6 +133,23 @@ err_create_ft:
 	return rule;
 }
 
+static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
+				  struct mlx5e_tc_flow *flow)
+{
+	struct mlx5_fc *counter = NULL;
+
+	if (!IS_ERR(flow->rule)) {
+		counter = mlx5_flow_rule_counter(flow->rule);
+		mlx5_del_flow_rules(flow->rule);
+		mlx5_fc_destroy(priv->mdev, counter);
+	}
+
+	if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
+		mlx5_destroy_flow_table(priv->fs.tc.t);
+		priv->fs.tc.t = NULL;
+	}
+}
+
 static struct mlx5_flow_handle *
 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 		      struct mlx5_flow_spec *spec,
@@ -149,7 +166,24 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 }
 
 static void mlx5e_detach_encap(struct mlx5e_priv *priv,
-			       struct mlx5e_tc_flow *flow) {
+			       struct mlx5e_tc_flow *flow);
+
+static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
+				  struct mlx5e_tc_flow *flow)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+	mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->attr);
+
+	mlx5_eswitch_del_vlan_action(esw, flow->attr);
+
+	if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
+		mlx5e_detach_encap(priv, flow);
+}
+
+static void mlx5e_detach_encap(struct mlx5e_priv *priv,
+			       struct mlx5e_tc_flow *flow)
+{
 	struct list_head *next = flow->encap.next;
 
 	list_del(&flow->encap);
@@ -173,25 +207,10 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
 			      struct mlx5e_tc_flow *flow)
 {
-	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-	struct mlx5_fc *counter = NULL;
-
-	if (!IS_ERR(flow->rule)) {
-		counter = mlx5_flow_rule_counter(flow->rule);
-		mlx5_del_flow_rules(flow->rule);
-		mlx5_fc_destroy(priv->mdev, counter);
-	}
-
-	if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
-		mlx5_eswitch_del_vlan_action(esw, flow->attr);
-		if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
-			mlx5e_detach_encap(priv, flow);
-	}
-
-	if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
-		mlx5_destroy_flow_table(priv->fs.tc.t);
-		priv->fs.tc.t = NULL;
-	}
+	if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+		mlx5e_tc_del_fdb_flow(priv, flow);
+	else
+		mlx5e_tc_del_nic_flow(priv, flow);
 }
 
 static void parse_vxlan_attr(struct mlx5_flow_spec *spec,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 5b78883d5654..9227a83a97e3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -271,6 +271,11 @@ struct mlx5_flow_handle *
 mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 				struct mlx5_flow_spec *spec,
 				struct mlx5_esw_flow_attr *attr);
+void
+mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
+				struct mlx5_flow_handle *rule,
+				struct mlx5_esw_flow_attr *attr);
+
 struct mlx5_flow_handle *
 mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 4f5b0d47d5f3..bfabefe20ac0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -97,6 +97,20 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 	return rule;
 }
 
+void
+mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
+				struct mlx5_flow_handle *rule,
+				struct mlx5_esw_flow_attr *attr)
+{
+	struct mlx5_fc *counter = NULL;
+
+	if (!IS_ERR(rule)) {
+		counter = mlx5_flow_rule_counter(rule);
+		mlx5_del_flow_rules(rule);
+		mlx5_fc_destroy(esw->dev, counter);
+	}
+}
+
 static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
 {
 	struct mlx5_eswitch_rep *rep;
-- 
cgit v1.2.3


From 375f51e2b5b7b9a42b3139aea519cbb1bfc5d6ef Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@mellanox.com>
Date: Tue, 21 Mar 2017 15:59:14 +0200
Subject: net/mlx5: E-Switch, Don't allow changing inline mode when flows are
 configured

Changing the eswitch inline mode can potentially cause already configured
flows not to match the policy. E.g. set policy L4, add some L4 rules,
set policy to L2 --> bad! Hence we disallow it.

Keep track of how many offloaded rules are now set and refuse
inline mode changes if this isn't zero.

Fixes: bffaa916588e ("net/mlx5: E-Switch, Add control for inline mode")
Signed-off-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h          | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 8 ++++++++
 2 files changed, 9 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 9227a83a97e3..ad329b1680b4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -209,6 +209,7 @@ struct mlx5_esw_offload {
 	struct mlx5_eswitch_rep *vport_reps;
 	DECLARE_HASHTABLE(encap_tbl, 8);
 	u8 inline_mode;
+	u64 num_flows;
 };
 
 struct mlx5_eswitch {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index bfabefe20ac0..307ec6c5fd3b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -93,6 +93,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 				   spec, &flow_act, dest, i);
 	if (IS_ERR(rule))
 		mlx5_fc_destroy(esw->dev, counter);
+	else
+		esw->offloads.num_flows++;
 
 	return rule;
 }
@@ -108,6 +110,7 @@ mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
 		counter = mlx5_flow_rule_counter(rule);
 		mlx5_del_flow_rules(rule);
 		mlx5_fc_destroy(esw->dev, counter);
+		esw->offloads.num_flows--;
 	}
 }
 
@@ -922,6 +925,11 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode)
 	    MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
 		return -EOPNOTSUPP;
 
+	if (esw->offloads.num_flows > 0) {
+		esw_warn(dev, "Can't set inline mode when flows are configured\n");
+		return -EOPNOTSUPP;
+	}
+
 	err = esw_inline_mode_from_devlink(mode, &mlx5_mode);
 	if (err)
 		goto out;
-- 
cgit v1.2.3


From 09c91ddf2cd33489c2c14edfef43ae38d412888e Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 21 Mar 2017 15:59:15 +0200
Subject: net/mlx5e: Use the proper UAPI values when offloading TC vlan actions

Currently we use the non UAPI values and we miss erring on
the modify action which is not supported, fix that.

Fixes: 8b32580df1cb ('net/mlx5e: Add TC vlan action for SRIOV offloads')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reported-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 2825b5665456..9c13abaf3885 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1131,14 +1131,16 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 		}
 
 		if (is_tcf_vlan(a)) {
-			if (tcf_vlan_action(a) == VLAN_F_POP) {
+			if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
 				attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
-			} else if (tcf_vlan_action(a) == VLAN_F_PUSH) {
+			} else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
 				if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q))
 					return -EOPNOTSUPP;
 
 				attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
 				attr->vlan = tcf_vlan_push_vid(a);
+			} else { /* action is TCA_VLAN_ACT_MODIFY */
+				return -EOPNOTSUPP;
 			}
 			continue;
 		}
-- 
cgit v1.2.3


From 1ad9a00ae0efc2e9337148d6c382fad3d27bf99a Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@mellanox.com>
Date: Tue, 21 Mar 2017 15:59:16 +0200
Subject: net/mlx5e: Avoid supporting udp tunnel port ndo for VF reps

This was added to allow the TC offloading code to identify offloading
encap/decap vxlan rules.

The VF reps are effectively related to the same mlx5 PCI device as the
PF. Since the kernel invokes the (say) delete ndo for each netdev, the
FW erred on multiple vxlan dst port deletes when the port was deleted
from the system.

We fix that by keeping the registration to be carried out only by the
PF. Since the PF serves as the uplink device, the VF reps will look
up a port there and realize if they are ok to offload that.

Tested:
 <SETUP VFS>
 <SETUP switchdev mode to have representors>
 ip link add vxlan1 type vxlan id 44 dev ens5f0 dstport 9999
 ip link set vxlan1 up
 ip link del dev vxlan1

Fixes: 4a25730eb202 ('net/mlx5e: Add ndo_udp_tunnel_add to VF representors')
Signed-off-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h      | 4 ----
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 ++++----
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c  | 2 --
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c   | 9 +++++++--
 4 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index f6a6ded204f6..dc52053128bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -928,10 +928,6 @@ void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv);
 int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev);
 void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev);
 u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout);
-void mlx5e_add_vxlan_port(struct net_device *netdev,
-			  struct udp_tunnel_info *ti);
-void mlx5e_del_vxlan_port(struct net_device *netdev,
-			  struct udp_tunnel_info *ti);
 
 int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
 			    void *sp);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 8ef64c4db2c2..66c133757a5e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3100,8 +3100,8 @@ static int mlx5e_get_vf_stats(struct net_device *dev,
 					    vf_stats);
 }
 
-void mlx5e_add_vxlan_port(struct net_device *netdev,
-			  struct udp_tunnel_info *ti)
+static void mlx5e_add_vxlan_port(struct net_device *netdev,
+				 struct udp_tunnel_info *ti)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 
@@ -3114,8 +3114,8 @@ void mlx5e_add_vxlan_port(struct net_device *netdev,
 	mlx5e_vxlan_queue_work(priv, ti->sa_family, be16_to_cpu(ti->port), 1);
 }
 
-void mlx5e_del_vxlan_port(struct net_device *netdev,
-			  struct udp_tunnel_info *ti)
+static void mlx5e_del_vxlan_port(struct net_device *netdev,
+				 struct udp_tunnel_info *ti)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 2c864574a9d5..f621373bd7a5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -393,8 +393,6 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = {
 	.ndo_get_phys_port_name  = mlx5e_rep_get_phys_port_name,
 	.ndo_setup_tc            = mlx5e_rep_ndo_setup_tc,
 	.ndo_get_stats64         = mlx5e_rep_get_stats,
-	.ndo_udp_tunnel_add      = mlx5e_add_vxlan_port,
-	.ndo_udp_tunnel_del      = mlx5e_del_vxlan_port,
 	.ndo_has_offload_stats	 = mlx5e_has_offload_stats,
 	.ndo_get_offload_stats	 = mlx5e_get_offload_stats,
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 9c13abaf3885..fade7233dac5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -267,12 +267,15 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
 			skb_flow_dissector_target(f->dissector,
 						  FLOW_DISSECTOR_KEY_ENC_PORTS,
 						  f->mask);
+		struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+		struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw);
+		struct mlx5e_priv *up_priv = netdev_priv(up_dev);
 
 		/* Full udp dst port must be given */
 		if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
 			goto vxlan_match_offload_err;
 
-		if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->dst)) &&
+		if (mlx5e_vxlan_lookup_port(up_priv, be16_to_cpu(key->dst)) &&
 		    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
 			parse_vxlan_attr(spec, f);
 		else {
@@ -995,6 +998,8 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
 			      struct mlx5_esw_flow_attr *attr)
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw);
+	struct mlx5e_priv *up_priv = netdev_priv(up_dev);
 	unsigned short family = ip_tunnel_info_af(tun_info);
 	struct ip_tunnel_key *key = &tun_info->key;
 	struct mlx5_encap_entry *e;
@@ -1015,7 +1020,7 @@ vxlan_encap_offload_err:
 		return -EOPNOTSUPP;
 	}
 
-	if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->tp_dst)) &&
+	if (mlx5e_vxlan_lookup_port(up_priv, be16_to_cpu(key->tp_dst)) &&
 	    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
 		tunnel_type = MLX5_HEADER_TYPE_VXLAN;
 	} else {
-- 
cgit v1.2.3


From 5f40b4ed975c26016cf41953b7510fe90718e21c Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Tue, 21 Mar 2017 15:59:17 +0200
Subject: net/mlx5: Increase number of max QPs in default profile

With ConnectX-4 sharing SRQs from the same space as QPs, we hit a
limit preventing some applications to allocate needed QPs amount.
Double the size to 256K.

Fixes: e126ba97dba9e ('mlx5: Add driver for Mellanox Connect-IB adapters')
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index e2bd600d19de..60154a175bd3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -87,7 +87,7 @@ static struct mlx5_profile profile[] = {
 	[2] = {
 		.mask		= MLX5_PROF_MASK_QP_SIZE |
 				  MLX5_PROF_MASK_MR_CACHE,
-		.log_max_qp	= 17,
+		.log_max_qp	= 18,
 		.mr_cache[0]	= {
 			.size	= 500,
 			.limit	= 250
-- 
cgit v1.2.3


From d3a4e4da54c7adb420d5f48e89be913b14bdeff1 Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Tue, 21 Mar 2017 15:59:18 +0200
Subject: net/mlx5e: Count GSO packets correctly

TX packets statistics ('tx_packets' counter) used to count GSO packets
as one, even though it contains multiple segments.
This patch will increment the counter by the number of segments, and
align the driver with the behavior of other drivers in the stack.

Note that no information is lost in this patch due to 'tx_tso_packets'
counter existence.

Before, ethtool showed:
$ ethtool -S ens6 | egrep "tx_packets|tx_tso_packets"
     tx_packets: 61340
     tx_tso_packets: 60954
     tx_packets_phy: 2451115

Now, we will see the more logical statistics:
$ ethtool -S ens6 | egrep "tx_packets|tx_tso_packets"
     tx_packets: 2451115
     tx_tso_packets: 60954
     tx_packets_phy: 2451115

Fixes: e586b3b0baee ("net/mlx5: Ethernet Datapath files")
Signed-off-by: Gal Pressman <galp@mellanox.com>
Cc: kernel-team@fb.com
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index f193128bac4b..57f5e2d7ebd1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -274,15 +274,18 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
 			sq->stats.tso_bytes += skb->len - ihs;
 		}
 
+		sq->stats.packets += skb_shinfo(skb)->gso_segs;
 		num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
 	} else {
 		bf = sq->bf_budget &&
 		     !skb->xmit_more &&
 		     !skb_shinfo(skb)->nr_frags;
 		ihs = mlx5e_get_inline_hdr_size(sq, skb, bf);
+		sq->stats.packets++;
 		num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
 	}
 
+	sq->stats.bytes += num_bytes;
 	wi->num_bytes = num_bytes;
 
 	ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
@@ -381,8 +384,6 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
 	if (bf)
 		sq->bf_budget--;
 
-	sq->stats.packets++;
-	sq->stats.bytes += num_bytes;
 	return NETDEV_TX_OK;
 
 dma_unmap_wqe_err:
-- 
cgit v1.2.3


From 8ab7e2ae15d84ba758b2c8c6f4075722e9bd2a08 Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Tue, 21 Mar 2017 15:59:19 +0200
Subject: net/mlx5e: Count LRO packets correctly

RX packets statistics ('rx_packets' counter) used to count LRO packets
as one, even though it contains multiple segments.
This patch will increment the counter by the number of segments, and
align the driver with the behavior of other drivers in the stack.

Note that no information is lost in this patch due to 'rx_lro_packets'
counter existence.

Before, ethtool showed:
$ ethtool -S ens6 | egrep "rx_packets|rx_lro_packets"
     rx_packets: 435277
     rx_lro_packets: 35847
     rx_packets_phy: 1935066

Now, we will see the more logical statistics:
$ ethtool -S ens6 | egrep "rx_packets|rx_lro_packets"
     rx_packets: 1935066
     rx_lro_packets: 35847
     rx_packets_phy: 1935066

Fixes: e586b3b0baee ("net/mlx5: Ethernet Datapath files")
Signed-off-by: Gal Pressman <galp@mellanox.com>
Cc: kernel-team@fb.com
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 3d371688fbbb..bafcb349a50c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -601,6 +601,10 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
 	if (lro_num_seg > 1) {
 		mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt);
 		skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg);
+		/* Subtract one since we already counted this as one
+		 * "regular" packet in mlx5e_complete_rx_cqe()
+		 */
+		rq->stats.packets += lro_num_seg - 1;
 		rq->stats.lro_packets++;
 		rq->stats.lro_bytes += cqe_bcnt;
 	}
-- 
cgit v1.2.3


From ac23d3cac1f339febd95c403a245ae072dfd0e84 Mon Sep 17 00:00:00 2001
From: Yasuaki Ishimatsu <yasu.isimatu@gmail.com>
Date: Tue, 21 Mar 2017 11:44:25 -0400
Subject: fjes: Do not load fjes driver if system does not have extended socket
 device.

The fjes driver is used only by FUJITSU servers and almost of all
servers in the world never use it. But currently if ACPI PNP0C02
is defined in the ACPI table, the following message is always shown:

 "FUJITSU Extended Socket Network Device Driver - version 1.2
  - Copyright (c) 2015 FUJITSU LIMITED"

The message makes users confused because there is no reason that
the message is shown in other vendor servers.

To avoid the confusion, the patch adds a check that the server
has a extended socket device or not.

Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
CC: Taku Izumi <izumi.taku@jp.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/fjes/fjes_main.c | 52 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 47 insertions(+), 5 deletions(-)

diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c
index c4b3c4b77a9c..7b589649ab46 100644
--- a/drivers/net/fjes/fjes_main.c
+++ b/drivers/net/fjes/fjes_main.c
@@ -45,6 +45,8 @@ MODULE_DESCRIPTION("FUJITSU Extended Socket Network Device Driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_VERSION);
 
+#define ACPI_MOTHERBOARD_RESOURCE_HID "PNP0C02"
+
 static int fjes_request_irq(struct fjes_adapter *);
 static void fjes_free_irq(struct fjes_adapter *);
 
@@ -78,7 +80,7 @@ static void fjes_rx_irq(struct fjes_adapter *, int);
 static int fjes_poll(struct napi_struct *, int);
 
 static const struct acpi_device_id fjes_acpi_ids[] = {
-	{"PNP0C02", 0},
+	{ACPI_MOTHERBOARD_RESOURCE_HID, 0},
 	{"", 0},
 };
 MODULE_DEVICE_TABLE(acpi, fjes_acpi_ids);
@@ -115,18 +117,17 @@ static struct resource fjes_resource[] = {
 	},
 };
 
-static int fjes_acpi_add(struct acpi_device *device)
+static bool is_extended_socket_device(struct acpi_device *device)
 {
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL};
 	char str_buf[sizeof(FJES_ACPI_SYMBOL) + 1];
-	struct platform_device *plat_dev;
 	union acpi_object *str;
 	acpi_status status;
 	int result;
 
 	status = acpi_evaluate_object(device->handle, "_STR", NULL, &buffer);
 	if (ACPI_FAILURE(status))
-		return -ENODEV;
+		return false;
 
 	str = buffer.pointer;
 	result = utf16s_to_utf8s((wchar_t *)str->string.pointer,
@@ -136,10 +137,21 @@ static int fjes_acpi_add(struct acpi_device *device)
 
 	if (strncmp(FJES_ACPI_SYMBOL, str_buf, strlen(FJES_ACPI_SYMBOL)) != 0) {
 		kfree(buffer.pointer);
-		return -ENODEV;
+		return false;
 	}
 	kfree(buffer.pointer);
 
+	return true;
+}
+
+static int fjes_acpi_add(struct acpi_device *device)
+{
+	struct platform_device *plat_dev;
+	acpi_status status;
+
+	if (!is_extended_socket_device(device))
+		return -ENODEV;
+
 	status = acpi_walk_resources(device->handle, METHOD_NAME__CRS,
 				     fjes_get_acpi_resource, fjes_resource);
 	if (ACPI_FAILURE(status))
@@ -1473,11 +1485,41 @@ static void fjes_watch_unshare_task(struct work_struct *work)
 	}
 }
 
+static acpi_status
+acpi_find_extended_socket_device(acpi_handle obj_handle, u32 level,
+				 void *context, void **return_value)
+{
+	struct acpi_device *device;
+	bool *found = context;
+	int result;
+
+	result = acpi_bus_get_device(obj_handle, &device);
+	if (result)
+		return AE_OK;
+
+	if (strcmp(acpi_device_hid(device), ACPI_MOTHERBOARD_RESOURCE_HID))
+		return AE_OK;
+
+	if (!is_extended_socket_device(device))
+		return AE_OK;
+
+	*found = true;
+	return AE_CTRL_TERMINATE;
+}
+
 /* fjes_init_module - Driver Registration Routine */
 static int __init fjes_init_module(void)
 {
+	bool found = false;
 	int result;
 
+	acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX,
+			    acpi_find_extended_socket_device, NULL, &found,
+			    NULL);
+
+	if (!found)
+		return -ENODEV;
+
 	pr_info("%s - version %s - %s\n",
 		fjes_driver_string, fjes_driver_version, fjes_copyright);
 
-- 
cgit v1.2.3


From 2b396d302650f1ebb770ed758ddcf5a64328ffd5 Mon Sep 17 00:00:00 2001
From: Yasuaki Ishimatsu <yasu.isimatu@gmail.com>
Date: Tue, 21 Mar 2017 11:46:35 -0400
Subject: fjes: Do not load fjes driver if extended socket device is not power
 on.

The extended device socket cannot turn on/off while system is running.
So when system boots up and the device is not power on, the fjes driver
does not need be loaded.

To check the status of the device, the patch adds ACPI _STA method check.

Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
CC: Taku Izumi <izumi.taku@jp.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/fjes/fjes_main.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c
index 7b589649ab46..ae48c809bac9 100644
--- a/drivers/net/fjes/fjes_main.c
+++ b/drivers/net/fjes/fjes_main.c
@@ -144,6 +144,24 @@ static bool is_extended_socket_device(struct acpi_device *device)
 	return true;
 }
 
+static int acpi_check_extended_socket_status(struct acpi_device *device)
+{
+	unsigned long long sta;
+	acpi_status status;
+
+	status = acpi_evaluate_integer(device->handle, "_STA", NULL, &sta);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	if (!((sta & ACPI_STA_DEVICE_PRESENT) &&
+	      (sta & ACPI_STA_DEVICE_ENABLED) &&
+	      (sta & ACPI_STA_DEVICE_UI) &&
+	      (sta & ACPI_STA_DEVICE_FUNCTIONING)))
+		return -ENODEV;
+
+	return 0;
+}
+
 static int fjes_acpi_add(struct acpi_device *device)
 {
 	struct platform_device *plat_dev;
@@ -152,6 +170,9 @@ static int fjes_acpi_add(struct acpi_device *device)
 	if (!is_extended_socket_device(device))
 		return -ENODEV;
 
+	if (acpi_check_extended_socket_status(device))
+		return -ENODEV;
+
 	status = acpi_walk_resources(device->handle, METHOD_NAME__CRS,
 				     fjes_get_acpi_resource, fjes_resource);
 	if (ACPI_FAILURE(status))
@@ -1503,6 +1524,9 @@ acpi_find_extended_socket_device(acpi_handle obj_handle, u32 level,
 	if (!is_extended_socket_device(device))
 		return AE_OK;
 
+	if (acpi_check_extended_socket_status(device))
+		return AE_OK;
+
 	*found = true;
 	return AE_CTRL_TERMINATE;
 }
-- 
cgit v1.2.3


From d515684d78148884d5fc425ba904c50f03844020 Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Tue, 21 Mar 2017 17:14:27 +0100
Subject: ipv6: make sure to initialize sockc.tsflags before first use

In the case udp_sk(sk)->pending is AF_INET6, udpv6_sendmsg() would
jump to do_append_data, skipping the initialization of sockc.tsflags.
Fix the problem by moving sockc.tsflags initialization earlier.

The bug was detected with KMSAN.

Fixes: c14ac9451c34 ("sock: enable timestamping using control messages")
Signed-off-by: Alexander Potapenko <glider@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 4e4c401e3bc6..e28082f0a307 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1035,6 +1035,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	ipc6.hlimit = -1;
 	ipc6.tclass = -1;
 	ipc6.dontfrag = -1;
+	sockc.tsflags = sk->sk_tsflags;
 
 	/* destination address check */
 	if (sin6) {
@@ -1159,7 +1160,6 @@ do_udp_sendmsg:
 
 	fl6.flowi6_mark = sk->sk_mark;
 	fl6.flowi6_uid = sk->sk_uid;
-	sockc.tsflags = sk->sk_tsflags;
 
 	if (msg->msg_controllen) {
 		opt = &opt_space;
-- 
cgit v1.2.3


From 31739eae738ccbe8b9d627c3f2251017ca03f4d2 Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Tue, 21 Mar 2017 14:01:06 -0700
Subject: net: bcmgenet: remove bcmgenet_internal_phy_setup()

Commit 6ac3ce8295e6 ("net: bcmgenet: Remove excessive PHY reset")
removed the bcmgenet_mii_reset() function from bcmgenet_power_up() and
bcmgenet_internal_phy_setup() functions.  In so doing it broke the reset
of the internal PHY devices used by the GENETv1-GENETv3 which required
this reset before the UniMAC was enabled.  It also broke the internal
GPHY devices used by the GENETv4 because the config_init that installed
the AFE workaround was no longer occurring after the reset of the GPHY
performed by bcmgenet_phy_power_set() in bcmgenet_internal_phy_setup().
In addition the code in bcmgenet_internal_phy_setup() related to the
"enable APD" comment goes with the bcmgenet_mii_reset() so it should
have also been removed.

Commit bd4060a6108b ("net: bcmgenet: Power on integrated GPHY in
bcmgenet_power_up()") moved the bcmgenet_phy_power_set() call to the
bcmgenet_power_up() function, but failed to remove it from the
bcmgenet_internal_phy_setup() function.  Had it done so, the
bcmgenet_internal_phy_setup() function would have been empty and could
have been removed at that time.

Commit 5dbebbb44a6a ("net: bcmgenet: Software reset EPHY after power on")
was submitted to correct the functional problems introduced by
commit 6ac3ce8295e6 ("net: bcmgenet: Remove excessive PHY reset"). It
was included in v4.4 and made available on 4.3-stable. Unfortunately,
it didn't fully revert the commit because this bcmgenet_mii_reset()
doesn't apply the soft reset to the internal GPHY used by GENETv4 like
the previous one did. This prevents the restoration of the AFE work-
arounds for internal GPHY devices after the bcmgenet_phy_power_set() in
bcmgenet_internal_phy_setup().

This commit takes the alternate approach of removing the unnecessary
bcmgenet_internal_phy_setup() function which shouldn't have been in v4.3
so that when bcmgenet_mii_reset() was restored it should have only gone
into bcmgenet_power_up().  This will avoid the problems while also
removing the redundancy (and hopefully some of the confusion).

Fixes: 6ac3ce8295e6 ("net: bcmgenet: Remove excessive PHY reset")
Signed-off-by: Doug Berger <opendmb@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmmii.c | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index e87607621e62..2f9281936f0e 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -220,20 +220,6 @@ void bcmgenet_phy_power_set(struct net_device *dev, bool enable)
 	udelay(60);
 }
 
-static void bcmgenet_internal_phy_setup(struct net_device *dev)
-{
-	struct bcmgenet_priv *priv = netdev_priv(dev);
-	u32 reg;
-
-	/* Power up PHY */
-	bcmgenet_phy_power_set(dev, true);
-	/* enable APD */
-	reg = bcmgenet_ext_readl(priv, EXT_EXT_PWR_MGMT);
-	reg |= EXT_PWR_DN_EN_LD;
-	bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT);
-	bcmgenet_mii_reset(dev);
-}
-
 static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv)
 {
 	u32 reg;
@@ -281,7 +267,6 @@ int bcmgenet_mii_config(struct net_device *dev)
 
 		if (priv->internal_phy) {
 			phy_name = "internal PHY";
-			bcmgenet_internal_phy_setup(dev);
 		} else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) {
 			phy_name = "MoCA";
 			bcmgenet_moca_phy_setup(priv);
-- 
cgit v1.2.3


From dd1ef79120e1600cb48320cf80a612ee6510110c Mon Sep 17 00:00:00 2001
From: Govindarajulu Varadarajan <gvaradar@cisco.com>
Date: Tue, 21 Mar 2017 15:07:48 -0700
Subject: enic: update enic maintainers

update enic maintainers

Signed-off-by: Govindarajulu Varadarajan <gvaradar@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 078c38217daa..c45c02bc6082 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3216,7 +3216,6 @@ F:	drivers/platform/chrome/
 
 CISCO VIC ETHERNET NIC DRIVER
 M:	Christian Benvenuti <benve@cisco.com>
-M:	Sujith Sankar <ssujith@cisco.com>
 M:	Govindarajulu Varadarajan <_govind@gmx.com>
 M:	Neel Patel <neepatel@cisco.com>
 S:	Supported
-- 
cgit v1.2.3


From 8c290e60fa2a51806159522331c9ed41252a8fb3 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Tue, 21 Mar 2017 19:05:04 -0700
Subject: bpf: fix hashmap extra_elems logic

In both kmalloc and prealloc mode the bpf_map_update_elem() is using
per-cpu extra_elems to do atomic update when the map is full.
There are two issues with it. The logic can be misused, since it allows
max_entries+num_cpus elements to be present in the map. And alloc_extra_elems()
at map creation time can fail percpu alloc for large map values with a warn:
WARNING: CPU: 3 PID: 2752 at ../mm/percpu.c:892 pcpu_alloc+0x119/0xa60
illegal size (32824) or align (8) for percpu allocation

The fixes for both of these issues are different for kmalloc and prealloc modes.
For prealloc mode allocate extra num_possible_cpus elements and store
their pointers into extra_elems array instead of actual elements.
Hence we can use these hidden(spare) elements not only when the map is full
but during bpf_map_update_elem() that replaces existing element too.
That also improves performance, since pcpu_freelist_pop/push is avoided.
Unfortunately this approach cannot be used for kmalloc mode which needs
to kfree elements after rcu grace period. Therefore switch it back to normal
kmalloc even when full and old element exists like it was prior to
commit 6c9059817432 ("bpf: pre-allocate hash map elements").

Add tests to check for over max_entries and large map values.

Reported-by: Dave Jones <davej@codemonkey.org.uk>
Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/hashtab.c                    | 144 ++++++++++++++++----------------
 tools/testing/selftests/bpf/test_maps.c |  29 ++++++-
 2 files changed, 97 insertions(+), 76 deletions(-)

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index afe5bab376c9..361a69dfe543 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -30,18 +30,12 @@ struct bpf_htab {
 		struct pcpu_freelist freelist;
 		struct bpf_lru lru;
 	};
-	void __percpu *extra_elems;
+	struct htab_elem *__percpu *extra_elems;
 	atomic_t count;	/* number of elements in this hashtable */
 	u32 n_buckets;	/* number of hash buckets */
 	u32 elem_size;	/* size of each element in bytes */
 };
 
-enum extra_elem_state {
-	HTAB_NOT_AN_EXTRA_ELEM = 0,
-	HTAB_EXTRA_ELEM_FREE,
-	HTAB_EXTRA_ELEM_USED
-};
-
 /* each htab element is struct htab_elem + key + value */
 struct htab_elem {
 	union {
@@ -56,7 +50,6 @@ struct htab_elem {
 	};
 	union {
 		struct rcu_head rcu;
-		enum extra_elem_state state;
 		struct bpf_lru_node lru_node;
 	};
 	u32 hash;
@@ -77,6 +70,11 @@ static bool htab_is_percpu(const struct bpf_htab *htab)
 		htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
 }
 
+static bool htab_is_prealloc(const struct bpf_htab *htab)
+{
+	return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
+}
+
 static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
 				     void __percpu *pptr)
 {
@@ -128,17 +126,20 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
 
 static int prealloc_init(struct bpf_htab *htab)
 {
+	u32 num_entries = htab->map.max_entries;
 	int err = -ENOMEM, i;
 
-	htab->elems = bpf_map_area_alloc(htab->elem_size *
-					 htab->map.max_entries);
+	if (!htab_is_percpu(htab) && !htab_is_lru(htab))
+		num_entries += num_possible_cpus();
+
+	htab->elems = bpf_map_area_alloc(htab->elem_size * num_entries);
 	if (!htab->elems)
 		return -ENOMEM;
 
 	if (!htab_is_percpu(htab))
 		goto skip_percpu_elems;
 
-	for (i = 0; i < htab->map.max_entries; i++) {
+	for (i = 0; i < num_entries; i++) {
 		u32 size = round_up(htab->map.value_size, 8);
 		void __percpu *pptr;
 
@@ -166,11 +167,11 @@ skip_percpu_elems:
 	if (htab_is_lru(htab))
 		bpf_lru_populate(&htab->lru, htab->elems,
 				 offsetof(struct htab_elem, lru_node),
-				 htab->elem_size, htab->map.max_entries);
+				 htab->elem_size, num_entries);
 	else
 		pcpu_freelist_populate(&htab->freelist,
 				       htab->elems + offsetof(struct htab_elem, fnode),
-				       htab->elem_size, htab->map.max_entries);
+				       htab->elem_size, num_entries);
 
 	return 0;
 
@@ -191,16 +192,22 @@ static void prealloc_destroy(struct bpf_htab *htab)
 
 static int alloc_extra_elems(struct bpf_htab *htab)
 {
-	void __percpu *pptr;
+	struct htab_elem *__percpu *pptr, *l_new;
+	struct pcpu_freelist_node *l;
 	int cpu;
 
-	pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
+	pptr = __alloc_percpu_gfp(sizeof(struct htab_elem *), 8,
+				  GFP_USER | __GFP_NOWARN);
 	if (!pptr)
 		return -ENOMEM;
 
 	for_each_possible_cpu(cpu) {
-		((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
-			HTAB_EXTRA_ELEM_FREE;
+		l = pcpu_freelist_pop(&htab->freelist);
+		/* pop will succeed, since prealloc_init()
+		 * preallocated extra num_possible_cpus elements
+		 */
+		l_new = container_of(l, struct htab_elem, fnode);
+		*per_cpu_ptr(pptr, cpu) = l_new;
 	}
 	htab->extra_elems = pptr;
 	return 0;
@@ -342,25 +349,25 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 		raw_spin_lock_init(&htab->buckets[i].lock);
 	}
 
-	if (!percpu && !lru) {
-		/* lru itself can remove the least used element, so
-		 * there is no need for an extra elem during map_update.
-		 */
-		err = alloc_extra_elems(htab);
-		if (err)
-			goto free_buckets;
-	}
-
 	if (prealloc) {
 		err = prealloc_init(htab);
 		if (err)
-			goto free_extra_elems;
+			goto free_buckets;
+
+		if (!percpu && !lru) {
+			/* lru itself can remove the least used element, so
+			 * there is no need for an extra elem during map_update.
+			 */
+			err = alloc_extra_elems(htab);
+			if (err)
+				goto free_prealloc;
+		}
 	}
 
 	return &htab->map;
 
-free_extra_elems:
-	free_percpu(htab->extra_elems);
+free_prealloc:
+	prealloc_destroy(htab);
 free_buckets:
 	bpf_map_area_free(htab->buckets);
 free_htab:
@@ -575,12 +582,7 @@ static void htab_elem_free_rcu(struct rcu_head *head)
 
 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 {
-	if (l->state == HTAB_EXTRA_ELEM_USED) {
-		l->state = HTAB_EXTRA_ELEM_FREE;
-		return;
-	}
-
-	if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
+	if (htab_is_prealloc(htab)) {
 		pcpu_freelist_push(&htab->freelist, &l->fnode);
 	} else {
 		atomic_dec(&htab->count);
@@ -610,47 +612,43 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 					 void *value, u32 key_size, u32 hash,
 					 bool percpu, bool onallcpus,
-					 bool old_elem_exists)
+					 struct htab_elem *old_elem)
 {
 	u32 size = htab->map.value_size;
-	bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
-	struct htab_elem *l_new;
+	bool prealloc = htab_is_prealloc(htab);
+	struct htab_elem *l_new, **pl_new;
 	void __percpu *pptr;
-	int err = 0;
 
 	if (prealloc) {
-		struct pcpu_freelist_node *l;
+		if (old_elem) {
+			/* if we're updating the existing element,
+			 * use per-cpu extra elems to avoid freelist_pop/push
+			 */
+			pl_new = this_cpu_ptr(htab->extra_elems);
+			l_new = *pl_new;
+			*pl_new = old_elem;
+		} else {
+			struct pcpu_freelist_node *l;
 
-		l = pcpu_freelist_pop(&htab->freelist);
-		if (!l)
-			err = -E2BIG;
-		else
+			l = pcpu_freelist_pop(&htab->freelist);
+			if (!l)
+				return ERR_PTR(-E2BIG);
 			l_new = container_of(l, struct htab_elem, fnode);
-	} else {
-		if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
-			atomic_dec(&htab->count);
-			err = -E2BIG;
-		} else {
-			l_new = kmalloc(htab->elem_size,
-					GFP_ATOMIC | __GFP_NOWARN);
-			if (!l_new)
-				return ERR_PTR(-ENOMEM);
 		}
-	}
-
-	if (err) {
-		if (!old_elem_exists)
-			return ERR_PTR(err);
-
-		/* if we're updating the existing element and the hash table
-		 * is full, use per-cpu extra elems
-		 */
-		l_new = this_cpu_ptr(htab->extra_elems);
-		if (l_new->state != HTAB_EXTRA_ELEM_FREE)
-			return ERR_PTR(-E2BIG);
-		l_new->state = HTAB_EXTRA_ELEM_USED;
 	} else {
-		l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
+		if (atomic_inc_return(&htab->count) > htab->map.max_entries)
+			if (!old_elem) {
+				/* when map is full and update() is replacing
+				 * old element, it's ok to allocate, since
+				 * old element will be freed immediately.
+				 * Otherwise return an error
+				 */
+				atomic_dec(&htab->count);
+				return ERR_PTR(-E2BIG);
+			}
+		l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
+		if (!l_new)
+			return ERR_PTR(-ENOMEM);
 	}
 
 	memcpy(l_new->key, key, key_size);
@@ -731,7 +729,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 		goto err;
 
 	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
-				!!l_old);
+				l_old);
 	if (IS_ERR(l_new)) {
 		/* all pre-allocated elements are in use or memory exhausted */
 		ret = PTR_ERR(l_new);
@@ -744,7 +742,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	hlist_nulls_add_head_rcu(&l_new->hash_node, head);
 	if (l_old) {
 		hlist_nulls_del_rcu(&l_old->hash_node);
-		free_htab_elem(htab, l_old);
+		if (!htab_is_prealloc(htab))
+			free_htab_elem(htab, l_old);
 	}
 	ret = 0;
 err:
@@ -856,7 +855,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 				value, onallcpus);
 	} else {
 		l_new = alloc_htab_elem(htab, key, value, key_size,
-					hash, true, onallcpus, false);
+					hash, true, onallcpus, NULL);
 		if (IS_ERR(l_new)) {
 			ret = PTR_ERR(l_new);
 			goto err;
@@ -1024,8 +1023,7 @@ static void delete_all_elements(struct bpf_htab *htab)
 
 		hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
 			hlist_nulls_del_rcu(&l->hash_node);
-			if (l->state != HTAB_EXTRA_ELEM_USED)
-				htab_elem_free(htab, l);
+			htab_elem_free(htab, l);
 		}
 	}
 }
@@ -1045,7 +1043,7 @@ static void htab_map_free(struct bpf_map *map)
 	 * not have executed. Wait for them.
 	 */
 	rcu_barrier();
-	if (htab->map.map_flags & BPF_F_NO_PREALLOC)
+	if (!htab_is_prealloc(htab))
 		delete_all_elements(htab);
 	else
 		prealloc_destroy(htab);
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index cada17ac00b8..a0aa2009b0e0 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -80,8 +80,9 @@ static void test_hashmap(int task, void *data)
 	assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == 0);
 	key = 2;
 	assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
-	key = 1;
-	assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+	key = 3;
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+	       errno == E2BIG);
 
 	/* Check that key = 0 doesn't exist. */
 	key = 0;
@@ -110,6 +111,24 @@ static void test_hashmap(int task, void *data)
 	close(fd);
 }
 
+static void test_hashmap_sizes(int task, void *data)
+{
+	int fd, i, j;
+
+	for (i = 1; i <= 512; i <<= 1)
+		for (j = 1; j <= 1 << 18; j <<= 1) {
+			fd = bpf_create_map(BPF_MAP_TYPE_HASH, i, j,
+					    2, map_flags);
+			if (fd < 0) {
+				printf("Failed to create hashmap key=%d value=%d '%s'\n",
+				       i, j, strerror(errno));
+				exit(1);
+			}
+			close(fd);
+			usleep(10); /* give kernel time to destroy */
+		}
+}
+
 static void test_hashmap_percpu(int task, void *data)
 {
 	unsigned int nr_cpus = bpf_num_possible_cpus();
@@ -317,7 +336,10 @@ static void test_arraymap_percpu(int task, void *data)
 static void test_arraymap_percpu_many_keys(void)
 {
 	unsigned int nr_cpus = bpf_num_possible_cpus();
-	unsigned int nr_keys = 20000;
+	/* nr_keys is not too large otherwise the test stresses percpu
+	 * allocator more than anything else
+	 */
+	unsigned int nr_keys = 2000;
 	long values[nr_cpus];
 	int key, fd, i;
 
@@ -419,6 +441,7 @@ static void test_map_stress(void)
 {
 	run_parallel(100, test_hashmap, NULL);
 	run_parallel(100, test_hashmap_percpu, NULL);
+	run_parallel(100, test_hashmap_sizes, NULL);
 
 	run_parallel(100, test_arraymap, NULL);
 	run_parallel(100, test_arraymap_percpu, NULL);
-- 
cgit v1.2.3


From c64c0b3cac4c5b8cb093727d2c19743ea3965c0b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 21 Mar 2017 19:22:28 -0700
Subject: ipv4: provide stronger user input validation in nl_fib_input()

Alexander reported a KMSAN splat caused by reads of uninitialized
field (tb_id_in) from user provided struct fib_result_nl

It turns out nl_fib_input() sanity tests on user input is a bit
wrong :

User can pretend nlh->nlmsg_len is big enough, but provide
at sendmsg() time a too small buffer.

Reported-by: Alexander Potapenko <glider@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 42bfd08109dd..8f2133ffc2ff 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1083,7 +1083,8 @@ static void nl_fib_input(struct sk_buff *skb)
 
 	net = sock_net(skb->sk);
 	nlh = nlmsg_hdr(skb);
-	if (skb->len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len ||
+	if (skb->len < nlmsg_total_size(sizeof(*frn)) ||
+	    skb->len < nlh->nlmsg_len ||
 	    nlmsg_len(nlh) < sizeof(*frn))
 		return;
 
-- 
cgit v1.2.3


From a97e50cc4cb67e1e7bff56f6b41cda62ca832336 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 22 Mar 2017 13:08:08 +0100
Subject: socket, bpf: fix sk_filter use after free in sk_clone_lock

In sk_clone_lock(), we create a new socket and inherit most of the
parent's members via sock_copy() which memcpy()'s various sections.
Now, in case the parent socket had a BPF socket filter attached,
then newsk->sk_filter points to the same instance as the original
sk->sk_filter.

sk_filter_charge() is then called on the newsk->sk_filter to take a
reference and should that fail due to hitting max optmem, we bail
out and release the newsk instance.

The issue is that commit 278571baca2a ("net: filter: simplify socket
charging") wrongly combined the dismantle path with the failure path
of xfrm_sk_clone_policy(). This means, even when charging failed, we
call sk_free_unlock_clone() on the newsk, which then still points to
the same sk_filter as the original sk.

Thus, sk_free_unlock_clone() calls into __sk_destruct() eventually
where it tests for present sk_filter and calls sk_filter_uncharge()
on it, which potentially lets sk_omem_alloc wrap around and releases
the eBPF prog and sk_filter structure from the (still intact) parent.

Fix it by making sure that when sk_filter_charge() failed, we reset
newsk->sk_filter back to NULL before passing to sk_free_unlock_clone(),
so that we don't mess with the parents sk_filter.

Only if xfrm_sk_clone_policy() fails, we did reach the point where
either the parent's filter was NULL and as a result newsk's as well
or where we previously had a successful sk_filter_charge(), thus for
that case, we do need sk_filter_uncharge() to release the prior taken
reference on sk_filter.

Fixes: 278571baca2a ("net: filter: simplify socket charging")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/core/sock.c b/net/core/sock.c
index acb0d4137499..2c4f574168fb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1544,6 +1544,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 			is_charged = sk_filter_charge(newsk, filter);
 
 		if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
+			/* We need to make sure that we don't uncharge the new
+			 * socket if we couldn't charge it in the first place
+			 * as otherwise we uncharge the parent's filter.
+			 */
+			if (!is_charged)
+				RCU_INIT_POINTER(newsk->sk_filter, NULL);
 			sk_free_unlock_clone(newsk);
 			newsk = NULL;
 			goto out;
-- 
cgit v1.2.3


From 1d2a6a5e4bf2921531071fcff8538623dce74efa Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Wed, 22 Mar 2017 16:08:33 +0100
Subject: genetlink: fix counting regression on ctrl_dumpfamily()

Commit 2ae0f17df1cd ("genetlink: use idr to track families") replaced

	if (++n < fams_to_skip)
		continue;
into:

	if (n++ < fams_to_skip)
		continue;

This subtle change cause that on retry ctrl_dumpfamily() call we omit
one family that failed to do ctrl_fill_info() on previous call, because
cb->args[0] = n number counts also family that failed to do
ctrl_fill_info().

Patch fixes the problem and avoid confusion in the future just decrease
n counter when ctrl_fill_info() fail.

User visible problem caused by this bug is failure to get access to
some genetlink family i.e. nl80211. However problem is reproducible
only if number of registered genetlink families is big enough to
cause second call of ctrl_dumpfamily().

Cc: Xose Vazquez Perez <xose.vazquez@gmail.com>
Cc: Larry Finger <Larry.Finger@lwfinger.net>
Cc: Johannes Berg <johannes@sipsolutions.net>
Fixes: 2ae0f17df1cd ("genetlink: use idr to track families")
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/genetlink.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index fb6e10fdb217..92e0981f7404 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -783,8 +783,10 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
 
 		if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid,
 				   cb->nlh->nlmsg_seq, NLM_F_MULTI,
-				   skb, CTRL_CMD_NEWFAMILY) < 0)
+				   skb, CTRL_CMD_NEWFAMILY) < 0) {
+			n--;
 			break;
+		}
 	}
 
 	cb->args[0] = n;
-- 
cgit v1.2.3


From 15bb7745e94a665caf42bfaabf0ce062845b533b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 22 Mar 2017 08:10:21 -0700
Subject: tcp: initialize icsk_ack.lrcvtime at session start time

icsk_ack.lrcvtime has a 0 value at socket creation time.

tcpi_last_data_recv can have bogus value if no payload is ever received.

This patch initializes icsk_ack.lrcvtime for active sessions
in tcp_finish_connect(), and for passive sessions in
tcp_create_openreq_child()

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c     | 2 +-
 net/ipv4/tcp_minisocks.c | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 39c393cc0fd3..c43119726a62 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5541,6 +5541,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
 	tcp_set_state(sk, TCP_ESTABLISHED);
+	icsk->icsk_ack.lrcvtime = tcp_time_stamp;
 
 	if (skb) {
 		icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
@@ -5759,7 +5760,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 			 * to stand against the temptation 8)     --ANK
 			 */
 			inet_csk_schedule_ack(sk);
-			icsk->icsk_ack.lrcvtime = tcp_time_stamp;
 			tcp_enter_quickack_mode(sk);
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
 						  TCP_DELACK_MAX, TCP_RTO_MAX);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 7e16243cdb58..65c0f3d13eca 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -460,6 +460,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 		newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
 		minmax_reset(&newtp->rtt_min, tcp_time_stamp, ~0U);
 		newicsk->icsk_rto = TCP_TIMEOUT_INIT;
+		newicsk->icsk_ack.lrcvtime = tcp_time_stamp;
 
 		newtp->packets_out = 0;
 		newtp->retrans_out = 0;
-- 
cgit v1.2.3


From ec4fbd64751de18729eaa816ec69e4b504b5a7a2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 22 Mar 2017 08:57:15 -0700
Subject: inet: frag: release spinlock before calling icmp_send()

Dmitry reported a lockdep splat [1] (false positive) that we can fix
by releasing the spinlock before calling icmp_send() from ip_expire()

This is a false positive because sending an ICMP message can not
possibly re-enter the IP frag engine.

[1]
[ INFO: possible circular locking dependency detected ]
4.10.0+ #29 Not tainted
-------------------------------------------------------
modprobe/12392 is trying to acquire lock:
 (_xmit_ETHER#2){+.-...}, at: [<ffffffff837a8182>] spin_lock
include/linux/spinlock.h:299 [inline]
 (_xmit_ETHER#2){+.-...}, at: [<ffffffff837a8182>] __netif_tx_lock
include/linux/netdevice.h:3486 [inline]
 (_xmit_ETHER#2){+.-...}, at: [<ffffffff837a8182>]
sch_direct_xmit+0x282/0x6d0 net/sched/sch_generic.c:180

but task is already holding lock:
 (&(&q->lock)->rlock){+.-...}, at: [<ffffffff8389a4d1>] spin_lock
include/linux/spinlock.h:299 [inline]
 (&(&q->lock)->rlock){+.-...}, at: [<ffffffff8389a4d1>]
ip_expire+0x51/0x6c0 net/ipv4/ip_fragment.c:201

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (&(&q->lock)->rlock){+.-...}:
       validate_chain kernel/locking/lockdep.c:2267 [inline]
       __lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340
       lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755
       __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
       _raw_spin_lock+0x33/0x50 kernel/locking/spinlock.c:151
       spin_lock include/linux/spinlock.h:299 [inline]
       ip_defrag+0x3a2/0x4130 net/ipv4/ip_fragment.c:669
       ip_check_defrag+0x4e3/0x8b0 net/ipv4/ip_fragment.c:713
       packet_rcv_fanout+0x282/0x800 net/packet/af_packet.c:1459
       deliver_skb net/core/dev.c:1834 [inline]
       dev_queue_xmit_nit+0x294/0xa90 net/core/dev.c:1890
       xmit_one net/core/dev.c:2903 [inline]
       dev_hard_start_xmit+0x16b/0xab0 net/core/dev.c:2923
       sch_direct_xmit+0x31f/0x6d0 net/sched/sch_generic.c:182
       __dev_xmit_skb net/core/dev.c:3092 [inline]
       __dev_queue_xmit+0x13e5/0x1e60 net/core/dev.c:3358
       dev_queue_xmit+0x17/0x20 net/core/dev.c:3423
       neigh_resolve_output+0x6b9/0xb10 net/core/neighbour.c:1308
       neigh_output include/net/neighbour.h:478 [inline]
       ip_finish_output2+0x8b8/0x15a0 net/ipv4/ip_output.c:228
       ip_do_fragment+0x1d93/0x2720 net/ipv4/ip_output.c:672
       ip_fragment.constprop.54+0x145/0x200 net/ipv4/ip_output.c:545
       ip_finish_output+0x82d/0xe10 net/ipv4/ip_output.c:314
       NF_HOOK_COND include/linux/netfilter.h:246 [inline]
       ip_output+0x1f0/0x7a0 net/ipv4/ip_output.c:404
       dst_output include/net/dst.h:486 [inline]
       ip_local_out+0x95/0x170 net/ipv4/ip_output.c:124
       ip_send_skb+0x3c/0xc0 net/ipv4/ip_output.c:1492
       ip_push_pending_frames+0x64/0x80 net/ipv4/ip_output.c:1512
       raw_sendmsg+0x26de/0x3a00 net/ipv4/raw.c:655
       inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:761
       sock_sendmsg_nosec net/socket.c:633 [inline]
       sock_sendmsg+0xca/0x110 net/socket.c:643
       ___sys_sendmsg+0x4a3/0x9f0 net/socket.c:1985
       __sys_sendmmsg+0x25c/0x750 net/socket.c:2075
       SYSC_sendmmsg net/socket.c:2106 [inline]
       SyS_sendmmsg+0x35/0x60 net/socket.c:2101
       do_syscall_64+0x2e8/0x930 arch/x86/entry/common.c:281
       return_from_SYSCALL_64+0x0/0x7a

-> #0 (_xmit_ETHER#2){+.-...}:
       check_prev_add kernel/locking/lockdep.c:1830 [inline]
       check_prevs_add+0xa8f/0x19f0 kernel/locking/lockdep.c:1940
       validate_chain kernel/locking/lockdep.c:2267 [inline]
       __lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340
       lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755
       __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
       _raw_spin_lock+0x33/0x50 kernel/locking/spinlock.c:151
       spin_lock include/linux/spinlock.h:299 [inline]
       __netif_tx_lock include/linux/netdevice.h:3486 [inline]
       sch_direct_xmit+0x282/0x6d0 net/sched/sch_generic.c:180
       __dev_xmit_skb net/core/dev.c:3092 [inline]
       __dev_queue_xmit+0x13e5/0x1e60 net/core/dev.c:3358
       dev_queue_xmit+0x17/0x20 net/core/dev.c:3423
       neigh_hh_output include/net/neighbour.h:468 [inline]
       neigh_output include/net/neighbour.h:476 [inline]
       ip_finish_output2+0xf6c/0x15a0 net/ipv4/ip_output.c:228
       ip_finish_output+0xa29/0xe10 net/ipv4/ip_output.c:316
       NF_HOOK_COND include/linux/netfilter.h:246 [inline]
       ip_output+0x1f0/0x7a0 net/ipv4/ip_output.c:404
       dst_output include/net/dst.h:486 [inline]
       ip_local_out+0x95/0x170 net/ipv4/ip_output.c:124
       ip_send_skb+0x3c/0xc0 net/ipv4/ip_output.c:1492
       ip_push_pending_frames+0x64/0x80 net/ipv4/ip_output.c:1512
       icmp_push_reply+0x372/0x4d0 net/ipv4/icmp.c:394
       icmp_send+0x156c/0x1c80 net/ipv4/icmp.c:754
       ip_expire+0x40e/0x6c0 net/ipv4/ip_fragment.c:239
       call_timer_fn+0x241/0x820 kernel/time/timer.c:1268
       expire_timers kernel/time/timer.c:1307 [inline]
       __run_timers+0x960/0xcf0 kernel/time/timer.c:1601
       run_timer_softirq+0x21/0x80 kernel/time/timer.c:1614
       __do_softirq+0x31f/0xbe7 kernel/softirq.c:284
       invoke_softirq kernel/softirq.c:364 [inline]
       irq_exit+0x1cc/0x200 kernel/softirq.c:405
       exiting_irq arch/x86/include/asm/apic.h:657 [inline]
       smp_apic_timer_interrupt+0x76/0xa0 arch/x86/kernel/apic/apic.c:962
       apic_timer_interrupt+0x93/0xa0 arch/x86/entry/entry_64.S:707
       __read_once_size include/linux/compiler.h:254 [inline]
       atomic_read arch/x86/include/asm/atomic.h:26 [inline]
       rcu_dynticks_curr_cpu_in_eqs kernel/rcu/tree.c:350 [inline]
       __rcu_is_watching kernel/rcu/tree.c:1133 [inline]
       rcu_is_watching+0x83/0x110 kernel/rcu/tree.c:1147
       rcu_read_lock_held+0x87/0xc0 kernel/rcu/update.c:293
       radix_tree_deref_slot include/linux/radix-tree.h:238 [inline]
       filemap_map_pages+0x6d4/0x1570 mm/filemap.c:2335
       do_fault_around mm/memory.c:3231 [inline]
       do_read_fault mm/memory.c:3265 [inline]
       do_fault+0xbd5/0x2080 mm/memory.c:3370
       handle_pte_fault mm/memory.c:3600 [inline]
       __handle_mm_fault+0x1062/0x2cb0 mm/memory.c:3714
       handle_mm_fault+0x1e2/0x480 mm/memory.c:3751
       __do_page_fault+0x4f6/0xb60 arch/x86/mm/fault.c:1397
       do_page_fault+0x54/0x70 arch/x86/mm/fault.c:1460
       page_fault+0x28/0x30 arch/x86/entry/entry_64.S:1011

other info that might help us debug this:

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&(&q->lock)->rlock);
                               lock(_xmit_ETHER#2);
                               lock(&(&q->lock)->rlock);
  lock(_xmit_ETHER#2);

 *** DEADLOCK ***

10 locks held by modprobe/12392:
 #0:  (&mm->mmap_sem){++++++}, at: [<ffffffff81329758>]
__do_page_fault+0x2b8/0xb60 arch/x86/mm/fault.c:1336
 #1:  (rcu_read_lock){......}, at: [<ffffffff8188cab6>]
filemap_map_pages+0x1e6/0x1570 mm/filemap.c:2324
 #2:  (&(ptlock_ptr(page))->rlock#2){+.+...}, at: [<ffffffff81984a78>]
spin_lock include/linux/spinlock.h:299 [inline]
 #2:  (&(ptlock_ptr(page))->rlock#2){+.+...}, at: [<ffffffff81984a78>]
pte_alloc_one_map mm/memory.c:2944 [inline]
 #2:  (&(ptlock_ptr(page))->rlock#2){+.+...}, at: [<ffffffff81984a78>]
alloc_set_pte+0x13b8/0x1b90 mm/memory.c:3072
 #3:  (((&q->timer))){+.-...}, at: [<ffffffff81627e72>]
lockdep_copy_map include/linux/lockdep.h:175 [inline]
 #3:  (((&q->timer))){+.-...}, at: [<ffffffff81627e72>]
call_timer_fn+0x1c2/0x820 kernel/time/timer.c:1258
 #4:  (&(&q->lock)->rlock){+.-...}, at: [<ffffffff8389a4d1>] spin_lock
include/linux/spinlock.h:299 [inline]
 #4:  (&(&q->lock)->rlock){+.-...}, at: [<ffffffff8389a4d1>]
ip_expire+0x51/0x6c0 net/ipv4/ip_fragment.c:201
 #5:  (rcu_read_lock){......}, at: [<ffffffff8389a633>]
ip_expire+0x1b3/0x6c0 net/ipv4/ip_fragment.c:216
 #6:  (slock-AF_INET){+.-...}, at: [<ffffffff839b3313>] spin_trylock
include/linux/spinlock.h:309 [inline]
 #6:  (slock-AF_INET){+.-...}, at: [<ffffffff839b3313>] icmp_xmit_lock
net/ipv4/icmp.c:219 [inline]
 #6:  (slock-AF_INET){+.-...}, at: [<ffffffff839b3313>]
icmp_send+0x803/0x1c80 net/ipv4/icmp.c:681
 #7:  (rcu_read_lock_bh){......}, at: [<ffffffff838ab9a1>]
ip_finish_output2+0x2c1/0x15a0 net/ipv4/ip_output.c:198
 #8:  (rcu_read_lock_bh){......}, at: [<ffffffff836d1dee>]
__dev_queue_xmit+0x23e/0x1e60 net/core/dev.c:3324
 #9:  (dev->qdisc_running_key ?: &qdisc_running_key){+.....}, at:
[<ffffffff836d3a27>] dev_queue_xmit+0x17/0x20 net/core/dev.c:3423

stack backtrace:
CPU: 0 PID: 12392 Comm: modprobe Not tainted 4.10.0+ #29
Hardware name: Google Google Compute Engine/Google Compute Engine,
BIOS Google 01/01/2011
Call Trace:
 <IRQ>
 __dump_stack lib/dump_stack.c:16 [inline]
 dump_stack+0x2ee/0x3ef lib/dump_stack.c:52
 print_circular_bug+0x307/0x3b0 kernel/locking/lockdep.c:1204
 check_prev_add kernel/locking/lockdep.c:1830 [inline]
 check_prevs_add+0xa8f/0x19f0 kernel/locking/lockdep.c:1940
 validate_chain kernel/locking/lockdep.c:2267 [inline]
 __lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340
 lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755
 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
 _raw_spin_lock+0x33/0x50 kernel/locking/spinlock.c:151
 spin_lock include/linux/spinlock.h:299 [inline]
 __netif_tx_lock include/linux/netdevice.h:3486 [inline]
 sch_direct_xmit+0x282/0x6d0 net/sched/sch_generic.c:180
 __dev_xmit_skb net/core/dev.c:3092 [inline]
 __dev_queue_xmit+0x13e5/0x1e60 net/core/dev.c:3358
 dev_queue_xmit+0x17/0x20 net/core/dev.c:3423
 neigh_hh_output include/net/neighbour.h:468 [inline]
 neigh_output include/net/neighbour.h:476 [inline]
 ip_finish_output2+0xf6c/0x15a0 net/ipv4/ip_output.c:228
 ip_finish_output+0xa29/0xe10 net/ipv4/ip_output.c:316
 NF_HOOK_COND include/linux/netfilter.h:246 [inline]
 ip_output+0x1f0/0x7a0 net/ipv4/ip_output.c:404
 dst_output include/net/dst.h:486 [inline]
 ip_local_out+0x95/0x170 net/ipv4/ip_output.c:124
 ip_send_skb+0x3c/0xc0 net/ipv4/ip_output.c:1492
 ip_push_pending_frames+0x64/0x80 net/ipv4/ip_output.c:1512
 icmp_push_reply+0x372/0x4d0 net/ipv4/icmp.c:394
 icmp_send+0x156c/0x1c80 net/ipv4/icmp.c:754
 ip_expire+0x40e/0x6c0 net/ipv4/ip_fragment.c:239
 call_timer_fn+0x241/0x820 kernel/time/timer.c:1268
 expire_timers kernel/time/timer.c:1307 [inline]
 __run_timers+0x960/0xcf0 kernel/time/timer.c:1601
 run_timer_softirq+0x21/0x80 kernel/time/timer.c:1614
 __do_softirq+0x31f/0xbe7 kernel/softirq.c:284
 invoke_softirq kernel/softirq.c:364 [inline]
 irq_exit+0x1cc/0x200 kernel/softirq.c:405
 exiting_irq arch/x86/include/asm/apic.h:657 [inline]
 smp_apic_timer_interrupt+0x76/0xa0 arch/x86/kernel/apic/apic.c:962
 apic_timer_interrupt+0x93/0xa0 arch/x86/entry/entry_64.S:707
RIP: 0010:__read_once_size include/linux/compiler.h:254 [inline]
RIP: 0010:atomic_read arch/x86/include/asm/atomic.h:26 [inline]
RIP: 0010:rcu_dynticks_curr_cpu_in_eqs kernel/rcu/tree.c:350 [inline]
RIP: 0010:__rcu_is_watching kernel/rcu/tree.c:1133 [inline]
RIP: 0010:rcu_is_watching+0x83/0x110 kernel/rcu/tree.c:1147
RSP: 0000:ffff8801c391f120 EFLAGS: 00000a03 ORIG_RAX: ffffffffffffff10
RAX: dffffc0000000000 RBX: ffff8801c391f148 RCX: 0000000000000000
RDX: 0000000000000000 RSI: 000055edd4374000 RDI: ffff8801dbe1ae0c
RBP: ffff8801c391f1a0 R08: 0000000000000002 R09: 0000000000000000
R10: dffffc0000000000 R11: 0000000000000002 R12: 1ffff10038723e25
R13: ffff8801dbe1ae00 R14: ffff8801c391f680 R15: dffffc0000000000
 </IRQ>
 rcu_read_lock_held+0x87/0xc0 kernel/rcu/update.c:293
 radix_tree_deref_slot include/linux/radix-tree.h:238 [inline]
 filemap_map_pages+0x6d4/0x1570 mm/filemap.c:2335
 do_fault_around mm/memory.c:3231 [inline]
 do_read_fault mm/memory.c:3265 [inline]
 do_fault+0xbd5/0x2080 mm/memory.c:3370
 handle_pte_fault mm/memory.c:3600 [inline]
 __handle_mm_fault+0x1062/0x2cb0 mm/memory.c:3714
 handle_mm_fault+0x1e2/0x480 mm/memory.c:3751
 __do_page_fault+0x4f6/0xb60 arch/x86/mm/fault.c:1397
 do_page_fault+0x54/0x70 arch/x86/mm/fault.c:1460
 page_fault+0x28/0x30 arch/x86/entry/entry_64.S:1011
RIP: 0033:0x7f83172f2786
RSP: 002b:00007fffe859ae80 EFLAGS: 00010293
RAX: 000055edd4373040 RBX: 00007f83175111c8 RCX: 000055edd4373238
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00007f8317510970
RBP: 00007fffe859afd0 R08: 0000000000000009 R09: 0000000000000000
R10: 0000000000000064 R11: 0000000000000000 R12: 000055edd4373040
R13: 0000000000000000 R14: 00007fffe859afe8 R15: 0000000000000000

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_fragment.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index bbe7f72db9c1..b3cdeec85f1f 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -198,6 +198,7 @@ static void ip_expire(unsigned long arg)
 	qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
 	net = container_of(qp->q.net, struct net, ipv4.frags);
 
+	rcu_read_lock();
 	spin_lock(&qp->q.lock);
 
 	if (qp->q.flags & INET_FRAG_COMPLETE)
@@ -207,7 +208,7 @@ static void ip_expire(unsigned long arg)
 	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
 
 	if (!inet_frag_evicting(&qp->q)) {
-		struct sk_buff *head = qp->q.fragments;
+		struct sk_buff *clone, *head = qp->q.fragments;
 		const struct iphdr *iph;
 		int err;
 
@@ -216,32 +217,40 @@ static void ip_expire(unsigned long arg)
 		if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments)
 			goto out;
 
-		rcu_read_lock();
 		head->dev = dev_get_by_index_rcu(net, qp->iif);
 		if (!head->dev)
-			goto out_rcu_unlock;
+			goto out;
+
 
 		/* skb has no dst, perform route lookup again */
 		iph = ip_hdr(head);
 		err = ip_route_input_noref(head, iph->daddr, iph->saddr,
 					   iph->tos, head->dev);
 		if (err)
-			goto out_rcu_unlock;
+			goto out;
 
 		/* Only an end host needs to send an ICMP
 		 * "Fragment Reassembly Timeout" message, per RFC792.
 		 */
 		if (frag_expire_skip_icmp(qp->user) &&
 		    (skb_rtable(head)->rt_type != RTN_LOCAL))
-			goto out_rcu_unlock;
+			goto out;
+
+		clone = skb_clone(head, GFP_ATOMIC);
 
 		/* Send an ICMP "Fragment Reassembly Timeout" message. */
-		icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
-out_rcu_unlock:
-		rcu_read_unlock();
+		if (clone) {
+			spin_unlock(&qp->q.lock);
+			icmp_send(clone, ICMP_TIME_EXCEEDED,
+				  ICMP_EXC_FRAGTIME, 0);
+			consume_skb(clone);
+			goto out_rcu_unlock;
+		}
 	}
 out:
 	spin_unlock(&qp->q.lock);
+out_rcu_unlock:
+	rcu_read_unlock();
 	ipq_put(qp);
 }
 
-- 
cgit v1.2.3


From 6e9e6cc8f4e4f2cd67931510c9f39abf3d9e0d3b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 20 Mar 2017 15:31:10 -0700
Subject: Bluetooth: btqcomsmd: fix compile-test dependency

compile-testing fails when QCOM_SMD is a loadable module:

drivers/bluetooth/built-in.o: In function `btqcomsmd_send':
btqca.c:(.text+0xa8): undefined reference to `qcom_smd_send'
drivers/bluetooth/built-in.o: In function `btqcomsmd_probe':
btqca.c:(.text+0x3ec): undefined reference to `qcom_wcnss_open_channel'
btqca.c:(.text+0x46c): undefined reference to `qcom_smd_set_drvdata'

This clarifies the dependency to allow compile-testing only when
SMD is completely disabled, otherwise the dependency on QCOM_SMD
will make sure we can link against it.

Fixes: e27ee2b16bad ("Bluetooth: btqcomsmd: Allow driver to build if COMPILE_TEST is enabled")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
[bjorn: Restructure and clarify dependency to QCOM_WCNSS_CTRL]
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/bluetooth/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig
index c2c14a12713b..08e054507d0b 100644
--- a/drivers/bluetooth/Kconfig
+++ b/drivers/bluetooth/Kconfig
@@ -344,7 +344,8 @@ config BT_WILINK
 
 config BT_QCOMSMD
 	tristate "Qualcomm SMD based HCI support"
-	depends on (QCOM_SMD && QCOM_WCNSS_CTRL) || COMPILE_TEST
+	depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n)
+	depends on QCOM_WCNSS_CTRL || (COMPILE_TEST && QCOM_WCNSS_CTRL=n)
 	select BT_QCA
 	help
 	  Qualcomm SMD based HCI driver.
-- 
cgit v1.2.3


From c04ca616eed02b9abe7afd311382c3ed5eef5c40 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 22 Mar 2017 12:10:02 +0300
Subject: sfc: cleanup a condition in efx_udp_tunnel_del()

Presumably if there is an "add" function, there is also a "del"
function.  But it causes a static checker warning because it looks like
a common cut and paste bug.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/efx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 334bcc6df6b2..50d28261b6b9 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -2404,7 +2404,7 @@ static void efx_udp_tunnel_del(struct net_device *dev, struct udp_tunnel_info *t
 	tnl.type = (u16)efx_tunnel_type;
 	tnl.port = ti->port;
 
-	if (efx->type->udp_tnl_add_port)
+	if (efx->type->udp_tnl_del_port)
 		(void)efx->type->udp_tnl_del_port(efx, tnl);
 }
 
-- 
cgit v1.2.3


From f43feef4e6acde10857fcbfdede790d6b3f2c71d Mon Sep 17 00:00:00 2001
From: "Lendacky, Thomas" <Thomas.Lendacky@amd.com>
Date: Wed, 22 Mar 2017 17:25:27 -0500
Subject: amd-xgbe: Fix the ECC-related bit position definitions

The ECC bit positions that describe whether the ECC interrupt is for
Tx, Rx or descriptor memory and whether the it is a single correctable
or double detected error were defined in incorrectly (reversed order).
Fix the bit position definitions for these settings so that the proper
ECC handling is performed.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amd/xgbe/xgbe-common.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
index 86f1626816ff..127adbeefb10 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
@@ -984,29 +984,29 @@
 #define XP_ECC_CNT1_DESC_DED_WIDTH		8
 #define XP_ECC_CNT1_DESC_SEC_INDEX		0
 #define XP_ECC_CNT1_DESC_SEC_WIDTH		8
-#define XP_ECC_IER_DESC_DED_INDEX		0
+#define XP_ECC_IER_DESC_DED_INDEX		5
 #define XP_ECC_IER_DESC_DED_WIDTH		1
-#define XP_ECC_IER_DESC_SEC_INDEX		1
+#define XP_ECC_IER_DESC_SEC_INDEX		4
 #define XP_ECC_IER_DESC_SEC_WIDTH		1
-#define XP_ECC_IER_RX_DED_INDEX			2
+#define XP_ECC_IER_RX_DED_INDEX			3
 #define XP_ECC_IER_RX_DED_WIDTH			1
-#define XP_ECC_IER_RX_SEC_INDEX			3
+#define XP_ECC_IER_RX_SEC_INDEX			2
 #define XP_ECC_IER_RX_SEC_WIDTH			1
-#define XP_ECC_IER_TX_DED_INDEX			4
+#define XP_ECC_IER_TX_DED_INDEX			1
 #define XP_ECC_IER_TX_DED_WIDTH			1
-#define XP_ECC_IER_TX_SEC_INDEX			5
+#define XP_ECC_IER_TX_SEC_INDEX			0
 #define XP_ECC_IER_TX_SEC_WIDTH			1
-#define XP_ECC_ISR_DESC_DED_INDEX		0
+#define XP_ECC_ISR_DESC_DED_INDEX		5
 #define XP_ECC_ISR_DESC_DED_WIDTH		1
-#define XP_ECC_ISR_DESC_SEC_INDEX		1
+#define XP_ECC_ISR_DESC_SEC_INDEX		4
 #define XP_ECC_ISR_DESC_SEC_WIDTH		1
-#define XP_ECC_ISR_RX_DED_INDEX			2
+#define XP_ECC_ISR_RX_DED_INDEX			3
 #define XP_ECC_ISR_RX_DED_WIDTH			1
-#define XP_ECC_ISR_RX_SEC_INDEX			3
+#define XP_ECC_ISR_RX_SEC_INDEX			2
 #define XP_ECC_ISR_RX_SEC_WIDTH			1
-#define XP_ECC_ISR_TX_DED_INDEX			4
+#define XP_ECC_ISR_TX_DED_INDEX			1
 #define XP_ECC_ISR_TX_DED_WIDTH			1
-#define XP_ECC_ISR_TX_SEC_INDEX			5
+#define XP_ECC_ISR_TX_SEC_INDEX			0
 #define XP_ECC_ISR_TX_SEC_WIDTH			1
 #define XP_I2C_MUTEX_BUSY_INDEX			31
 #define XP_I2C_MUTEX_BUSY_WIDTH			1
-- 
cgit v1.2.3


From 68c386590375b2aea5a3154f17882a30170707bf Mon Sep 17 00:00:00 2001
From: Pavel Belous <pavel.belous@aquantia.com>
Date: Thu, 23 Mar 2017 02:20:39 +0300
Subject: net:ethernet:aquantia: Fix for RX checksum offload.

Since AQC-100/107/108 chips supports hardware checksums for RX we should indicate this
via NETIF_F_RXCSUM flag.

v1->v2: 'Signed-off-by' tag added.

Signed-off-by: Pavel Belous <pavel.belous@aquantia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h | 1 +
 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h
index 1093ea18823a..0592a0330cf0 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h
@@ -137,6 +137,7 @@ static struct aq_hw_caps_s hw_atl_a0_hw_caps_ = {
 	.tx_rings = HW_ATL_A0_TX_RINGS,
 	.rx_rings = HW_ATL_A0_RX_RINGS,
 	.hw_features = NETIF_F_HW_CSUM |
+			NETIF_F_RXCSUM |
 			NETIF_F_RXHASH |
 			NETIF_F_SG |
 			NETIF_F_TSO,
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
index 8bdee3ddd5a0..f3957e930340 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
@@ -188,6 +188,7 @@ static struct aq_hw_caps_s hw_atl_b0_hw_caps_ = {
 	.tx_rings = HW_ATL_B0_TX_RINGS,
 	.rx_rings = HW_ATL_B0_RX_RINGS,
 	.hw_features = NETIF_F_HW_CSUM |
+			NETIF_F_RXCSUM |
 			NETIF_F_RXHASH |
 			NETIF_F_SG |
 			NETIF_F_TSO |
-- 
cgit v1.2.3


From e2ebfb2142acefecc2496e71360f50d25726040b Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 20 Mar 2017 19:50:29 +0200
Subject: mmc: sdhci: Do not disable interrupts while waiting for clock

Disabling interrupts for even a millisecond can cause problems for some
devices. That can happen when sdhci changes clock frequency because it
waits for the clock to become stable under a spin lock.

The spin lock is not necessary here. Anything that is racing with changes
to the I/O state is already broken. The mmc core already provides
synchronization via "claiming" the host.

Although the spin lock probably should be removed from the code paths that
lead to this point, such a patch would touch too much code to be suitable
for stable trees. Consequently, for this patch, just drop the spin lock
while waiting.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Tested-by: Ludovic Desroches <ludovic.desroches@microchip.com>
---
 drivers/mmc/host/sdhci.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 6fdd7a70f229..9c1a099afbbe 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1362,7 +1362,9 @@ void sdhci_enable_clk(struct sdhci_host *host, u16 clk)
 			return;
 		}
 		timeout--;
-		mdelay(1);
+		spin_unlock_irq(&host->lock);
+		usleep_range(900, 1100);
+		spin_lock_irq(&host->lock);
 	}
 
 	clk |= SDHCI_CLOCK_CARD_EN;
-- 
cgit v1.2.3


From 027fb89e61054b4aedd962adb3e2003dec78a716 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 20 Mar 2017 19:50:30 +0200
Subject: mmc: sdhci-pci: Do not disable interrupts in sdhci_intel_set_power

Disabling interrupts for even a millisecond can cause problems for some
devices. That can happen when Intel host controllers wait for the present
state to propagate.

The spin lock is not necessary here. Anything that is racing with changes
to the I/O state is already broken. The mmc core already provides
synchronization via "claiming" the host.

Although the spin lock probably should be removed from the code paths that
lead to this point, such a patch would touch too much code to be suitable
for stable trees. Consequently, for this patch, just drop the spin lock
while waiting.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: stable@vger.kernel.org # v4.9+
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Tested-by: Ludovic Desroches <ludovic.desroches@microchip.com>
---
 drivers/mmc/host/sdhci-pci-core.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index 982b3e349426..86560d590786 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -451,6 +451,8 @@ static void sdhci_intel_set_power(struct sdhci_host *host, unsigned char mode,
 	if (mode == MMC_POWER_OFF)
 		return;
 
+	spin_unlock_irq(&host->lock);
+
 	/*
 	 * Bus power might not enable after D3 -> D0 transition due to the
 	 * present state not yet having propagated. Retry for up to 2ms.
@@ -463,6 +465,8 @@ static void sdhci_intel_set_power(struct sdhci_host *host, unsigned char mode,
 		reg |= SDHCI_POWER_ON;
 		sdhci_writeb(host, reg, SDHCI_POWER_CONTROL);
 	}
+
+	spin_lock_irq(&host->lock);
 }
 
 static const struct sdhci_ops sdhci_intel_byt_ops = {
-- 
cgit v1.2.3


From 3f307834e695f59dac4337a40316bdecfb9d0508 Mon Sep 17 00:00:00 2001
From: Hui Wang <hui.wang@canonical.com>
Date: Thu, 23 Mar 2017 10:00:25 +0800
Subject: ALSA: hda - Adding a group of pin definition to fix headset problem

A new Dell laptop needs to apply ALC269_FIXUP_DELL1_MIC_NO_PRESENCE to
fix the headset problem, and the pin definiton of this machine is not
in the pin quirk table yet, now adding it to the table.

Signed-off-by: Hui Wang <hui.wang@canonical.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 8d6b3703d0a2..7f989898cbd9 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6102,6 +6102,8 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
 		ALC295_STANDARD_PINS,
 		{0x17, 0x21014040},
 		{0x18, 0x21a19050}),
+	SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
+		ALC295_STANDARD_PINS),
 	SND_HDA_PIN_QUIRK(0x10ec0298, 0x1028, "Dell", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
 		ALC298_STANDARD_PINS,
 		{0x17, 0x90170110}),
-- 
cgit v1.2.3


From 633ee407b9d15a75ac9740ba9d3338815e1fcb95 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Tue, 21 Mar 2017 13:44:28 +0100
Subject: libceph: force GFP_NOIO for socket allocations

sock_alloc_inode() allocates socket+inode and socket_wq with
GFP_KERNEL, which is not allowed on the writeback path:

    Workqueue: ceph-msgr con_work [libceph]
    ffff8810871cb018 0000000000000046 0000000000000000 ffff881085d40000
    0000000000012b00 ffff881025cad428 ffff8810871cbfd8 0000000000012b00
    ffff880102fc1000 ffff881085d40000 ffff8810871cb038 ffff8810871cb148
    Call Trace:
    [<ffffffff816dd629>] schedule+0x29/0x70
    [<ffffffff816e066d>] schedule_timeout+0x1bd/0x200
    [<ffffffff81093ffc>] ? ttwu_do_wakeup+0x2c/0x120
    [<ffffffff81094266>] ? ttwu_do_activate.constprop.135+0x66/0x70
    [<ffffffff816deb5f>] wait_for_completion+0xbf/0x180
    [<ffffffff81097cd0>] ? try_to_wake_up+0x390/0x390
    [<ffffffff81086335>] flush_work+0x165/0x250
    [<ffffffff81082940>] ? worker_detach_from_pool+0xd0/0xd0
    [<ffffffffa03b65b1>] xlog_cil_force_lsn+0x81/0x200 [xfs]
    [<ffffffff816d6b42>] ? __slab_free+0xee/0x234
    [<ffffffffa03b4b1d>] _xfs_log_force_lsn+0x4d/0x2c0 [xfs]
    [<ffffffff811adc1e>] ? lookup_page_cgroup_used+0xe/0x30
    [<ffffffffa039a723>] ? xfs_reclaim_inode+0xa3/0x330 [xfs]
    [<ffffffffa03b4dcf>] xfs_log_force_lsn+0x3f/0xf0 [xfs]
    [<ffffffffa039a723>] ? xfs_reclaim_inode+0xa3/0x330 [xfs]
    [<ffffffffa03a62c6>] xfs_iunpin_wait+0xc6/0x1a0 [xfs]
    [<ffffffff810aa250>] ? wake_atomic_t_function+0x40/0x40
    [<ffffffffa039a723>] xfs_reclaim_inode+0xa3/0x330 [xfs]
    [<ffffffffa039ac07>] xfs_reclaim_inodes_ag+0x257/0x3d0 [xfs]
    [<ffffffffa039bb13>] xfs_reclaim_inodes_nr+0x33/0x40 [xfs]
    [<ffffffffa03ab745>] xfs_fs_free_cached_objects+0x15/0x20 [xfs]
    [<ffffffff811c0c18>] super_cache_scan+0x178/0x180
    [<ffffffff8115912e>] shrink_slab_node+0x14e/0x340
    [<ffffffff811afc3b>] ? mem_cgroup_iter+0x16b/0x450
    [<ffffffff8115af70>] shrink_slab+0x100/0x140
    [<ffffffff8115e425>] do_try_to_free_pages+0x335/0x490
    [<ffffffff8115e7f9>] try_to_free_pages+0xb9/0x1f0
    [<ffffffff816d56e4>] ? __alloc_pages_direct_compact+0x69/0x1be
    [<ffffffff81150cba>] __alloc_pages_nodemask+0x69a/0xb40
    [<ffffffff8119743e>] alloc_pages_current+0x9e/0x110
    [<ffffffff811a0ac5>] new_slab+0x2c5/0x390
    [<ffffffff816d71c4>] __slab_alloc+0x33b/0x459
    [<ffffffff815b906d>] ? sock_alloc_inode+0x2d/0xd0
    [<ffffffff8164bda1>] ? inet_sendmsg+0x71/0xc0
    [<ffffffff815b906d>] ? sock_alloc_inode+0x2d/0xd0
    [<ffffffff811a21f2>] kmem_cache_alloc+0x1a2/0x1b0
    [<ffffffff815b906d>] sock_alloc_inode+0x2d/0xd0
    [<ffffffff811d8566>] alloc_inode+0x26/0xa0
    [<ffffffff811da04a>] new_inode_pseudo+0x1a/0x70
    [<ffffffff815b933e>] sock_alloc+0x1e/0x80
    [<ffffffff815ba855>] __sock_create+0x95/0x220
    [<ffffffff815baa04>] sock_create_kern+0x24/0x30
    [<ffffffffa04794d9>] con_work+0xef9/0x2050 [libceph]
    [<ffffffffa04aa9ec>] ? rbd_img_request_submit+0x4c/0x60 [rbd]
    [<ffffffff81084c19>] process_one_work+0x159/0x4f0
    [<ffffffff8108561b>] worker_thread+0x11b/0x530
    [<ffffffff81085500>] ? create_worker+0x1d0/0x1d0
    [<ffffffff8108b6f9>] kthread+0xc9/0xe0
    [<ffffffff8108b630>] ? flush_kthread_worker+0x90/0x90
    [<ffffffff816e1b98>] ret_from_fork+0x58/0x90
    [<ffffffff8108b630>] ? flush_kthread_worker+0x90/0x90

Use memalloc_noio_{save,restore}() to temporarily force GFP_NOIO here.

Cc: stable@vger.kernel.org # 3.10+, needs backporting
Link: http://tracker.ceph.com/issues/19309
Reported-by: Sergey Jerusalimov <wintchester@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
---
 net/ceph/messenger.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 38dcf1eb427d..f76bb3332613 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -7,6 +7,7 @@
 #include <linux/kthread.h>
 #include <linux/net.h>
 #include <linux/nsproxy.h>
+#include <linux/sched/mm.h>
 #include <linux/slab.h>
 #include <linux/socket.h>
 #include <linux/string.h>
@@ -469,11 +470,16 @@ static int ceph_tcp_connect(struct ceph_connection *con)
 {
 	struct sockaddr_storage *paddr = &con->peer_addr.in_addr;
 	struct socket *sock;
+	unsigned int noio_flag;
 	int ret;
 
 	BUG_ON(con->sock);
+
+	/* sock_create_kern() allocates with GFP_KERNEL */
+	noio_flag = memalloc_noio_save();
 	ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family,
 			       SOCK_STREAM, IPPROTO_TCP, &sock);
+	memalloc_noio_restore(noio_flag);
 	if (ret)
 		return ret;
 	sock->sk->sk_allocation = GFP_NOFS;
-- 
cgit v1.2.3


From 1c2593cc8fd5960f8861de1be67135851f884836 Mon Sep 17 00:00:00 2001
From: Ankur Arora <ankur.a.arora@oracle.com>
Date: Tue, 21 Mar 2017 15:43:37 -0700
Subject: xen/acpi: Replace hard coded "ACPI0007"

Replace hard coded "ACPI0007" with ACPI_PROCESSOR_DEVICE_HID

Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 drivers/xen/xen-acpi-processor.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c
index 4ce10bcca18b..fac0d7b0edf7 100644
--- a/drivers/xen/xen-acpi-processor.c
+++ b/drivers/xen/xen-acpi-processor.c
@@ -408,7 +408,7 @@ static int check_acpi_ids(struct acpi_processor *pr_backup)
 	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
 			    ACPI_UINT32_MAX,
 			    read_acpi_id, NULL, NULL, NULL);
-	acpi_get_devices("ACPI0007", read_acpi_id, NULL, NULL);
+	acpi_get_devices(ACPI_PROCESSOR_DEVICE_HID, read_acpi_id, NULL, NULL);
 
 upload:
 	if (!bitmap_equal(acpi_id_present, acpi_ids_done, nr_acpi_bits)) {
-- 
cgit v1.2.3


From 12ffed96d4369f086261ba2ee734fa8c932d7f55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer@amd.com>
Date: Thu, 23 Mar 2017 17:53:26 +0900
Subject: drm/fb-helper: Allow var->x/yres(_virtual) < fb->width/height again
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Otherwise this can also prevent modesets e.g. for switching VTs, when
multiple monitors with different native resolutions are connected.

The depths must match though, so keep the != test for that.

Also update the DRM_DEBUG output to be slightly more accurate, this
doesn't only affect requests from userspace.

Bugzilla: https://bugs.freedesktop.org/99841
Fixes: 865afb11949e ("drm/fb-helper: reject any changes to the fbdev")
Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Daniel Stone <daniels@collabora.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20170323085326.20185-1-michel@daenzer.net
---
 drivers/gpu/drm/drm_fb_helper.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index f6d4d9700734..324a688b3f30 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -1260,9 +1260,9 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var,
 	 * to KMS, hence fail if different settings are requested.
 	 */
 	if (var->bits_per_pixel != fb->format->cpp[0] * 8 ||
-	    var->xres != fb->width || var->yres != fb->height ||
-	    var->xres_virtual != fb->width || var->yres_virtual != fb->height) {
-		DRM_DEBUG("fb userspace requested width/height/bpp different than current fb "
+	    var->xres > fb->width || var->yres > fb->height ||
+	    var->xres_virtual > fb->width || var->yres_virtual > fb->height) {
+		DRM_DEBUG("fb requested width/height/bpp can't fit in current fb "
 			  "request %dx%d-%d (virtual %dx%d) > %dx%d-%d\n",
 			  var->xres, var->yres, var->bits_per_pixel,
 			  var->xres_virtual, var->yres_virtual,
-- 
cgit v1.2.3


From 1914f0cd203c941bba72f9452c8290324f1ef3dc Mon Sep 17 00:00:00 2001
From: Ankur Arora <ankur.a.arora@oracle.com>
Date: Tue, 21 Mar 2017 15:43:38 -0700
Subject: xen/acpi: upload PM state from init-domain to Xen

This was broken in commit cd979883b9ed ("xen/acpi-processor:
fix enabling interrupts on syscore_resume"). do_suspend (from
xen/manage.c) and thus xen_resume_notifier never get called on
the initial-domain at resume (it is if running as guest.)

The rationale for the breaking change was that upload_pm_data()
potentially does blocking work in syscore_resume(). This patch
addresses the original issue by scheduling upload_pm_data() to
execute in workqueue context.

Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: stable@vger.kernel.org
Based-on-patch-by: Konrad Wilk <konrad.wilk@oracle.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 drivers/xen/xen-acpi-processor.c | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c
index fac0d7b0edf7..23e391d3ec01 100644
--- a/drivers/xen/xen-acpi-processor.c
+++ b/drivers/xen/xen-acpi-processor.c
@@ -27,10 +27,10 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/types.h>
+#include <linux/syscore_ops.h>
 #include <linux/acpi.h>
 #include <acpi/processor.h>
 #include <xen/xen.h>
-#include <xen/xen-ops.h>
 #include <xen/interface/platform.h>
 #include <asm/xen/hypercall.h>
 
@@ -466,15 +466,33 @@ static int xen_upload_processor_pm_data(void)
 	return rc;
 }
 
-static int xen_acpi_processor_resume(struct notifier_block *nb,
-				     unsigned long action, void *data)
+static void xen_acpi_processor_resume_worker(struct work_struct *dummy)
 {
+	int rc;
+
 	bitmap_zero(acpi_ids_done, nr_acpi_bits);
-	return xen_upload_processor_pm_data();
+
+	rc = xen_upload_processor_pm_data();
+	if (rc != 0)
+		pr_info("ACPI data upload failed, error = %d\n", rc);
+}
+
+static void xen_acpi_processor_resume(void)
+{
+	static DECLARE_WORK(wq, xen_acpi_processor_resume_worker);
+
+	/*
+	 * xen_upload_processor_pm_data() calls non-atomic code.
+	 * However, the context for xen_acpi_processor_resume is syscore
+	 * with only the boot CPU online and in an atomic context.
+	 *
+	 * So defer the upload for some point safer.
+	 */
+	schedule_work(&wq);
 }
 
-struct notifier_block xen_acpi_processor_resume_nb = {
-	.notifier_call = xen_acpi_processor_resume,
+static struct syscore_ops xap_syscore_ops = {
+	.resume	= xen_acpi_processor_resume,
 };
 
 static int __init xen_acpi_processor_init(void)
@@ -527,7 +545,7 @@ static int __init xen_acpi_processor_init(void)
 	if (rc)
 		goto err_unregister;
 
-	xen_resume_notifier_register(&xen_acpi_processor_resume_nb);
+	register_syscore_ops(&xap_syscore_ops);
 
 	return 0;
 err_unregister:
@@ -544,7 +562,7 @@ static void __exit xen_acpi_processor_exit(void)
 {
 	int i;
 
-	xen_resume_notifier_unregister(&xen_acpi_processor_resume_nb);
+	unregister_syscore_ops(&xap_syscore_ops);
 	kfree(acpi_ids_done);
 	kfree(acpi_id_present);
 	kfree(acpi_id_cst_present);
-- 
cgit v1.2.3


From a2125d02443e9a4e68bcfd9f8004fa23239e8329 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 23 Mar 2017 16:03:11 +0100
Subject: hwmon: (asus_atk0110) fix uninitialized data access

The latest gcc-7 snapshot adds a warning to point out that when
atk_read_value_old or atk_read_value_new fails, we copy
uninitialized data into sensor->cached_value:

drivers/hwmon/asus_atk0110.c: In function 'atk_input_show':
drivers/hwmon/asus_atk0110.c:651:26: error: 'value' may be used uninitialized in this function [-Werror=maybe-uninitialized]

Adding an error check avoids this. All versions of the driver
are affected.

Fixes: 2c03d07ad54d ("hwmon: Add Asus ATK0110 support")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Luca Tettamanti <kronos.it@gmail.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/asus_atk0110.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c
index cccef87963e0..975c43d446f8 100644
--- a/drivers/hwmon/asus_atk0110.c
+++ b/drivers/hwmon/asus_atk0110.c
@@ -646,6 +646,9 @@ static int atk_read_value(struct atk_sensor_data *sensor, u64 *value)
 		else
 			err = atk_read_value_new(sensor, value);
 
+		if (err)
+			return err;
+
 		sensor->is_valid = true;
 		sensor->last_updated = jiffies;
 		sensor->cached_value = *value;
-- 
cgit v1.2.3


From 871a8623d3b40221ad1103aff715dfee0aa4dacf Mon Sep 17 00:00:00 2001
From: Shiraz Saleem <shiraz.saleem@intel.com>
Date: Fri, 17 Mar 2017 18:30:07 -0500
Subject: i40iw: Receive netdev events post INET_NOTIFIER state

Netdev notification events are de-registered only when all
client iwdev instances are removed. If a single client is closed
and re-opened, netdev events could arrive even before the Control
Queue-Pair (CQP) is created, causing a NULL pointer dereference crash
in i40iw_get_cqp_request. Fix this by allowing netdev event
notification only after we have reached the INET_NOTIFIER state with
respect to device initialization.

Reported-by: Stefan Assmann <sassmann@redhat.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/i40iw/i40iw_utils.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
index 0f5d43d1f5fc..70c3e9e79508 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -160,6 +160,9 @@ int i40iw_inetaddr_event(struct notifier_block *notifier,
 		return NOTIFY_DONE;
 
 	iwdev = &hdl->device;
+	if (iwdev->init_state < INET_NOTIFIER)
+		return NOTIFY_DONE;
+
 	netdev = iwdev->ldev->netdev;
 	upper_dev = netdev_master_upper_dev_get(netdev);
 	if (netdev != event_netdev)
@@ -214,6 +217,9 @@ int i40iw_inet6addr_event(struct notifier_block *notifier,
 		return NOTIFY_DONE;
 
 	iwdev = &hdl->device;
+	if (iwdev->init_state < INET_NOTIFIER)
+		return NOTIFY_DONE;
+
 	netdev = iwdev->ldev->netdev;
 	if (netdev != event_netdev)
 		return NOTIFY_DONE;
@@ -260,6 +266,8 @@ int i40iw_net_event(struct notifier_block *notifier, unsigned long event, void *
 		if (!iwhdl)
 			return NOTIFY_DONE;
 		iwdev = &iwhdl->device;
+		if (iwdev->init_state < INET_NOTIFIER)
+			return NOTIFY_DONE;
 		p = (__be32 *)neigh->primary_key;
 		i40iw_copy_ip_ntohl(local_ipaddr, p);
 		if (neigh->nud_state & NUD_VALID) {
-- 
cgit v1.2.3


From 86f46aba8d1ac3ed0904542158a9b9cb9c7a143c Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Wed, 8 Mar 2017 22:00:52 +0200
Subject: IB/core: Protect against self-requeue of a cq work item

We need to make sure that the cq work item does not
run when we are destroying the cq. Unlike flush_work,
cancel_work_sync protects against self-requeue of the
work item (which we can do in ib_cq_poll_work).

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>--
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/cq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index e95510117a6d..2746d2eb3d52 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -196,7 +196,7 @@ void ib_free_cq(struct ib_cq *cq)
 		irq_poll_disable(&cq->iop);
 		break;
 	case IB_POLL_WORKQUEUE:
-		flush_work(&cq->work);
+		cancel_work_sync(&cq->work);
 		break;
 	default:
 		WARN_ON_ONCE(1);
-- 
cgit v1.2.3


From cb8864559631754ac93d5734b165ccd0cad4728c Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Date: Fri, 10 Mar 2017 11:34:20 -0700
Subject: infiniband: Fix alignment of mmap cookies to support VIPT caching

When vmalloc_user is used to create memory that is supposed to be mmap'd
to user space, it is necessary for the mmap cookie (eg the offset) to be
aligned to SHMLBA.

This creates a situation where all virtual mappings of the same physical
page share the same virtual cache index and guarantees VIPT coherence.
Otherwise the cache is non-coherent and the kernel will not see writes
by userspace when reading the shared page (or vice-versa).

Reported-by: Josh Beavers <josh.beavers@gmail.com>
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/sw/rdmavt/mmap.c  | 4 ++--
 drivers/infiniband/sw/rxe/rxe_mmap.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c
index e202b8142759..6b712eecbd37 100644
--- a/drivers/infiniband/sw/rdmavt/mmap.c
+++ b/drivers/infiniband/sw/rdmavt/mmap.c
@@ -170,9 +170,9 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi,
 
 	spin_lock_irq(&rdi->mmap_offset_lock);
 	if (rdi->mmap_offset == 0)
-		rdi->mmap_offset = PAGE_SIZE;
+		rdi->mmap_offset = ALIGN(PAGE_SIZE, SHMLBA);
 	ip->offset = rdi->mmap_offset;
-	rdi->mmap_offset += size;
+	rdi->mmap_offset += ALIGN(size, SHMLBA);
 	spin_unlock_irq(&rdi->mmap_offset_lock);
 
 	INIT_LIST_HEAD(&ip->pending_mmaps);
diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c
index c572a4c09359..bd812e00988e 100644
--- a/drivers/infiniband/sw/rxe/rxe_mmap.c
+++ b/drivers/infiniband/sw/rxe/rxe_mmap.c
@@ -156,10 +156,10 @@ struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *rxe,
 	spin_lock_bh(&rxe->mmap_offset_lock);
 
 	if (rxe->mmap_offset == 0)
-		rxe->mmap_offset = PAGE_SIZE;
+		rxe->mmap_offset = ALIGN(PAGE_SIZE, SHMLBA);
 
 	ip->info.offset = rxe->mmap_offset;
-	rxe->mmap_offset += size;
+	rxe->mmap_offset += ALIGN(size, SHMLBA);
 
 	spin_unlock_bh(&rxe->mmap_offset_lock);
 
-- 
cgit v1.2.3


From 6332dee83d8eab80d6d502cc51135b998fe6df79 Mon Sep 17 00:00:00 2001
From: Adit Ranadive <aditr@vmware.com>
Date: Wed, 22 Feb 2017 17:22:56 -0800
Subject: RDMA/vmw_pvrdma: Cleanup unused variables

Removed the unused nreq and redundant index variables.
Moved hardcoded async and cq ring pages number to macro.

Reported-by: Yuval Shaia <yuval.shaia@oracle.com>
Signed-off-by: Adit Ranadive <aditr@vmware.com>
Reviewed-by: Aditya Sarwade <asarwade@vmware.com>
Tested-by: Andrew Boyer <andrew.boyer@dell.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/vmw_pvrdma/pvrdma.h      |  2 ++
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c |  4 ++--
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c   | 33 ++++++++++----------------
 3 files changed, 17 insertions(+), 22 deletions(-)

diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
index 3cd96c1b9502..dbf61c37835c 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
@@ -69,6 +69,8 @@
  */
 #define PCI_DEVICE_ID_VMWARE_PVRDMA	0x0820
 
+#define PVRDMA_NUM_RING_PAGES		4
+
 struct pvrdma_dev;
 
 struct pvrdma_page_dir {
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 100bea5c42ff..e77476ca70ac 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -858,7 +858,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
 	dev->dsr->resp_slot_dma = (u64)slot_dma;
 
 	/* Async event ring */
-	dev->dsr->async_ring_pages.num_pages = 4;
+	dev->dsr->async_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES;
 	ret = pvrdma_page_dir_init(dev, &dev->async_pdir,
 				   dev->dsr->async_ring_pages.num_pages, true);
 	if (ret)
@@ -867,7 +867,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
 	dev->dsr->async_ring_pages.pdir_dma = dev->async_pdir.dir_dma;
 
 	/* CQ notification ring */
-	dev->dsr->cq_ring_pages.num_pages = 4;
+	dev->dsr->cq_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES;
 	ret = pvrdma_page_dir_init(dev, &dev->cq_pdir,
 				   dev->dsr->cq_ring_pages.num_pages, true);
 	if (ret)
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index dbbfd35e7da7..3ffbb2d42170 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -554,13 +554,13 @@ out:
 	return ret;
 }
 
-static inline void *get_sq_wqe(struct pvrdma_qp *qp, int n)
+static inline void *get_sq_wqe(struct pvrdma_qp *qp, unsigned int n)
 {
 	return pvrdma_page_dir_get_ptr(&qp->pdir,
 				       qp->sq.offset + n * qp->sq.wqe_size);
 }
 
-static inline void *get_rq_wqe(struct pvrdma_qp *qp, int n)
+static inline void *get_rq_wqe(struct pvrdma_qp *qp, unsigned int n)
 {
 	return pvrdma_page_dir_get_ptr(&qp->pdir,
 				       qp->rq.offset + n * qp->rq.wqe_size);
@@ -598,9 +598,7 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	unsigned long flags;
 	struct pvrdma_sq_wqe_hdr *wqe_hdr;
 	struct pvrdma_sge *sge;
-	int i, index;
-	int nreq;
-	int ret;
+	int i, ret;
 
 	/*
 	 * In states lower than RTS, we can fail immediately. In other states,
@@ -613,9 +611,8 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 	spin_lock_irqsave(&qp->sq.lock, flags);
 
-	index = pvrdma_idx(&qp->sq.ring->prod_tail, qp->sq.wqe_cnt);
-	for (nreq = 0; wr; nreq++, wr = wr->next) {
-		unsigned int tail;
+	while (wr) {
+		unsigned int tail = 0;
 
 		if (unlikely(!pvrdma_idx_ring_has_space(
 				qp->sq.ring, qp->sq.wqe_cnt, &tail))) {
@@ -680,7 +677,7 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			}
 		}
 
-		wqe_hdr = (struct pvrdma_sq_wqe_hdr *)get_sq_wqe(qp, index);
+		wqe_hdr = (struct pvrdma_sq_wqe_hdr *)get_sq_wqe(qp, tail);
 		memset(wqe_hdr, 0, sizeof(*wqe_hdr));
 		wqe_hdr->wr_id = wr->wr_id;
 		wqe_hdr->num_sge = wr->num_sge;
@@ -771,12 +768,11 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		/* Make sure wqe is written before index update */
 		smp_wmb();
 
-		index++;
-		if (unlikely(index >= qp->sq.wqe_cnt))
-			index = 0;
 		/* Update shared sq ring */
 		pvrdma_idx_ring_inc(&qp->sq.ring->prod_tail,
 				    qp->sq.wqe_cnt);
+
+		wr = wr->next;
 	}
 
 	ret = 0;
@@ -806,7 +802,6 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 	struct pvrdma_qp *qp = to_vqp(ibqp);
 	struct pvrdma_rq_wqe_hdr *wqe_hdr;
 	struct pvrdma_sge *sge;
-	int index, nreq;
 	int ret = 0;
 	int i;
 
@@ -821,9 +816,8 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 
 	spin_lock_irqsave(&qp->rq.lock, flags);
 
-	index = pvrdma_idx(&qp->rq.ring->prod_tail, qp->rq.wqe_cnt);
-	for (nreq = 0; wr; nreq++, wr = wr->next) {
-		unsigned int tail;
+	while (wr) {
+		unsigned int tail = 0;
 
 		if (unlikely(wr->num_sge > qp->rq.max_sg ||
 			     wr->num_sge < 0)) {
@@ -843,7 +837,7 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 			goto out;
 		}
 
-		wqe_hdr = (struct pvrdma_rq_wqe_hdr *)get_rq_wqe(qp, index);
+		wqe_hdr = (struct pvrdma_rq_wqe_hdr *)get_rq_wqe(qp, tail);
 		wqe_hdr->wr_id = wr->wr_id;
 		wqe_hdr->num_sge = wr->num_sge;
 		wqe_hdr->total_len = 0;
@@ -859,12 +853,11 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 		/* Make sure wqe is written before index update */
 		smp_wmb();
 
-		index++;
-		if (unlikely(index >= qp->rq.wqe_cnt))
-			index = 0;
 		/* Update shared rq ring */
 		pvrdma_idx_ring_inc(&qp->rq.ring->prod_tail,
 				    qp->rq.wqe_cnt);
+
+		wr = wr->next;
 	}
 
 	spin_unlock_irqrestore(&qp->rq.lock, flags);
-- 
cgit v1.2.3


From e51c2fb0331cb3440d7dc83ee78019ee8c7bb366 Mon Sep 17 00:00:00 2001
From: Adit Ranadive <aditr@vmware.com>
Date: Wed, 22 Feb 2017 17:22:57 -0800
Subject: RDMA/vmw_pvrdma: Dont hardcode QP header page

Moved the header page count to a macro.

Reported-by: Yuval Shaia <yuval.shaia@oracle.com>
Signed-off-by: Adit Ranadive <aditr@vmware.com>
Reviewed-by: Aditya Sarwade <asarwade@vmware.com>
Tested-by: Andrew Boyer <andrew.boyer@dell.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/vmw_pvrdma/pvrdma.h    | 1 +
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 9 +++++----
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
index dbf61c37835c..9fbe22d3467b 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
@@ -70,6 +70,7 @@
 #define PCI_DEVICE_ID_VMWARE_PVRDMA	0x0820
 
 #define PVRDMA_NUM_RING_PAGES		4
+#define PVRDMA_QP_NUM_HEADER_PAGES	1
 
 struct pvrdma_dev;
 
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index 3ffbb2d42170..30062aad3af1 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -170,8 +170,9 @@ static int pvrdma_set_sq_size(struct pvrdma_dev *dev, struct ib_qp_cap *req_cap,
 					     sizeof(struct pvrdma_sge) *
 					     qp->sq.max_sg);
 	/* Note: one extra page for the header. */
-	qp->npages_send = 1 + (qp->sq.wqe_cnt * qp->sq.wqe_size +
-			       PAGE_SIZE - 1) / PAGE_SIZE;
+	qp->npages_send = PVRDMA_QP_NUM_HEADER_PAGES +
+			  (qp->sq.wqe_cnt * qp->sq.wqe_size + PAGE_SIZE - 1) /
+								PAGE_SIZE;
 
 	return 0;
 }
@@ -288,7 +289,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
 			qp->npages = qp->npages_send + qp->npages_recv;
 
 			/* Skip header page. */
-			qp->sq.offset = PAGE_SIZE;
+			qp->sq.offset = PVRDMA_QP_NUM_HEADER_PAGES * PAGE_SIZE;
 
 			/* Recv queue pages are after send pages. */
 			qp->rq.offset = qp->npages_send * PAGE_SIZE;
@@ -341,7 +342,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
 	cmd->qp_type = ib_qp_type_to_pvrdma(init_attr->qp_type);
 	cmd->access_flags = IB_ACCESS_LOCAL_WRITE;
 	cmd->total_chunks = qp->npages;
-	cmd->send_chunks = qp->npages_send - 1;
+	cmd->send_chunks = qp->npages_send - PVRDMA_QP_NUM_HEADER_PAGES;
 	cmd->pdir_dma = qp->pdir.dir_dma;
 
 	dev_dbg(&dev->pdev->dev, "create queuepair with %d, %d, %d, %d\n",
-- 
cgit v1.2.3


From b172679b0d3b93a6197b2ff892794e7178e3c448 Mon Sep 17 00:00:00 2001
From: Aditya Sarwade <asarwade@vmware.com>
Date: Wed, 22 Feb 2017 17:22:58 -0800
Subject: RDMA/vmw_pvrdma: Activate device on ethernet link up

Restore device state when ethernet link changes to active.

Acked-by: George Zhang <georgezhang@vmware.com>
Acked-by: Jorgen Hansen <jhansen@vmware.com>
Acked-by: Bryan Tan <bryantan@vmware.com>
Signed-off-by: Aditya Sarwade <asarwade@vmware.com>
Signed-off-by: Adit Ranadive <aditr@vmware.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h |  2 +-
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c    | 13 +++++++++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
index e69d6f3cae32..09078ccfaec7 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
@@ -132,7 +132,7 @@ enum pvrdma_pci_resource {
 
 enum pvrdma_device_ctl {
 	PVRDMA_DEVICE_CTL_ACTIVATE,	/* Activate device. */
-	PVRDMA_DEVICE_CTL_QUIESCE,	/* Quiesce device. */
+	PVRDMA_DEVICE_CTL_UNQUIESCE,	/* Unquiesce device. */
 	PVRDMA_DEVICE_CTL_RESET,	/* Reset device. */
 };
 
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index e77476ca70ac..34ebc7615411 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -56,7 +56,7 @@
 #include "pvrdma.h"
 
 #define DRV_NAME	"vmw_pvrdma"
-#define DRV_VERSION	"1.0.0.0-k"
+#define DRV_VERSION	"1.0.1.0-k"
 
 static DEFINE_MUTEX(pvrdma_device_list_lock);
 static LIST_HEAD(pvrdma_device_list);
@@ -660,7 +660,16 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
 		pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
 		break;
 	case NETDEV_UP:
-		pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
+		pvrdma_write_reg(dev, PVRDMA_REG_CTL,
+				 PVRDMA_DEVICE_CTL_UNQUIESCE);
+
+		mb();
+
+		if (pvrdma_read_reg(dev, PVRDMA_REG_ERR))
+			dev_err(&dev->pdev->dev,
+				"failed to activate device during link up\n");
+		else
+			pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
 		break;
 	default:
 		dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n",
-- 
cgit v1.2.3


From ded260235308f340b979258a4c736e06ba12c747 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 8 Mar 2017 08:21:52 +0300
Subject: IB/rxe: double free on error

"goto err;" has it's own kfree_skb() call so it's a double free.  We
only need to free on the "goto exit;" path.

Fixes: 8700e3e7c485 ("Soft RoCE driver")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/sw/rxe/rxe_req.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index dbfde0dc6ff7..9f95f50b2909 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -729,11 +729,11 @@ next_wqe:
 	ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb);
 	if (ret) {
 		qp->need_req_skb = 1;
-		kfree_skb(skb);
 
 		rollback_state(wqe, qp, &rollback_wqe, rollback_psn);
 
 		if (ret == -EAGAIN) {
+			kfree_skb(skb);
 			rxe_run_task(&qp->req.task, 1);
 			goto exit;
 		}
-- 
cgit v1.2.3


From 004d18ea99b0f3b7b3d5790e4dc5ca7d5238706d Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 23 Feb 2017 13:40:16 +0300
Subject: RDMA/ocrdma: fix a type issue in ocrdma_put_pd_num()

We want to return zero on success or negative error codes.  The type
should be int and not u8.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index bc9fb144e57b..c52edeafd616 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -372,7 +372,7 @@ static int _ocrdma_pd_mgr_put_bitmap(struct ocrdma_dev *dev, u16 pd_id,
 	return 0;
 }
 
-static u8 ocrdma_put_pd_num(struct ocrdma_dev *dev, u16 pd_id,
+static int ocrdma_put_pd_num(struct ocrdma_dev *dev, u16 pd_id,
 				   bool dpp_pool)
 {
 	int status;
-- 
cgit v1.2.3


From a1c5dd13228a1f9e5087375f9702422dfc2adbf1 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leon@kernel.org>
Date: Tue, 28 Feb 2017 21:42:53 +0200
Subject: IB/rxe: Update documentation link

All Soft-RoCE (rxe) is handled now in rdma-core user space library,
so the documentation. The patch below updates the documentation
link to that new location.

Reported-by: Josh Beavers <josh.beavers@gmail.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/sw/rxe/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig
index 7d1ac27ed251..6332dedc11e8 100644
--- a/drivers/infiniband/sw/rxe/Kconfig
+++ b/drivers/infiniband/sw/rxe/Kconfig
@@ -22,4 +22,4 @@ config RDMA_RXE
 	To configure and work with soft-RoCE driver please use the
 	following wiki page under "configure Soft-RoCE (RXE)" section:
 
-	https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
+	https://github.com/linux-rdma/rdma-core/blob/master/Documentation/rxe.md
-- 
cgit v1.2.3


From 0957c29f78af7d890c4ac506eda8f76bfc5a137a Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 7 Mar 2017 22:56:53 +0000
Subject: IB/core: Restore I/O MMU, s390 and powerpc support

Avoid that the following error message is reported on the console
while loading an RDMA driver with I/O MMU support enabled:

DMAR: Allocating domain for mlx5_0 failed

Ensure that DMA mapping operations that use to_pci_dev() to
access to struct pci_dev see the correct PCI device. E.g. the s390
and powerpc DMA mapping operations use to_pci_dev() even with I/O
MMU support disabled.

This patch preserves the following changes of the DMA mapping updates
patch series:
- Introduction of dma_virt_ops.
- Removal of ib_device.dma_ops.
- Removal of struct ib_dma_mapping_ops.
- Removal of an if-statement from each ib_dma_*() operation.
- IB HW drivers no longer set dma_device directly.

Reported-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Reported-by: Parav Pandit <parav@mellanox.com>
Fixes: commit 99db9494035f ("IB/core: Remove ib_device.dma_device")
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: parav@mellanox.com
Tested-by: parav@mellanox.com
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/device.c | 26 ++++++++++++++++++++------
 include/rdma/ib_verbs.h          | 30 +++++++++++++++++-------------
 2 files changed, 37 insertions(+), 19 deletions(-)

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 593d2ce6ec7c..addf869045cc 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -336,12 +336,26 @@ int ib_register_device(struct ib_device *device,
 	struct device *parent = device->dev.parent;
 
 	WARN_ON_ONCE(!parent);
-	if (!device->dev.dma_ops)
-		device->dev.dma_ops = parent->dma_ops;
-	if (!device->dev.dma_mask)
-		device->dev.dma_mask = parent->dma_mask;
-	if (!device->dev.coherent_dma_mask)
-		device->dev.coherent_dma_mask = parent->coherent_dma_mask;
+	WARN_ON_ONCE(device->dma_device);
+	if (device->dev.dma_ops) {
+		/*
+		 * The caller provided custom DMA operations. Copy the
+		 * DMA-related fields that are used by e.g. dma_alloc_coherent()
+		 * into device->dev.
+		 */
+		device->dma_device = &device->dev;
+		if (!device->dev.dma_mask)
+			device->dev.dma_mask = parent->dma_mask;
+		if (!device->dev.coherent_dma_mask)
+			device->dev.coherent_dma_mask =
+				parent->coherent_dma_mask;
+	} else {
+		/*
+		 * The caller did not provide custom DMA operations. Use the
+		 * DMA mapping operations of the parent device.
+		 */
+		device->dma_device = parent;
+	}
 
 	mutex_lock(&device_mutex);
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 0f1813c13687..99e4423eb2b8 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1863,6 +1863,9 @@ struct ib_port_immutable {
 };
 
 struct ib_device {
+	/* Do not access @dma_device directly from ULP nor from HW drivers. */
+	struct device                *dma_device;
+
 	char                          name[IB_DEVICE_NAME_MAX];
 
 	struct list_head              event_handler_list;
@@ -3007,7 +3010,7 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
  */
 static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
 {
-	return dma_mapping_error(&dev->dev, dma_addr);
+	return dma_mapping_error(dev->dma_device, dma_addr);
 }
 
 /**
@@ -3021,7 +3024,7 @@ static inline u64 ib_dma_map_single(struct ib_device *dev,
 				    void *cpu_addr, size_t size,
 				    enum dma_data_direction direction)
 {
-	return dma_map_single(&dev->dev, cpu_addr, size, direction);
+	return dma_map_single(dev->dma_device, cpu_addr, size, direction);
 }
 
 /**
@@ -3035,7 +3038,7 @@ static inline void ib_dma_unmap_single(struct ib_device *dev,
 				       u64 addr, size_t size,
 				       enum dma_data_direction direction)
 {
-	dma_unmap_single(&dev->dev, addr, size, direction);
+	dma_unmap_single(dev->dma_device, addr, size, direction);
 }
 
 /**
@@ -3052,7 +3055,7 @@ static inline u64 ib_dma_map_page(struct ib_device *dev,
 				  size_t size,
 					 enum dma_data_direction direction)
 {
-	return dma_map_page(&dev->dev, page, offset, size, direction);
+	return dma_map_page(dev->dma_device, page, offset, size, direction);
 }
 
 /**
@@ -3066,7 +3069,7 @@ static inline void ib_dma_unmap_page(struct ib_device *dev,
 				     u64 addr, size_t size,
 				     enum dma_data_direction direction)
 {
-	dma_unmap_page(&dev->dev, addr, size, direction);
+	dma_unmap_page(dev->dma_device, addr, size, direction);
 }
 
 /**
@@ -3080,7 +3083,7 @@ static inline int ib_dma_map_sg(struct ib_device *dev,
 				struct scatterlist *sg, int nents,
 				enum dma_data_direction direction)
 {
-	return dma_map_sg(&dev->dev, sg, nents, direction);
+	return dma_map_sg(dev->dma_device, sg, nents, direction);
 }
 
 /**
@@ -3094,7 +3097,7 @@ static inline void ib_dma_unmap_sg(struct ib_device *dev,
 				   struct scatterlist *sg, int nents,
 				   enum dma_data_direction direction)
 {
-	dma_unmap_sg(&dev->dev, sg, nents, direction);
+	dma_unmap_sg(dev->dma_device, sg, nents, direction);
 }
 
 static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
@@ -3102,7 +3105,8 @@ static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
 				      enum dma_data_direction direction,
 				      unsigned long dma_attrs)
 {
-	return dma_map_sg_attrs(&dev->dev, sg, nents, direction, dma_attrs);
+	return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
+				dma_attrs);
 }
 
 static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
@@ -3110,7 +3114,7 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
 					 enum dma_data_direction direction,
 					 unsigned long dma_attrs)
 {
-	dma_unmap_sg_attrs(&dev->dev, sg, nents, direction, dma_attrs);
+	dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs);
 }
 /**
  * ib_sg_dma_address - Return the DMA address from a scatter/gather entry
@@ -3152,7 +3156,7 @@ static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev,
 					      size_t size,
 					      enum dma_data_direction dir)
 {
-	dma_sync_single_for_cpu(&dev->dev, addr, size, dir);
+	dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
 }
 
 /**
@@ -3167,7 +3171,7 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev,
 						 size_t size,
 						 enum dma_data_direction dir)
 {
-	dma_sync_single_for_device(&dev->dev, addr, size, dir);
+	dma_sync_single_for_device(dev->dma_device, addr, size, dir);
 }
 
 /**
@@ -3182,7 +3186,7 @@ static inline void *ib_dma_alloc_coherent(struct ib_device *dev,
 					   dma_addr_t *dma_handle,
 					   gfp_t flag)
 {
-	return dma_alloc_coherent(&dev->dev, size, dma_handle, flag);
+	return dma_alloc_coherent(dev->dma_device, size, dma_handle, flag);
 }
 
 /**
@@ -3196,7 +3200,7 @@ static inline void ib_dma_free_coherent(struct ib_device *dev,
 					size_t size, void *cpu_addr,
 					dma_addr_t dma_handle)
 {
-	dma_free_coherent(&dev->dev, size, cpu_addr, dma_handle);
+	dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle);
 }
 
 /**
-- 
cgit v1.2.3


From 812755d69efcf7c12fded57983d456647a0bbadd Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@altlinux.org>
Date: Fri, 24 Feb 2017 03:28:13 +0300
Subject: uapi: fix rdma/mlx5-abi.h userspace compilation errors

Consistently use types from linux/types.h to fix the following
rdma/mlx5-abi.h userspace compilation errors:

/usr/include/rdma/mlx5-abi.h:69:25: error: 'u64' undeclared here (not in a function)
  MLX5_LIB_CAP_4K_UAR = (u64)1 << 0,
/usr/include/rdma/mlx5-abi.h:69:29: error: expected ',' or '}' before numeric constant
  MLX5_LIB_CAP_4K_UAR = (u64)1 << 0,

Include <linux/if_ether.h> to fix the following rdma/mlx5-abi.h
userspace compilation error:

/usr/include/rdma/mlx5-abi.h:286:12: error: 'ETH_ALEN' undeclared here (not in a function)
  __u8 dmac[ETH_ALEN];

Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/uapi/rdma/mlx5-abi.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index da7cd62bace7..0b3d30837a9f 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -34,6 +34,7 @@
 #define MLX5_ABI_USER_H
 
 #include <linux/types.h>
+#include <linux/if_ether.h>	/* For ETH_ALEN. */
 
 enum {
 	MLX5_QP_FLAG_SIGNATURE		= 1 << 0,
@@ -66,7 +67,7 @@ struct mlx5_ib_alloc_ucontext_req {
 };
 
 enum mlx5_lib_caps {
-	MLX5_LIB_CAP_4K_UAR	= (u64)1 << 0,
+	MLX5_LIB_CAP_4K_UAR	= (__u64)1 << 0,
 };
 
 struct mlx5_ib_alloc_ucontext_req_v2 {
-- 
cgit v1.2.3


From 9fcd67d1772c43d2f23e8fca56acc7219e991676 Mon Sep 17 00:00:00 2001
From: David Marchand <david.marchand@6wind.com>
Date: Fri, 24 Feb 2017 15:38:26 +0100
Subject: IB/rxe: increment msn only when completing a request

According to C9-147, MSN should only be incremented when the last packet of
a multi packet request has been received.

"Logically, the requester associates a sequential Send Sequence Number
(SSN) with each WQE posted to the send queue. The SSN bears a one-
to-one relationship to the MSN returned by the responder in each re-
sponse packet. Therefore, when the requester receives a response, it in-
terprets the MSN as representing the SSN of the most recent request
completed by the responder to determine which send WQE(s) can be
completed."

Fixes: 8700e3e7c485 ("Soft RoCE driver")

Signed-off-by: David Marchand <david.marchand@6wind.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/sw/rxe/rxe_resp.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index d404a8aba7af..c9dd385ce62e 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -813,18 +813,17 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
 		WARN_ON_ONCE(1);
 	}
 
-	/* We successfully processed this new request. */
-	qp->resp.msn++;
-
 	/* next expected psn, read handles this separately */
 	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
 
 	qp->resp.opcode = pkt->opcode;
 	qp->resp.status = IB_WC_SUCCESS;
 
-	if (pkt->mask & RXE_COMP_MASK)
+	if (pkt->mask & RXE_COMP_MASK) {
+		/* We successfully processed this new request. */
+		qp->resp.msn++;
 		return RESPST_COMPLETE;
-	else if (qp_type(qp) == IB_QPT_RC)
+	} else if (qp_type(qp) == IB_QPT_RC)
 		return RESPST_ACKNOWLEDGE;
 	else
 		return RESPST_CLEANUP;
-- 
cgit v1.2.3


From fedd9e1f75828992ace5833379116f38c66ad7bb Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 16 Mar 2017 18:57:00 +0200
Subject: IB/cq: Don't process more than the given budget

The caller might not want this overhead.

Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/cq.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index 2746d2eb3d52..f2ae75fa3128 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -29,7 +29,13 @@ static int __ib_process_cq(struct ib_cq *cq, int budget)
 {
 	int i, n, completed = 0;
 
-	while ((n = ib_poll_cq(cq, IB_POLL_BATCH, cq->wc)) > 0) {
+	/*
+	 * budget might be (-1) if the caller does not
+	 * want to bound this call, thus we need unsigned
+	 * minimum here.
+	 */
+	while ((n = ib_poll_cq(cq, min_t(u32, IB_POLL_BATCH,
+			budget - completed), cq->wc)) > 0) {
 		for (i = 0; i < n; i++) {
 			struct ib_wc *wc = &cq->wc[i];
 
-- 
cgit v1.2.3


From b7363e67b23e04c23c2a99437feefac7292a88bc Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Wed, 8 Mar 2017 22:03:17 +0200
Subject: IB/device: Convert ib-comp-wq to be CPU-bound

This workqueue is used by our storage target mode ULPs
via the new CQ API. Recent observations when working
with very high-end flash storage devices reveal that
UNBOUND workqueue threads can migrate between cpu cores
and even numa nodes (although some numa locality is accounted
for).

While this attribute can be useful in some workloads,
it does not fit in very nicely with the normal
run-to-completion model we usually use in our target-mode
ULPs and the block-mq irq<->cpu affinity facilities.

The whole block-mq concept is that the completion will
land on the same cpu where the submission was performed.
The fact that our submitter thread is migrating cpus
can break this locality.

We assume that as a target mode ULP, we will serve multiple
initiators/clients and we can spread the load enough without
having to use unbound kworkers.

Also, while we're at it, expose this workqueue via sysfs which
is harmless and can be useful for debug.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>--
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/device.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index addf869045cc..7c9e34d679d3 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -1029,8 +1029,7 @@ static int __init ib_core_init(void)
 		return -ENOMEM;
 
 	ib_comp_wq = alloc_workqueue("ib-comp-wq",
-			WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM,
-			WQ_UNBOUND_MAX_ACTIVE);
+			WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
 	if (!ib_comp_wq) {
 		ret = -ENOMEM;
 		goto err;
-- 
cgit v1.2.3


From ea174c9573b0e0c8bc1a7a90fe9360ccb7aa9cbb Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 27 Feb 2017 20:16:33 +0200
Subject: RDMA/iser: Fix possible mr leak on device removal event

When the rdma device is removed, we must cleanup all
the rdma resources within the DEVICE_REMOVAL event
handler to let the device teardown gracefully. When
this happens with live I/O, some memory regions are
occupied. Thus, track them too and dereg all the mr's.

We are safe with mr access by iscsi_iser_cleanup_task.

Reported-by: Raju Rangoju <rajur@chelsio.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/ulp/iser/iscsi_iser.h | 2 ++
 drivers/infiniband/ulp/iser/iser_verbs.c | 8 +++++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 9d0b22ad58c1..c1ae4aeae2f9 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -430,6 +430,7 @@ struct iser_fr_desc {
 	struct list_head		  list;
 	struct iser_reg_resources	  rsc;
 	struct iser_pi_context		 *pi_ctx;
+	struct list_head                  all_list;
 };
 
 /**
@@ -443,6 +444,7 @@ struct iser_fr_pool {
 	struct list_head        list;
 	spinlock_t              lock;
 	int                     size;
+	struct list_head        all_list;
 };
 
 /**
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 30b622f2ab73..c538a38c91ce 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -362,6 +362,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn,
 	int i, ret;
 
 	INIT_LIST_HEAD(&fr_pool->list);
+	INIT_LIST_HEAD(&fr_pool->all_list);
 	spin_lock_init(&fr_pool->lock);
 	fr_pool->size = 0;
 	for (i = 0; i < cmds_max; i++) {
@@ -373,6 +374,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn,
 		}
 
 		list_add_tail(&desc->list, &fr_pool->list);
+		list_add_tail(&desc->all_list, &fr_pool->all_list);
 		fr_pool->size++;
 	}
 
@@ -392,13 +394,13 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
 	struct iser_fr_desc *desc, *tmp;
 	int i = 0;
 
-	if (list_empty(&fr_pool->list))
+	if (list_empty(&fr_pool->all_list))
 		return;
 
 	iser_info("freeing conn %p fr pool\n", ib_conn);
 
-	list_for_each_entry_safe(desc, tmp, &fr_pool->list, list) {
-		list_del(&desc->list);
+	list_for_each_entry_safe(desc, tmp, &fr_pool->all_list, all_list) {
+		list_del(&desc->all_list);
 		iser_free_reg_res(&desc->rsc);
 		if (desc->pi_ctx)
 			iser_free_pi_ctx(desc->pi_ctx);
-- 
cgit v1.2.3


From f6aafac184a3e46e919769dd4faa8bf0dc436534 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 14 Mar 2017 13:18:45 +0100
Subject: IB/qib: fix false-postive maybe-uninitialized warning

aarch64-linux-gcc-7 complains about code it doesn't fully understand:

drivers/infiniband/hw/qib/qib_iba7322.c: In function 'qib_7322_txchk_change':
include/asm-generic/bitops/non-atomic.h:105:35: error: 'shadow' may be used uninitialized in this function [-Werror=maybe-uninitialized]

The code is right, and despite trying hard, I could not come up with a version
that I liked better than just adding a fake initialization here to shut up the
warning.

Fixes: f931551bafe1 ("IB/qib: Add new qib driver for QLogic PCIe InfiniBand adapters")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/qib/qib_iba7322.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 12c4208fd701..af9f596bb68b 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -7068,7 +7068,7 @@ static void qib_7322_txchk_change(struct qib_devdata *dd, u32 start,
 	unsigned long flags;
 
 	while (wait) {
-		unsigned long shadow;
+		unsigned long shadow = 0;
 		int cstart, previ = -1;
 
 		/*
-- 
cgit v1.2.3


From dac7a4b4b1f664934e8b713f529b629f67db313c Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 25 Mar 2017 17:22:47 -0400
Subject: ext4: lock the xattr block before checksuming it

We must lock the xattr block before calculating or verifying the
checksum in order to avoid spurious checksum failures.

https://bugzilla.kernel.org/show_bug.cgi?id=193661

Reported-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org
---
 fs/ext4/xattr.c | 65 +++++++++++++++++++++++++++------------------------------
 1 file changed, 31 insertions(+), 34 deletions(-)

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 67636acf7624..996e7900d4c8 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -131,31 +131,26 @@ static __le32 ext4_xattr_block_csum(struct inode *inode,
 }
 
 static int ext4_xattr_block_csum_verify(struct inode *inode,
-					sector_t block_nr,
-					struct ext4_xattr_header *hdr)
+					struct buffer_head *bh)
 {
-	if (ext4_has_metadata_csum(inode->i_sb) &&
-	    (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr)))
-		return 0;
-	return 1;
-}
-
-static void ext4_xattr_block_csum_set(struct inode *inode,
-				      sector_t block_nr,
-				      struct ext4_xattr_header *hdr)
-{
-	if (!ext4_has_metadata_csum(inode->i_sb))
-		return;
+	struct ext4_xattr_header *hdr = BHDR(bh);
+	int ret = 1;
 
-	hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr);
+	if (ext4_has_metadata_csum(inode->i_sb)) {
+		lock_buffer(bh);
+		ret = (hdr->h_checksum == ext4_xattr_block_csum(inode,
+							bh->b_blocknr, hdr));
+		unlock_buffer(bh);
+	}
+	return ret;
 }
 
-static inline int ext4_handle_dirty_xattr_block(handle_t *handle,
-						struct inode *inode,
-						struct buffer_head *bh)
+static void ext4_xattr_block_csum_set(struct inode *inode,
+				      struct buffer_head *bh)
 {
-	ext4_xattr_block_csum_set(inode, bh->b_blocknr, BHDR(bh));
-	return ext4_handle_dirty_metadata(handle, inode, bh);
+	if (ext4_has_metadata_csum(inode->i_sb))
+		BHDR(bh)->h_checksum = ext4_xattr_block_csum(inode,
+						bh->b_blocknr, BHDR(bh));
 }
 
 static inline const struct xattr_handler *
@@ -233,7 +228,7 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
 	if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
 	    BHDR(bh)->h_blocks != cpu_to_le32(1))
 		return -EFSCORRUPTED;
-	if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh)))
+	if (!ext4_xattr_block_csum_verify(inode, bh))
 		return -EFSBADCRC;
 	error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size,
 				       bh->b_data);
@@ -618,23 +613,22 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
 			}
 		}
 
+		ext4_xattr_block_csum_set(inode, bh);
 		/*
 		 * Beware of this ugliness: Releasing of xattr block references
 		 * from different inodes can race and so we have to protect
 		 * from a race where someone else frees the block (and releases
 		 * its journal_head) before we are done dirtying the buffer. In
 		 * nojournal mode this race is harmless and we actually cannot
-		 * call ext4_handle_dirty_xattr_block() with locked buffer as
+		 * call ext4_handle_dirty_metadata() with locked buffer as
 		 * that function can call sync_dirty_buffer() so for that case
 		 * we handle the dirtying after unlocking the buffer.
 		 */
 		if (ext4_handle_valid(handle))
-			error = ext4_handle_dirty_xattr_block(handle, inode,
-							      bh);
+			error = ext4_handle_dirty_metadata(handle, inode, bh);
 		unlock_buffer(bh);
 		if (!ext4_handle_valid(handle))
-			error = ext4_handle_dirty_xattr_block(handle, inode,
-							      bh);
+			error = ext4_handle_dirty_metadata(handle, inode, bh);
 		if (IS_SYNC(inode))
 			ext4_handle_sync(handle);
 		dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
@@ -863,13 +857,14 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 				ext4_xattr_cache_insert(ext4_mb_cache,
 					bs->bh);
 			}
+			ext4_xattr_block_csum_set(inode, bs->bh);
 			unlock_buffer(bs->bh);
 			if (error == -EFSCORRUPTED)
 				goto bad_block;
 			if (!error)
-				error = ext4_handle_dirty_xattr_block(handle,
-								      inode,
-								      bs->bh);
+				error = ext4_handle_dirty_metadata(handle,
+								   inode,
+								   bs->bh);
 			if (error)
 				goto cleanup;
 			goto inserted;
@@ -967,10 +962,11 @@ inserted:
 					ce->e_reusable = 0;
 				ea_bdebug(new_bh, "reusing; refcount now=%d",
 					  ref);
+				ext4_xattr_block_csum_set(inode, new_bh);
 				unlock_buffer(new_bh);
-				error = ext4_handle_dirty_xattr_block(handle,
-								      inode,
-								      new_bh);
+				error = ext4_handle_dirty_metadata(handle,
+								   inode,
+								   new_bh);
 				if (error)
 					goto cleanup_dquot;
 			}
@@ -1020,11 +1016,12 @@ getblk_failed:
 				goto getblk_failed;
 			}
 			memcpy(new_bh->b_data, s->base, new_bh->b_size);
+			ext4_xattr_block_csum_set(inode, new_bh);
 			set_buffer_uptodate(new_bh);
 			unlock_buffer(new_bh);
 			ext4_xattr_cache_insert(ext4_mb_cache, new_bh);
-			error = ext4_handle_dirty_xattr_block(handle,
-							      inode, new_bh);
+			error = ext4_handle_dirty_metadata(handle, inode,
+							   new_bh);
 			if (error)
 				goto cleanup;
 		}
-- 
cgit v1.2.3


From d67d64f423147cf4fe8212658255e1160a4ef02c Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 25 Mar 2017 17:33:31 -0400
Subject: ext4: fix two spelling nits

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/inode.c       | 2 +-
 fs/ext4/move_extent.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index f622d4a577e3..f303d3a7f44a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5400,7 +5400,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	 * If there is inline data in the inode, the inode will normally not
 	 * have data blocks allocated (it may have an external xattr block).
 	 * Report at least one sector for such files, so tools like tar, rsync,
-	 * others doen't incorrectly think the file is completely sparse.
+	 * others don't incorrectly think the file is completely sparse.
 	 */
 	if (unlikely(ext4_has_inline_data(inode)))
 		stat->blocks += (stat->size + 511) >> 9;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 6fc14def0c70..615bc03d0fbd 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -511,7 +511,7 @@ mext_check_arguments(struct inode *orig_inode,
 	if ((orig_start & ~(PAGE_MASK >> orig_inode->i_blkbits)) !=
 	    (donor_start & ~(PAGE_MASK >> orig_inode->i_blkbits))) {
 		ext4_debug("ext4 move extent: orig and donor's start "
-			"offset are not alligned [ino:orig %lu, donor %lu]\n",
+			"offsets are not aligned [ino:orig %lu, donor %lu]\n",
 			orig_inode->i_ino, donor_inode->i_ino);
 		return -EINVAL;
 	}
-- 
cgit v1.2.3


From c02ed2e75ef4c74e41e421acb4ef1494671585e8 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 26 Mar 2017 14:15:16 -0700
Subject: Linux 4.11-rc4

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index b2faa9319372..231e6a7749bd 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 11
 SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc4
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
-- 
cgit v1.2.3