From eccaf52fee8305d5207ff110950a82c100e459bc Mon Sep 17 00:00:00 2001
From: "Lee, Chun-Yi" <joeyli.kernel@gmail.com>
Date: Thu, 2 May 2013 22:07:01 +0800
Subject: x86, efi: initial the local variable of DataSize to zero

That will be better initial the value of DataSize to zero for the input of
GetVariable(), otherwise we will feed a random value. The debug log of input
DataSize like this:

...
[  195.915612] EFI Variables Facility v0.08 2004-May-17
[  195.915819] efi: size: 18446744071581821342
[  195.915969] efi:  size': 18446744071581821342
[  195.916324] efi: size: 18446612150714306560
[  195.916632] efi:  size': 18446612150714306560
[  195.917159] efi: size: 18446612150714306560
[  195.917453] efi:  size': 18446612150714306560
...

The size' is value that was returned by BIOS.

After applied this patch:
[   82.442042] EFI Variables Facility v0.08 2004-May-17
[   82.442202] efi: size: 0
[   82.442360] efi:  size': 1039
[   82.443828] efi: size: 0
[   82.444127] efi:  size': 2616
[   82.447057] efi: size: 0
[   82.447356] efi:  size': 5832
...

Found on Acer Aspire V3 BIOS, it will not return the size of data if we input a
non-zero DataSize.

Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Lee, Chun-Yi <jlee@suse.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/platform/efi/efi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 55856b2310d3..82089d8b1954 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -206,7 +206,7 @@ static efi_status_t virt_efi_get_next_variable(unsigned long *name_size,
 	}
 
 	if (boot_used_size && !finished) {
-		unsigned long size;
+		unsigned long size = 0;
 		u32 attr;
 		efi_status_t s;
 		void *tmp;
-- 
cgit v1.2.3


From e9d0626ed43a41a3fc526d1df06122b0d4eac174 Mon Sep 17 00:00:00 2001
From: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Date: Tue, 14 May 2013 14:48:58 +0800
Subject: x86-64, init: Fix a possible wraparound bug in switchover in
 head_64.S

In head_64.S, a switchover has been used to handle kernel crossing
1G, 512G boundaries.

And commit 8170e6bed465b4b0c7687f93e9948aca4358a33b
    x86, 64bit: Use a #PF handler to materialize early mappings on demand
said:
    During the switchover in head_64.S, before #PF handler is available,
    we use three pages to handle kernel crossing 1G, 512G boundaries with
    sharing page by playing games with page aliasing: the same page is
    mapped twice in the higher-level tables with appropriate wraparound.

But from the switchover code, when we set up the PUD table:
114         addq    $4096, %rdx
115         movq    %rdi, %rax
116         shrq    $PUD_SHIFT, %rax
117         andl    $(PTRS_PER_PUD-1), %eax
118         movq    %rdx, (4096+0)(%rbx,%rax,8)
119         movq    %rdx, (4096+8)(%rbx,%rax,8)

It seems line 119 has a potential bug there. For example,
if the kernel is loaded at physical address 511G+1008M, that is
    000000000 111111111 111111000 000000000000000000000
and the kernel _end is 512G+2M, that is
    000000001 000000000 000000001 000000000000000000000
So in this example, when using the 2nd page to setup PUD (line 114~119),
rax is 511.
In line 118, we put rdx which is the address of the PMD page (the 3rd page)
into entry 511 of the PUD table. But in line 119, the entry we calculate from
(4096+8)(%rbx,%rax,8) has exceeded the PUD page. IMO, the entry in line
119 should be wraparound into entry 0 of the PUD table.

The patch fixes the bug.

Signed-off-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Link: http://lkml.kernel.org/r/5191DE5A.3020302@cn.fujitsu.com
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: <stable@vger.kernel.org> v3.9
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/head_64.S | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 08f7e8039099..321d65ebaffe 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -115,8 +115,10 @@ startup_64:
 	movq	%rdi, %rax
 	shrq	$PUD_SHIFT, %rax
 	andl	$(PTRS_PER_PUD-1), %eax
-	movq	%rdx, (4096+0)(%rbx,%rax,8)
-	movq	%rdx, (4096+8)(%rbx,%rax,8)
+	movq	%rdx, 4096(%rbx,%rax,8)
+	incl	%eax
+	andl	$(PTRS_PER_PUD-1), %eax
+	movq	%rdx, 4096(%rbx,%rax,8)
 
 	addq	$8192, %rbx
 	movq	%rdi, %rax
-- 
cgit v1.2.3


From 1db01b4903639fcfaec213701a494fe3fb2c490b Mon Sep 17 00:00:00 2001
From: Stefan Bader <stefan.bader@canonical.com>
Date: Wed, 8 May 2013 16:37:35 +0200
Subject: xen: Clean up apic ipi interface

Commit f447d56d36af18c5104ff29dcb1327c0c0ac3634 introduced the
implementation of the PV apic ipi interface. But there were some
odd things (it seems none of which cause really any issue but
maybe they should be cleaned up anyway):
 - xen_send_IPI_mask_allbutself (and by that xen_send_IPI_allbutself)
   ignore the passed in vector and only use the CALL_FUNCTION_SINGLE
   vector. While xen_send_IPI_all and xen_send_IPI_mask use the vector.
 - physflat_send_IPI_allbutself is declared unnecessarily. It is never
   used.

This patch tries to clean up those things.

Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/smp.c | 10 ++++------
 arch/x86/xen/smp.h |  1 -
 2 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 8ff37995d54e..fb44426fe931 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -576,24 +576,22 @@ void xen_send_IPI_mask_allbutself(const struct cpumask *mask,
 {
 	unsigned cpu;
 	unsigned int this_cpu = smp_processor_id();
+	int xen_vector = xen_map_vector(vector);
 
-	if (!(num_online_cpus() > 1))
+	if (!(num_online_cpus() > 1) || (xen_vector < 0))
 		return;
 
 	for_each_cpu_and(cpu, mask, cpu_online_mask) {
 		if (this_cpu == cpu)
 			continue;
 
-		xen_smp_send_call_function_single_ipi(cpu);
+		xen_send_IPI_one(cpu, xen_vector);
 	}
 }
 
 void xen_send_IPI_allbutself(int vector)
 {
-	int xen_vector = xen_map_vector(vector);
-
-	if (xen_vector >= 0)
-		xen_send_IPI_mask_allbutself(cpu_online_mask, xen_vector);
+	xen_send_IPI_mask_allbutself(cpu_online_mask, vector);
 }
 
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h
index 8981a76d081a..c7c2d89efd76 100644
--- a/arch/x86/xen/smp.h
+++ b/arch/x86/xen/smp.h
@@ -5,7 +5,6 @@ extern void xen_send_IPI_mask(const struct cpumask *mask,
 extern void xen_send_IPI_mask_allbutself(const struct cpumask *mask,
 				int vector);
 extern void xen_send_IPI_allbutself(int vector);
-extern void physflat_send_IPI_allbutself(int vector);
 extern void xen_send_IPI_all(int vector);
 extern void xen_send_IPI_self(int vector);
 
-- 
cgit v1.2.3


From 2baad6121e2b2fa3428ee6cb2298107be11ab23a Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Wed, 29 May 2013 13:43:54 +0100
Subject: x86, crc32-pclmul: Fix build with older binutils

binutils prior to 2.18 (e.g. the ones found on SLE10) don't support
assembling PEXTRD, so a macro based approach like the one for PCLMULQDQ
in the same file should be used.

This requires making the helper macros capable of recognizing 32-bit
general purpose register operands.

[ hpa: tagging for stable as it is a low risk build fix ]

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Link: http://lkml.kernel.org/r/51A6142A02000078000D99D8@nat28.tlf.novell.com
Cc: Alexander Boyko <alexander_boyko@xyratex.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Huang Ying <ying.huang@intel.com>
Cc: <stable@vger.kernel.org> v3.9
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/crypto/crc32-pclmul_asm.S |  2 +-
 arch/x86/include/asm/inst.h        | 74 ++++++++++++++++++++++++++++++++++++--
 2 files changed, 73 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S
index 94c27df8a549..f247304299a2 100644
--- a/arch/x86/crypto/crc32-pclmul_asm.S
+++ b/arch/x86/crypto/crc32-pclmul_asm.S
@@ -240,7 +240,7 @@ fold_64:
 	pand    %xmm3, %xmm1
 	PCLMULQDQ 0x00, CONSTANT, %xmm1
 	pxor    %xmm2, %xmm1
-	pextrd  $0x01, %xmm1, %eax
+	PEXTRD  0x01, %xmm1, %eax
 
 	ret
 ENDPROC(crc32_pclmul_le_16)
diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h
index 280bf7fb6aba..3e115273ed88 100644
--- a/arch/x86/include/asm/inst.h
+++ b/arch/x86/include/asm/inst.h
@@ -9,12 +9,68 @@
 
 #define REG_NUM_INVALID		100
 
-#define REG_TYPE_R64		0
-#define REG_TYPE_XMM		1
+#define REG_TYPE_R32		0
+#define REG_TYPE_R64		1
+#define REG_TYPE_XMM		2
 #define REG_TYPE_INVALID	100
 
+	.macro R32_NUM opd r32
+	\opd = REG_NUM_INVALID
+	.ifc \r32,%eax
+	\opd = 0
+	.endif
+	.ifc \r32,%ecx
+	\opd = 1
+	.endif
+	.ifc \r32,%edx
+	\opd = 2
+	.endif
+	.ifc \r32,%ebx
+	\opd = 3
+	.endif
+	.ifc \r32,%esp
+	\opd = 4
+	.endif
+	.ifc \r32,%ebp
+	\opd = 5
+	.endif
+	.ifc \r32,%esi
+	\opd = 6
+	.endif
+	.ifc \r32,%edi
+	\opd = 7
+	.endif
+#ifdef CONFIG_X86_64
+	.ifc \r32,%r8d
+	\opd = 8
+	.endif
+	.ifc \r32,%r9d
+	\opd = 9
+	.endif
+	.ifc \r32,%r10d
+	\opd = 10
+	.endif
+	.ifc \r32,%r11d
+	\opd = 11
+	.endif
+	.ifc \r32,%r12d
+	\opd = 12
+	.endif
+	.ifc \r32,%r13d
+	\opd = 13
+	.endif
+	.ifc \r32,%r14d
+	\opd = 14
+	.endif
+	.ifc \r32,%r15d
+	\opd = 15
+	.endif
+#endif
+	.endm
+
 	.macro R64_NUM opd r64
 	\opd = REG_NUM_INVALID
+#ifdef CONFIG_X86_64
 	.ifc \r64,%rax
 	\opd = 0
 	.endif
@@ -63,6 +119,7 @@
 	.ifc \r64,%r15
 	\opd = 15
 	.endif
+#endif
 	.endm
 
 	.macro XMM_NUM opd xmm
@@ -118,10 +175,13 @@
 	.endm
 
 	.macro REG_TYPE type reg
+	R32_NUM reg_type_r32 \reg
 	R64_NUM reg_type_r64 \reg
 	XMM_NUM reg_type_xmm \reg
 	.if reg_type_r64 <> REG_NUM_INVALID
 	\type = REG_TYPE_R64
+	.elseif reg_type_r32 <> REG_NUM_INVALID
+	\type = REG_TYPE_R32
 	.elseif reg_type_xmm <> REG_NUM_INVALID
 	\type = REG_TYPE_XMM
 	.else
@@ -162,6 +222,16 @@
 	.byte \imm8
 	.endm
 
+	.macro PEXTRD imm8 xmm gpr
+	R32_NUM extrd_opd1 \gpr
+	XMM_NUM extrd_opd2 \xmm
+	PFX_OPD_SIZE
+	PFX_REX extrd_opd1 extrd_opd2
+	.byte 0x0f, 0x3a, 0x16
+	MODRM 0xc0 extrd_opd1 extrd_opd2
+	.byte \imm8
+	.endm
+
 	.macro AESKEYGENASSIST rcon xmm1 xmm2
 	XMM_NUM aeskeygen_opd1 \xmm1
 	XMM_NUM aeskeygen_opd2 \xmm2
-- 
cgit v1.2.3


From 5187b28ff08249ab8a162e802209ed04e271ca02 Mon Sep 17 00:00:00 2001
From: Pekka Riikonen <priikone@iki.fi>
Date: Mon, 13 May 2013 14:32:07 +0200
Subject: x86: Allow FPU to be used at interrupt time even with eagerfpu

With the addition of eagerfpu the irq_fpu_usable() now returns false
negatives especially in the case of ksoftirqd and interrupted idle task,
two common cases for FPU use for example in networking/crypto.  With
eagerfpu=off FPU use is possible in those contexts.  This is because of
the eagerfpu check in interrupted_kernel_fpu_idle():

...
  * For now, with eagerfpu we will return interrupted kernel FPU
  * state as not-idle. TBD: Ideally we can change the return value
  * to something like __thread_has_fpu(current). But we need to
  * be careful of doing __thread_clear_has_fpu() before saving
  * the FPU etc for supporting nested uses etc. For now, take
  * the simple route!
...
 	if (use_eager_fpu())
 		return 0;

As eagerfpu is automatically "on" on those CPUs that also have the
features like AES-NI this patch changes the eagerfpu check to return 1 in
case the kernel_fpu_begin() has not been said yet.  Once it has been the
__thread_has_fpu() will start returning 0.

Notice that with eagerfpu the __thread_has_fpu is always true initially.
FPU use is thus always possible no matter what task is under us, unless
the state has already been saved with kernel_fpu_begin().

[ hpa: this is a performance regression, not a correctness regression,
  but since it can be quite serious on CPUs which need encryption at
  interrupt time I am marking this for urgent/stable. ]

Signed-off-by: Pekka Riikonen <priikone@iki.fi>
Link: http://lkml.kernel.org/r/alpine.GSO.2.00.1305131356320.18@git.silcnet.org
Cc: <stable@vger.kernel.org> v3.7+
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/i387.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 245a71db401a..cb339097b9ea 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -22,23 +22,19 @@
 /*
  * Were we in an interrupt that interrupted kernel mode?
  *
- * For now, with eagerfpu we will return interrupted kernel FPU
- * state as not-idle. TBD: Ideally we can change the return value
- * to something like __thread_has_fpu(current). But we need to
- * be careful of doing __thread_clear_has_fpu() before saving
- * the FPU etc for supporting nested uses etc. For now, take
- * the simple route!
- *
  * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
  * pair does nothing at all: the thread must not have fpu (so
  * that we don't try to save the FPU state), and TS must
  * be set (so that the clts/stts pair does nothing that is
  * visible in the interrupted kernel thread).
+ *
+ * Except for the eagerfpu case when we return 1 unless we've already
+ * been eager and saved the state in kernel_fpu_begin().
  */
 static inline bool interrupted_kernel_fpu_idle(void)
 {
 	if (use_eager_fpu())
-		return 0;
+		return __thread_has_fpu(current);
 
 	return !__thread_has_fpu(current) &&
 		(read_cr0() & X86_CR0_TS);
@@ -78,8 +74,8 @@ void __kernel_fpu_begin(void)
 	struct task_struct *me = current;
 
 	if (__thread_has_fpu(me)) {
-		__save_init_fpu(me);
 		__thread_clear_has_fpu(me);
+		__save_init_fpu(me);
 		/* We do 'stts()' in __kernel_fpu_end() */
 	} else if (!use_eager_fpu()) {
 		this_cpu_write(fpu_owner_task, NULL);
-- 
cgit v1.2.3


From 7de3d66b1387ddf5a37d9689e5eb8510fb75c765 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 31 May 2013 08:53:07 -0700
Subject: x86: Fix adjust_range_size_mask calling position

Commit

    8d57470d x86, mm: setup page table in top-down

causes a kernel panic while setting mem=2G.

     [mem 0x00000000-0x000fffff] page 4k
     [mem 0x7fe00000-0x7fffffff] page 1G
     [mem 0x7c000000-0x7fdfffff] page 1G
     [mem 0x00100000-0x001fffff] page 4k
     [mem 0x00200000-0x7bffffff] page 2M

for last entry is not what we want, we should have
     [mem 0x00200000-0x3fffffff] page 2M
     [mem 0x40000000-0x7bffffff] page 1G

Actually we merge the continuous ranges with same page size too early.
in this case, before merging we have
     [mem 0x00200000-0x3fffffff] page 2M
     [mem 0x40000000-0x7bffffff] page 2M
after merging them, will get
     [mem 0x00200000-0x7bffffff] page 2M
even we can use 1G page to map
     [mem 0x40000000-0x7bffffff]

that will cause problem, because we already map
     [mem 0x7fe00000-0x7fffffff] page 1G
     [mem 0x7c000000-0x7fdfffff] page 1G
with 1G page, aka [0x40000000-0x7fffffff] is mapped with 1G page already.
During phys_pud_init() for [0x40000000-0x7bffffff], it will not
reuse existing that pud page, and allocate new one then try to use
2M page to map it instead, as page_size_mask does not include
PG_LEVEL_1G. At end will have [7c000000-0x7fffffff] not mapped, loop
in phys_pmd_init stop mapping at 0x7bffffff.

That is right behavoir, it maps exact range with exact page size that
we ask, and we should explicitly call it to map [7c000000-0x7fffffff]
before or after mapping 0x40000000-0x7bffffff.
Anyway we need to make sure ranges' page_size_mask correct and consistent
after split_mem_range for each range.

Fix that by calling adjust_range_size_mask before merging range
with same page size.

-v2: update change log.
-v3: add more explanation why [7c000000-0x7fffffff] is not mapped, and
    it causes panic.

Bisected-by: "Xie, ChanglongX" <changlongx.xie@intel.com>
Bisected-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Reported-and-tested-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/1370015587-20835-1-git-send-email-yinghai@kernel.org
Cc: <stable@vger.kernel.org> v3.9
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/mm/init.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index eaac1743def7..1f34e9219775 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -277,6 +277,9 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range,
 	end_pfn = limit_pfn;
 	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
 
+	if (!after_bootmem)
+		adjust_range_page_size_mask(mr, nr_range);
+
 	/* try to merge same page size and continuous */
 	for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
 		unsigned long old_start;
@@ -291,9 +294,6 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range,
 		nr_range--;
 	}
 
-	if (!after_bootmem)
-		adjust_range_page_size_mask(mr, nr_range);
-
 	for (i = 0; i < nr_range; i++)
 		printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n",
 				mr[i].start, mr[i].end - 1,
-- 
cgit v1.2.3


From 103f98ea64a1b0a67d8a1b23070b4db3533db2b8 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 30 May 2013 13:22:39 +0200
Subject: KVM: Emulate multibyte NOP

This is encountered when booting RHEL5.9 64-bit.  There is another bug
after this one that is not a simple emulation failure, but this one lets
the boot proceed a bit.

Cc: <stable@vger.kernel.org> # 3.9
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/emulate.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 8db0010ed150..0f42c2a48166 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3997,7 +3997,8 @@ static const struct opcode twobyte_table[256] = {
 	DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
 	N, D(ImplicitOps | ModRM), N, N,
 	/* 0x10 - 0x1F */
-	N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N,
+	N, N, N, N, N, N, N, N,
+	D(ImplicitOps | ModRM), N, N, N, N, N, N, D(ImplicitOps | ModRM),
 	/* 0x20 - 0x2F */
 	DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read),
 	DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read),
@@ -4836,6 +4837,7 @@ twobyte_insn:
 	case 0x08:		/* invd */
 	case 0x0d:		/* GrpP (prefetch) */
 	case 0x18:		/* Grp16 (prefetch/nop) */
+	case 0x1f:		/* nop */
 		break;
 	case 0x20: /* mov cr, reg */
 		ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
-- 
cgit v1.2.3


From 8acb42070ec4c87a9baab5c7bac626030d5bef28 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 30 May 2013 16:35:55 +0200
Subject: KVM: fix sil/dil/bpl/spl in the mod/rm fields

The x86-64 extended low-byte registers were fetched correctly from reg,
but not from mod/rm.

This fixes another bug in the boot of RHEL5.9 64-bit, but it is still
not enough.

Cc: <stable@vger.kernel.org> # 3.9
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/emulate.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 0f42c2a48166..5953dcea752d 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1240,9 +1240,12 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 	ctxt->modrm_seg = VCPU_SREG_DS;
 
 	if (ctxt->modrm_mod == 3) {
+		int highbyte_regs = ctxt->rex_prefix == 0;
+
 		op->type = OP_REG;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
-		op->addr.reg = decode_register(ctxt, ctxt->modrm_rm, ctxt->d & ByteOp);
+		op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
+					       highbyte_regs && (ctxt->d & ByteOp));
 		if (ctxt->d & Sse) {
 			op->type = OP_XMM;
 			op->bytes = 16;
-- 
cgit v1.2.3


From 299018f44ac553dce3caf84df1d14c4764faa279 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 3 Jun 2013 11:30:02 +0300
Subject: KVM: Fix race in apic->pending_events processing

apic->pending_events processing has a race that may cause INIT and
SIPI
processing to be reordered:

vpu0:                            vcpu1:
set INIT
                               test_and_clear_bit(KVM_APIC_INIT)
                                  process INIT
set INIT
set SIPI
                               test_and_clear_bit(KVM_APIC_SIPI)
                                  process SIPI

At the end INIT is left pending in pending_events. The following patch
fixes this by latching pending event before processing them.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/lapic.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index e1adbb4aca75..0eee2c8b64d1 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1861,11 +1861,14 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	unsigned int sipi_vector;
+	unsigned long pe;
 
-	if (!kvm_vcpu_has_lapic(vcpu))
+	if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events)
 		return;
 
-	if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) {
+	pe = xchg(&apic->pending_events, 0);
+
+	if (test_bit(KVM_APIC_INIT, &pe)) {
 		kvm_lapic_reset(vcpu);
 		kvm_vcpu_reset(vcpu);
 		if (kvm_vcpu_is_bsp(apic->vcpu))
@@ -1873,7 +1876,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 		else
 			vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
 	}
-	if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events) &&
+	if (test_bit(KVM_APIC_SIPI, &pe) &&
 	    vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
 		/* evaluate pending_events before reading the vector */
 		smp_rmb();
-- 
cgit v1.2.3


From 466318a87f28cb3ba0d08a3b7ef1a37ae73d5aa7 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 3 Jun 2013 10:33:55 -0400
Subject: xen/smp: Fixup NOHZ per cpu data when onlining an offline CPU.

The xen_play_dead is an undead function. When the vCPU is told to
offline it ends up calling xen_play_dead wherin it calls the
VCPUOP_down hypercall which offlines the vCPU. However, when the
vCPU is onlined back, it resumes execution right after
VCPUOP_down hypercall.

That was OK (albeit the API for play_dead assumes that the CPU
stays dead and never returns) but with commit 4b0c0f294
(tick: Cleanup NOHZ per cpu data on cpu down) that is no longer safe
as said commit resets the ts->inidle which at the start of the
cpu_idle loop was set.

The net effect is that we get this warn:

Broke affinity for irq 16
installing Xen timer for CPU 1
cpu 1 spinlock event irq 48
------------[ cut here ]------------
WARNING: at /home/konrad/linux-linus/kernel/time/tick-sched.c:935 tick_nohz_idle_exit+0x195/0x1b0()
Modules linked in: dm_multipath dm_mod xen_evtchn iscsi_boot_sysfs
CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.10.0-rc3upstream-00068-gdcdbe33 #1
Hardware name: BIOSTAR Group N61PB-M2S/N61PB-M2S, BIOS 6.00 PG 09/03/2009
 ffffffff8193b448 ffff880039da5e60 ffffffff816707c8 ffff880039da5ea0
 ffffffff8108ce8b ffff880039da4010 ffff88003fa8e500 ffff880039da4010
 0000000000000001 ffff880039da4000 ffff880039da4010 ffff880039da5eb0
Call Trace:
 [<ffffffff816707c8>] dump_stack+0x19/0x1b
 [<ffffffff8108ce8b>] warn_slowpath_common+0x6b/0xa0
 [<ffffffff8108ced5>] warn_slowpath_null+0x15/0x20
 [<ffffffff810e4745>] tick_nohz_idle_exit+0x195/0x1b0
 [<ffffffff810da755>] cpu_startup_entry+0x205/0x250
 [<ffffffff81661070>] cpu_bringup_and_idle+0x13/0x15
---[ end trace 915c8c486004dda1 ]---

b/c ts_inidle is set to zero. Thomas suggested that we just add a workaround
to call tick_nohz_idle_enter before returning from xen_play_dead() - and
that is what this patch does and fixes the issue.

We also add the stable part b/c git commit 4b0c0f294 is on the stable
tree.

CC: stable@vger.kernel.org
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/smp.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index fb44426fe931..d99cae8147d1 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -17,6 +17,7 @@
 #include <linux/slab.h>
 #include <linux/smp.h>
 #include <linux/irq_work.h>
+#include <linux/tick.h>
 
 #include <asm/paravirt.h>
 #include <asm/desc.h>
@@ -447,6 +448,13 @@ static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */
 	play_dead_common();
 	HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
 	cpu_bringup();
+	/*
+	 * commit 4b0c0f294 (tick: Cleanup NOHZ per cpu data on cpu down)
+	 * clears certain data that the cpu_idle loop (which called us
+	 * and that we return from) expects. The only way to get that
+	 * data back is to call:
+	 */
+	tick_nohz_idle_enter();
 }
 
 #else /* !CONFIG_HOTPLUG_CPU */
-- 
cgit v1.2.3


From 65694c5aaddfedd9da082e4e150cafc6b3fc8a6a Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Wed, 5 Jun 2013 15:15:41 +0100
Subject: x86/PCI: Map PCI setup data with ioremap() so it can be in highmem

f9a37be0f0 ("x86: Use PCI setup data") added support for using PCI ROM
images from setup_data.  This used phys_to_virt(), which is not valid for
highmem addresses, and can cause a crash when booting a 32-bit kernel via
the EFI boot stub.

pcibios_add_device() assumes that the physical addresses stored in
setup_data are accessible via the direct kernel mapping, and that calling
phys_to_virt() is valid.  This isn't guaranteed to be true on x86 where the
direct mapping range is much smaller than on x86-64.

Calling phys_to_virt() on a highmem address results in the following:

 BUG: unable to handle kernel paging request at 39a3c198
 IP: [<c262be0f>] pcibios_add_device+0x2f/0x90
 ...
 Call Trace:
  [<c2370c73>] pci_device_add+0xe3/0x130
  [<c274640b>] pci_scan_single_device+0x8b/0xb0
  [<c2370d08>] pci_scan_slot+0x48/0x100
  [<c2371904>] pci_scan_child_bus+0x24/0xc0
  [<c262a7b0>] pci_acpi_scan_root+0x2c0/0x490
  [<c23b7203>] acpi_pci_root_add+0x312/0x42f
  ...

The solution is to use ioremap() instead of phys_to_virt() to map the
setup data into the kernel address space.

[bhelgaas: changelog]
Tested-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Cc: Seth Forshee <seth.forshee@canonical.com>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: stable@vger.kernel.org	# v3.8+
---
 arch/x86/pci/common.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 305c68b8d538..981c2dbd72cc 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -628,7 +628,9 @@ int pcibios_add_device(struct pci_dev *dev)
 
 	pa_data = boot_params.hdr.setup_data;
 	while (pa_data) {
-		data = phys_to_virt(pa_data);
+		data = ioremap(pa_data, sizeof(*rom));
+		if (!data)
+			return -ENOMEM;
 
 		if (data->type == SETUP_PCI) {
 			rom = (struct pci_setup_rom *)data;
@@ -645,6 +647,7 @@ int pcibios_add_device(struct pci_dev *dev)
 			}
 		}
 		pa_data = data->next;
+		iounmap(data);
 	}
 	return 0;
 }
-- 
cgit v1.2.3


From f8b8404337de4e2466e2e1139ea68b1f8295974f Mon Sep 17 00:00:00 2001
From: Matthew Garrett <matthew.garrett@nebula.com>
Date: Sat, 1 Jun 2013 16:06:20 -0400
Subject: Modify UEFI anti-bricking code

This patch reworks the UEFI anti-bricking code, including an effective
reversion of cc5a080c and 31ff2f20. It turns out that calling
QueryVariableInfo() from boot services results in some firmware
implementations jumping to physical addresses even after entering virtual
mode, so until we have 1:1 mappings for UEFI runtime space this isn't
going to work so well.

Reverting these gets us back to the situation where we'd refuse to create
variables on some systems because they classify deleted variables as "used"
until the firmware triggers a garbage collection run, which they won't do
until they reach a lower threshold. This results in it being impossible to
install a bootloader, which is unhelpful.

Feedback from Samsung indicates that the firmware doesn't need more than
5KB of storage space for its own purposes, so that seems like a reasonable
threshold. However, there's still no guarantee that a platform will attempt
garbage collection merely because it drops below this threshold. It seems
that this is often only triggered if an attempt to write generates a
genuine EFI_OUT_OF_RESOURCES error. We can force that by attempting to
create a variable larger than the remaining space. This should fail, but if
it somehow succeeds we can then immediately delete it.

I've tested this on the UEFI machines I have available, but I don't have
a Samsung and so can't verify that it avoids the bricking problem.

Signed-off-by: Matthew Garrett <matthew.garrett@nebula.com>
Signed-off-by: Lee, Chun-Y <jlee@suse.com> [ dummy variable cleanup ]
Cc: <stable@vger.kernel.org>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/boot/compressed/eboot.c      |  47 ---------
 arch/x86/include/asm/efi.h            |   7 --
 arch/x86/include/uapi/asm/bootparam.h |   1 -
 arch/x86/platform/efi/efi.c           | 188 ++++++++++++----------------------
 4 files changed, 65 insertions(+), 178 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 35ee62fccf98..c205035a6b96 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -251,51 +251,6 @@ static void find_bits(unsigned long mask, u8 *pos, u8 *size)
 	*size = len;
 }
 
-static efi_status_t setup_efi_vars(struct boot_params *params)
-{
-	struct setup_data *data;
-	struct efi_var_bootdata *efidata;
-	u64 store_size, remaining_size, var_size;
-	efi_status_t status;
-
-	if (sys_table->runtime->hdr.revision < EFI_2_00_SYSTEM_TABLE_REVISION)
-		return EFI_UNSUPPORTED;
-
-	data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
-
-	while (data && data->next)
-		data = (struct setup_data *)(unsigned long)data->next;
-
-	status = efi_call_phys4((void *)sys_table->runtime->query_variable_info,
-				EFI_VARIABLE_NON_VOLATILE |
-				EFI_VARIABLE_BOOTSERVICE_ACCESS |
-				EFI_VARIABLE_RUNTIME_ACCESS, &store_size,
-				&remaining_size, &var_size);
-
-	if (status != EFI_SUCCESS)
-		return status;
-
-	status = efi_call_phys3(sys_table->boottime->allocate_pool,
-				EFI_LOADER_DATA, sizeof(*efidata), &efidata);
-
-	if (status != EFI_SUCCESS)
-		return status;
-
-	efidata->data.type = SETUP_EFI_VARS;
-	efidata->data.len = sizeof(struct efi_var_bootdata) -
-		sizeof(struct setup_data);
-	efidata->data.next = 0;
-	efidata->store_size = store_size;
-	efidata->remaining_size = remaining_size;
-	efidata->max_var_size = var_size;
-
-	if (data)
-		data->next = (unsigned long)efidata;
-	else
-		params->hdr.setup_data = (unsigned long)efidata;
-
-}
-
 static efi_status_t setup_efi_pci(struct boot_params *params)
 {
 	efi_pci_io_protocol *pci;
@@ -1202,8 +1157,6 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table,
 
 	setup_graphics(boot_params);
 
-	setup_efi_vars(boot_params);
-
 	setup_efi_pci(boot_params);
 
 	status = efi_call_phys3(sys_table->boottime->allocate_pool,
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 2fb5d5884e23..60c89f30c727 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -102,13 +102,6 @@ extern void efi_call_phys_epilog(void);
 extern void efi_unmap_memmap(void);
 extern void efi_memory_uc(u64 addr, unsigned long size);
 
-struct efi_var_bootdata {
-	struct setup_data data;
-	u64 store_size;
-	u64 remaining_size;
-	u64 max_var_size;
-};
-
 #ifdef CONFIG_EFI
 
 static inline bool efi_is_native(void)
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 08744242b8d2..c15ddaf90710 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -6,7 +6,6 @@
 #define SETUP_E820_EXT			1
 #define SETUP_DTB			2
 #define SETUP_PCI			3
-#define SETUP_EFI_VARS			4
 
 /* ram_size flags */
 #define RAMDISK_IMAGE_START_MASK	0x07FF
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 82089d8b1954..5ae2eb09419e 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -42,7 +42,6 @@
 #include <linux/io.h>
 #include <linux/reboot.h>
 #include <linux/bcd.h>
-#include <linux/ucs2_string.h>
 
 #include <asm/setup.h>
 #include <asm/efi.h>
@@ -54,12 +53,12 @@
 
 #define EFI_DEBUG	1
 
-/*
- * There's some additional metadata associated with each
- * variable. Intel's reference implementation is 60 bytes - bump that
- * to account for potential alignment constraints
- */
-#define VAR_METADATA_SIZE 64
+#define EFI_MIN_RESERVE 5120
+
+#define EFI_DUMMY_GUID \
+	EFI_GUID(0x4424ac57, 0xbe4b, 0x47dd, 0x9e, 0x97, 0xed, 0x50, 0xf0, 0x9f, 0x92, 0xa9)
+
+static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 };
 
 struct efi __read_mostly efi = {
 	.mps        = EFI_INVALID_TABLE_ADDR,
@@ -79,13 +78,6 @@ struct efi_memory_map memmap;
 static struct efi efi_phys __initdata;
 static efi_system_table_t efi_systab __initdata;
 
-static u64 efi_var_store_size;
-static u64 efi_var_remaining_size;
-static u64 efi_var_max_var_size;
-static u64 boot_used_size;
-static u64 boot_var_size;
-static u64 active_size;
-
 unsigned long x86_efi_facility;
 
 /*
@@ -188,53 +180,8 @@ static efi_status_t virt_efi_get_next_variable(unsigned long *name_size,
 					       efi_char16_t *name,
 					       efi_guid_t *vendor)
 {
-	efi_status_t status;
-	static bool finished = false;
-	static u64 var_size;
-
-	status = efi_call_virt3(get_next_variable,
-				name_size, name, vendor);
-
-	if (status == EFI_NOT_FOUND) {
-		finished = true;
-		if (var_size < boot_used_size) {
-			boot_var_size = boot_used_size - var_size;
-			active_size += boot_var_size;
-		} else {
-			printk(KERN_WARNING FW_BUG  "efi: Inconsistent initial sizes\n");
-		}
-	}
-
-	if (boot_used_size && !finished) {
-		unsigned long size = 0;
-		u32 attr;
-		efi_status_t s;
-		void *tmp;
-
-		s = virt_efi_get_variable(name, vendor, &attr, &size, NULL);
-
-		if (s != EFI_BUFFER_TOO_SMALL || !size)
-			return status;
-
-		tmp = kmalloc(size, GFP_ATOMIC);
-
-		if (!tmp)
-			return status;
-
-		s = virt_efi_get_variable(name, vendor, &attr, &size, tmp);
-
-		if (s == EFI_SUCCESS && (attr & EFI_VARIABLE_NON_VOLATILE)) {
-			var_size += size;
-			var_size += ucs2_strsize(name, 1024);
-			active_size += size;
-			active_size += VAR_METADATA_SIZE;
-			active_size += ucs2_strsize(name, 1024);
-		}
-
-		kfree(tmp);
-	}
-
-	return status;
+	return efi_call_virt3(get_next_variable,
+			      name_size, name, vendor);
 }
 
 static efi_status_t virt_efi_set_variable(efi_char16_t *name,
@@ -243,34 +190,9 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name,
 					  unsigned long data_size,
 					  void *data)
 {
-	efi_status_t status;
-	u32 orig_attr = 0;
-	unsigned long orig_size = 0;
-
-	status = virt_efi_get_variable(name, vendor, &orig_attr, &orig_size,
-				       NULL);
-
-	if (status != EFI_BUFFER_TOO_SMALL)
-		orig_size = 0;
-
-	status = efi_call_virt5(set_variable,
-				name, vendor, attr,
-				data_size, data);
-
-	if (status == EFI_SUCCESS) {
-		if (orig_size) {
-			active_size -= orig_size;
-			active_size -= ucs2_strsize(name, 1024);
-			active_size -= VAR_METADATA_SIZE;
-		}
-		if (data_size) {
-			active_size += data_size;
-			active_size += ucs2_strsize(name, 1024);
-			active_size += VAR_METADATA_SIZE;
-		}
-	}
-
-	return status;
+	return efi_call_virt5(set_variable,
+			      name, vendor, attr,
+			      data_size, data);
 }
 
 static efi_status_t virt_efi_query_variable_info(u32 attr,
@@ -786,9 +708,6 @@ void __init efi_init(void)
 	char vendor[100] = "unknown";
 	int i = 0;
 	void *tmp;
-	struct setup_data *data;
-	struct efi_var_bootdata *efi_var_data;
-	u64 pa_data;
 
 #ifdef CONFIG_X86_32
 	if (boot_params.efi_info.efi_systab_hi ||
@@ -806,22 +725,6 @@ void __init efi_init(void)
 	if (efi_systab_init(efi_phys.systab))
 		return;
 
-	pa_data = boot_params.hdr.setup_data;
-	while (pa_data) {
-		data = early_ioremap(pa_data, sizeof(*efi_var_data));
-		if (data->type == SETUP_EFI_VARS) {
-			efi_var_data = (struct efi_var_bootdata *)data;
-
-			efi_var_store_size = efi_var_data->store_size;
-			efi_var_remaining_size = efi_var_data->remaining_size;
-			efi_var_max_var_size = efi_var_data->max_var_size;
-		}
-		pa_data = data->next;
-		early_iounmap(data, sizeof(*efi_var_data));
-	}
-
-	boot_used_size = efi_var_store_size - efi_var_remaining_size;
-
 	set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility);
 
 	/*
@@ -1085,6 +988,13 @@ void __init efi_enter_virtual_mode(void)
 		runtime_code_page_mkexec();
 
 	kfree(new_memmap);
+
+	/* clean DUMMY object */
+	efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
+			 EFI_VARIABLE_NON_VOLATILE |
+			 EFI_VARIABLE_BOOTSERVICE_ACCESS |
+			 EFI_VARIABLE_RUNTIME_ACCESS,
+			 0, NULL);
 }
 
 /*
@@ -1136,33 +1046,65 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)
 	efi_status_t status;
 	u64 storage_size, remaining_size, max_size;
 
+	if (!(attributes & EFI_VARIABLE_NON_VOLATILE))
+		return 0;
+
 	status = efi.query_variable_info(attributes, &storage_size,
 					 &remaining_size, &max_size);
 	if (status != EFI_SUCCESS)
 		return status;
 
-	if (!max_size && remaining_size > size)
-		printk_once(KERN_ERR FW_BUG "Broken EFI implementation"
-			    " is returning MaxVariableSize=0\n");
 	/*
 	 * Some firmware implementations refuse to boot if there's insufficient
 	 * space in the variable store. We account for that by refusing the
 	 * write if permitting it would reduce the available space to under
-	 * 50%. However, some firmware won't reclaim variable space until
-	 * after the used (not merely the actively used) space drops below
-	 * a threshold. We can approximate that case with the value calculated
-	 * above. If both the firmware and our calculations indicate that the
-	 * available space would drop below 50%, refuse the write.
+	 * 5KB. This figure was provided by Samsung, so should be safe.
 	 */
+	if ((remaining_size - size < EFI_MIN_RESERVE) &&
+		!efi_no_storage_paranoia) {
+
+		/*
+		 * Triggering garbage collection may require that the firmware
+		 * generate a real EFI_OUT_OF_RESOURCES error. We can force
+		 * that by attempting to use more space than is available.
+		 */
+		unsigned long dummy_size = remaining_size + 1024;
+		void *dummy = kmalloc(dummy_size, GFP_ATOMIC);
+
+		status = efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
+					  EFI_VARIABLE_NON_VOLATILE |
+					  EFI_VARIABLE_BOOTSERVICE_ACCESS |
+					  EFI_VARIABLE_RUNTIME_ACCESS,
+					  dummy_size, dummy);
+
+		if (status == EFI_SUCCESS) {
+			/*
+			 * This should have failed, so if it didn't make sure
+			 * that we delete it...
+			 */
+			efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
+					 EFI_VARIABLE_NON_VOLATILE |
+					 EFI_VARIABLE_BOOTSERVICE_ACCESS |
+					 EFI_VARIABLE_RUNTIME_ACCESS,
+					 0, dummy);
+		}
 
-	if (!storage_size || size > remaining_size ||
-	    (max_size && size > max_size))
-		return EFI_OUT_OF_RESOURCES;
+		/*
+		 * The runtime code may now have triggered a garbage collection
+		 * run, so check the variable info again
+		 */
+		status = efi.query_variable_info(attributes, &storage_size,
+						 &remaining_size, &max_size);
 
-	if (!efi_no_storage_paranoia &&
-	    ((active_size + size + VAR_METADATA_SIZE > storage_size / 2) &&
-	     (remaining_size - size < storage_size / 2)))
-		return EFI_OUT_OF_RESOURCES;
+		if (status != EFI_SUCCESS)
+			return status;
+
+		/*
+		 * There still isn't enough room, so return an error
+		 */
+		if (remaining_size - size < EFI_MIN_RESERVE)
+			return EFI_OUT_OF_RESOURCES;
+	}
 
 	return EFI_SUCCESS;
 }
-- 
cgit v1.2.3


From b1983b0a7578de09211a696802edab83fd253303 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 11 Jun 2013 11:56:52 -0700
Subject: x86, relocs: Move __vvar_page from S_ABS to S_REL

The __vvar_page relocation should actually be listed in S_REL instead
of S_ABS. Oddly, this didn't always cause things to break, presumably
because there are no users for relocation information on 64 bits yet.

[ hpa: Not for stable - new code in 3.10 ]

Signed-off-by: Kees Cook <keescook@chromium.org>
Link: http://lkml.kernel.org/r/20130611185652.GA23674@www.outflux.net
Reported-by: Michael Davidson <md@google.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/tools/relocs.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 590be1090892..f7bab68a4b83 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -42,9 +42,6 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = {
 	"^(xen_irq_disable_direct_reloc$|"
 	"xen_save_fl_direct_reloc$|"
 	"VDSO|"
-#if ELF_BITS == 64
-	"__vvar_page|"
-#endif
 	"__crc_)",
 
 /*
@@ -72,6 +69,7 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = {
 	"__per_cpu_load|"
 	"init_per_cpu__.*|"
 	"__end_rodata_hpage_align|"
+	"__vvar_page|"
 #endif
 	"_end)$"
 };
-- 
cgit v1.2.3


From c8a22d19dd238ede87aa0ac4f7dbea8da039b9c1 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 5 Jun 2013 11:47:18 -0700
Subject: x86: Fix typo in kexec register clearing

Fixes a typo in register clearing code. Thanks to PaX Team for fixing
this originally, and James Troup for pointing it out.

Signed-off-by: Kees Cook <keescook@chromium.org>
Link: http://lkml.kernel.org/r/20130605184718.GA8396@www.outflux.net
Cc: <stable@vger.kernel.org> v2.6.30+
Cc: PaX Team <pageexec@freemail.hu>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/relocate_kernel_64.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 7a6f3b3be3cf..f2bb9c96720a 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -160,7 +160,7 @@ identity_mapped:
 	xorq    %rbp, %rbp
 	xorq	%r8,  %r8
 	xorq	%r9,  %r9
-	xorq	%r10, %r9
+	xorq	%r10, %r10
 	xorq	%r11, %r11
 	xorq	%r12, %r12
 	xorq	%r13, %r13
-- 
cgit v1.2.3