summaryrefslogtreecommitdiffstats
path: root/arch/arm64/kvm/hyp.S
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-09-10 16:42:49 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-09-10 16:42:49 -0700
commit519f526d391b0ef775aeb04c4b6f632ea6b3ee50 (patch)
tree36985d7882734c136fc3c9a48e9d9abf9e97c1f1 /arch/arm64/kvm/hyp.S
parent06ab838c2024db468855118087db16d8fa905ddc (diff)
parentba60c41ae392b473a1897faa0b8739fcb8759d69 (diff)
downloadlinux-519f526d391b0ef775aeb04c4b6f632ea6b3ee50.tar.gz
linux-519f526d391b0ef775aeb04c4b6f632ea6b3ee50.tar.bz2
linux-519f526d391b0ef775aeb04c4b6f632ea6b3ee50.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull more kvm updates from Paolo Bonzini: "ARM: - Full debug support for arm64 - Active state switching for timer interrupts - Lazy FP/SIMD save/restore for arm64 - Generic ARMv8 target PPC: - Book3S: A few bug fixes - Book3S: Allow micro-threading on POWER8 x86: - Compiler warnings Generic: - Adaptive polling for guest halt" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (49 commits) kvm: irqchip: fix memory leak kvm: move new trace event outside #ifdef CONFIG_KVM_ASYNC_PF KVM: trace kvm_halt_poll_ns grow/shrink KVM: dynamic halt-polling KVM: make halt_poll_ns per-vCPU Silence compiler warning in arch/x86/kvm/emulate.c kvm: compile process_smi_save_seg_64() only for x86_64 KVM: x86: avoid uninitialized variable warning KVM: PPC: Book3S: Fix typo in top comment about locking KVM: PPC: Book3S: Fix size of the PSPB register KVM: PPC: Book3S HV: Exit on H_DOORBELL if HOST_IPI is set KVM: PPC: Book3S HV: Fix race in starting secondary threads KVM: PPC: Book3S: correct width in XER handling KVM: PPC: Book3S HV: Fix preempted vcore stolen time calculation KVM: PPC: Book3S HV: Fix preempted vcore list locking KVM: PPC: Book3S HV: Implement H_CLEAR_REF and H_CLEAR_MOD KVM: PPC: Book3S HV: Fix bug in dirty page tracking KVM: PPC: Book3S HV: Fix race in reading change bit when removing HPTE KVM: PPC: Book3S HV: Implement dynamic micro-threading on POWER8 KVM: PPC: Book3S HV: Make use of unused threads when running guests ...
Diffstat (limited to 'arch/arm64/kvm/hyp.S')
-rw-r--r--arch/arm64/kvm/hyp.S617
1 files changed, 218 insertions, 399 deletions
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 10915aaf0b01..37c89ea2c572 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -230,199 +230,52 @@
stp x24, x25, [x3, #160]
.endm
-.macro save_debug
- // x2: base address for cpu context
- // x3: tmp register
-
- mrs x26, id_aa64dfr0_el1
- ubfx x24, x26, #12, #4 // Extract BRPs
- ubfx x25, x26, #20, #4 // Extract WRPs
- mov w26, #15
- sub w24, w26, w24 // How many BPs to skip
- sub w25, w26, w25 // How many WPs to skip
-
- add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
-
- adr x26, 1f
- add x26, x26, x24, lsl #2
- br x26
-1:
- mrs x20, dbgbcr15_el1
- mrs x19, dbgbcr14_el1
- mrs x18, dbgbcr13_el1
- mrs x17, dbgbcr12_el1
- mrs x16, dbgbcr11_el1
- mrs x15, dbgbcr10_el1
- mrs x14, dbgbcr9_el1
- mrs x13, dbgbcr8_el1
- mrs x12, dbgbcr7_el1
- mrs x11, dbgbcr6_el1
- mrs x10, dbgbcr5_el1
- mrs x9, dbgbcr4_el1
- mrs x8, dbgbcr3_el1
- mrs x7, dbgbcr2_el1
- mrs x6, dbgbcr1_el1
- mrs x5, dbgbcr0_el1
-
- adr x26, 1f
- add x26, x26, x24, lsl #2
- br x26
-
-1:
- str x20, [x3, #(15 * 8)]
- str x19, [x3, #(14 * 8)]
- str x18, [x3, #(13 * 8)]
- str x17, [x3, #(12 * 8)]
- str x16, [x3, #(11 * 8)]
- str x15, [x3, #(10 * 8)]
- str x14, [x3, #(9 * 8)]
- str x13, [x3, #(8 * 8)]
- str x12, [x3, #(7 * 8)]
- str x11, [x3, #(6 * 8)]
- str x10, [x3, #(5 * 8)]
- str x9, [x3, #(4 * 8)]
- str x8, [x3, #(3 * 8)]
- str x7, [x3, #(2 * 8)]
- str x6, [x3, #(1 * 8)]
- str x5, [x3, #(0 * 8)]
-
- add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1)
-
- adr x26, 1f
- add x26, x26, x24, lsl #2
- br x26
+.macro save_debug type
+ // x4: pointer to register set
+ // x5: number of registers to skip
+ // x6..x22 trashed
+
+ adr x22, 1f
+ add x22, x22, x5, lsl #2
+ br x22
1:
- mrs x20, dbgbvr15_el1
- mrs x19, dbgbvr14_el1
- mrs x18, dbgbvr13_el1
- mrs x17, dbgbvr12_el1
- mrs x16, dbgbvr11_el1
- mrs x15, dbgbvr10_el1
- mrs x14, dbgbvr9_el1
- mrs x13, dbgbvr8_el1
- mrs x12, dbgbvr7_el1
- mrs x11, dbgbvr6_el1
- mrs x10, dbgbvr5_el1
- mrs x9, dbgbvr4_el1
- mrs x8, dbgbvr3_el1
- mrs x7, dbgbvr2_el1
- mrs x6, dbgbvr1_el1
- mrs x5, dbgbvr0_el1
-
- adr x26, 1f
- add x26, x26, x24, lsl #2
- br x26
-
-1:
- str x20, [x3, #(15 * 8)]
- str x19, [x3, #(14 * 8)]
- str x18, [x3, #(13 * 8)]
- str x17, [x3, #(12 * 8)]
- str x16, [x3, #(11 * 8)]
- str x15, [x3, #(10 * 8)]
- str x14, [x3, #(9 * 8)]
- str x13, [x3, #(8 * 8)]
- str x12, [x3, #(7 * 8)]
- str x11, [x3, #(6 * 8)]
- str x10, [x3, #(5 * 8)]
- str x9, [x3, #(4 * 8)]
- str x8, [x3, #(3 * 8)]
- str x7, [x3, #(2 * 8)]
- str x6, [x3, #(1 * 8)]
- str x5, [x3, #(0 * 8)]
-
- add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1)
-
- adr x26, 1f
- add x26, x26, x25, lsl #2
- br x26
-1:
- mrs x20, dbgwcr15_el1
- mrs x19, dbgwcr14_el1
- mrs x18, dbgwcr13_el1
- mrs x17, dbgwcr12_el1
- mrs x16, dbgwcr11_el1
- mrs x15, dbgwcr10_el1
- mrs x14, dbgwcr9_el1
- mrs x13, dbgwcr8_el1
- mrs x12, dbgwcr7_el1
- mrs x11, dbgwcr6_el1
- mrs x10, dbgwcr5_el1
- mrs x9, dbgwcr4_el1
- mrs x8, dbgwcr3_el1
- mrs x7, dbgwcr2_el1
- mrs x6, dbgwcr1_el1
- mrs x5, dbgwcr0_el1
-
- adr x26, 1f
- add x26, x26, x25, lsl #2
- br x26
-
-1:
- str x20, [x3, #(15 * 8)]
- str x19, [x3, #(14 * 8)]
- str x18, [x3, #(13 * 8)]
- str x17, [x3, #(12 * 8)]
- str x16, [x3, #(11 * 8)]
- str x15, [x3, #(10 * 8)]
- str x14, [x3, #(9 * 8)]
- str x13, [x3, #(8 * 8)]
- str x12, [x3, #(7 * 8)]
- str x11, [x3, #(6 * 8)]
- str x10, [x3, #(5 * 8)]
- str x9, [x3, #(4 * 8)]
- str x8, [x3, #(3 * 8)]
- str x7, [x3, #(2 * 8)]
- str x6, [x3, #(1 * 8)]
- str x5, [x3, #(0 * 8)]
-
- add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1)
-
- adr x26, 1f
- add x26, x26, x25, lsl #2
- br x26
-1:
- mrs x20, dbgwvr15_el1
- mrs x19, dbgwvr14_el1
- mrs x18, dbgwvr13_el1
- mrs x17, dbgwvr12_el1
- mrs x16, dbgwvr11_el1
- mrs x15, dbgwvr10_el1
- mrs x14, dbgwvr9_el1
- mrs x13, dbgwvr8_el1
- mrs x12, dbgwvr7_el1
- mrs x11, dbgwvr6_el1
- mrs x10, dbgwvr5_el1
- mrs x9, dbgwvr4_el1
- mrs x8, dbgwvr3_el1
- mrs x7, dbgwvr2_el1
- mrs x6, dbgwvr1_el1
- mrs x5, dbgwvr0_el1
-
- adr x26, 1f
- add x26, x26, x25, lsl #2
- br x26
-
+ mrs x21, \type\()15_el1
+ mrs x20, \type\()14_el1
+ mrs x19, \type\()13_el1
+ mrs x18, \type\()12_el1
+ mrs x17, \type\()11_el1
+ mrs x16, \type\()10_el1
+ mrs x15, \type\()9_el1
+ mrs x14, \type\()8_el1
+ mrs x13, \type\()7_el1
+ mrs x12, \type\()6_el1
+ mrs x11, \type\()5_el1
+ mrs x10, \type\()4_el1
+ mrs x9, \type\()3_el1
+ mrs x8, \type\()2_el1
+ mrs x7, \type\()1_el1
+ mrs x6, \type\()0_el1
+
+ adr x22, 1f
+ add x22, x22, x5, lsl #2
+ br x22
1:
- str x20, [x3, #(15 * 8)]
- str x19, [x3, #(14 * 8)]
- str x18, [x3, #(13 * 8)]
- str x17, [x3, #(12 * 8)]
- str x16, [x3, #(11 * 8)]
- str x15, [x3, #(10 * 8)]
- str x14, [x3, #(9 * 8)]
- str x13, [x3, #(8 * 8)]
- str x12, [x3, #(7 * 8)]
- str x11, [x3, #(6 * 8)]
- str x10, [x3, #(5 * 8)]
- str x9, [x3, #(4 * 8)]
- str x8, [x3, #(3 * 8)]
- str x7, [x3, #(2 * 8)]
- str x6, [x3, #(1 * 8)]
- str x5, [x3, #(0 * 8)]
-
- mrs x21, mdccint_el1
- str x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
+ str x21, [x4, #(15 * 8)]
+ str x20, [x4, #(14 * 8)]
+ str x19, [x4, #(13 * 8)]
+ str x18, [x4, #(12 * 8)]
+ str x17, [x4, #(11 * 8)]
+ str x16, [x4, #(10 * 8)]
+ str x15, [x4, #(9 * 8)]
+ str x14, [x4, #(8 * 8)]
+ str x13, [x4, #(7 * 8)]
+ str x12, [x4, #(6 * 8)]
+ str x11, [x4, #(5 * 8)]
+ str x10, [x4, #(4 * 8)]
+ str x9, [x4, #(3 * 8)]
+ str x8, [x4, #(2 * 8)]
+ str x7, [x4, #(1 * 8)]
+ str x6, [x4, #(0 * 8)]
.endm
.macro restore_sysregs
@@ -467,195 +320,52 @@
msr mdscr_el1, x25
.endm
-.macro restore_debug
- // x2: base address for cpu context
- // x3: tmp register
-
- mrs x26, id_aa64dfr0_el1
- ubfx x24, x26, #12, #4 // Extract BRPs
- ubfx x25, x26, #20, #4 // Extract WRPs
- mov w26, #15
- sub w24, w26, w24 // How many BPs to skip
- sub w25, w26, w25 // How many WPs to skip
-
- add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
+.macro restore_debug type
+ // x4: pointer to register set
+ // x5: number of registers to skip
+ // x6..x22 trashed
- adr x26, 1f
- add x26, x26, x24, lsl #2
- br x26
-1:
- ldr x20, [x3, #(15 * 8)]
- ldr x19, [x3, #(14 * 8)]
- ldr x18, [x3, #(13 * 8)]
- ldr x17, [x3, #(12 * 8)]
- ldr x16, [x3, #(11 * 8)]
- ldr x15, [x3, #(10 * 8)]
- ldr x14, [x3, #(9 * 8)]
- ldr x13, [x3, #(8 * 8)]
- ldr x12, [x3, #(7 * 8)]
- ldr x11, [x3, #(6 * 8)]
- ldr x10, [x3, #(5 * 8)]
- ldr x9, [x3, #(4 * 8)]
- ldr x8, [x3, #(3 * 8)]
- ldr x7, [x3, #(2 * 8)]
- ldr x6, [x3, #(1 * 8)]
- ldr x5, [x3, #(0 * 8)]
-
- adr x26, 1f
- add x26, x26, x24, lsl #2
- br x26
+ adr x22, 1f
+ add x22, x22, x5, lsl #2
+ br x22
1:
- msr dbgbcr15_el1, x20
- msr dbgbcr14_el1, x19
- msr dbgbcr13_el1, x18
- msr dbgbcr12_el1, x17
- msr dbgbcr11_el1, x16
- msr dbgbcr10_el1, x15
- msr dbgbcr9_el1, x14
- msr dbgbcr8_el1, x13
- msr dbgbcr7_el1, x12
- msr dbgbcr6_el1, x11
- msr dbgbcr5_el1, x10
- msr dbgbcr4_el1, x9
- msr dbgbcr3_el1, x8
- msr dbgbcr2_el1, x7
- msr dbgbcr1_el1, x6
- msr dbgbcr0_el1, x5
-
- add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1)
-
- adr x26, 1f
- add x26, x26, x24, lsl #2
- br x26
+ ldr x21, [x4, #(15 * 8)]
+ ldr x20, [x4, #(14 * 8)]
+ ldr x19, [x4, #(13 * 8)]
+ ldr x18, [x4, #(12 * 8)]
+ ldr x17, [x4, #(11 * 8)]
+ ldr x16, [x4, #(10 * 8)]
+ ldr x15, [x4, #(9 * 8)]
+ ldr x14, [x4, #(8 * 8)]
+ ldr x13, [x4, #(7 * 8)]
+ ldr x12, [x4, #(6 * 8)]
+ ldr x11, [x4, #(5 * 8)]
+ ldr x10, [x4, #(4 * 8)]
+ ldr x9, [x4, #(3 * 8)]
+ ldr x8, [x4, #(2 * 8)]
+ ldr x7, [x4, #(1 * 8)]
+ ldr x6, [x4, #(0 * 8)]
+
+ adr x22, 1f
+ add x22, x22, x5, lsl #2
+ br x22
1:
- ldr x20, [x3, #(15 * 8)]
- ldr x19, [x3, #(14 * 8)]
- ldr x18, [x3, #(13 * 8)]
- ldr x17, [x3, #(12 * 8)]
- ldr x16, [x3, #(11 * 8)]
- ldr x15, [x3, #(10 * 8)]
- ldr x14, [x3, #(9 * 8)]
- ldr x13, [x3, #(8 * 8)]
- ldr x12, [x3, #(7 * 8)]
- ldr x11, [x3, #(6 * 8)]
- ldr x10, [x3, #(5 * 8)]
- ldr x9, [x3, #(4 * 8)]
- ldr x8, [x3, #(3 * 8)]
- ldr x7, [x3, #(2 * 8)]
- ldr x6, [x3, #(1 * 8)]
- ldr x5, [x3, #(0 * 8)]
-
- adr x26, 1f
- add x26, x26, x24, lsl #2
- br x26
-1:
- msr dbgbvr15_el1, x20
- msr dbgbvr14_el1, x19
- msr dbgbvr13_el1, x18
- msr dbgbvr12_el1, x17
- msr dbgbvr11_el1, x16
- msr dbgbvr10_el1, x15
- msr dbgbvr9_el1, x14
- msr dbgbvr8_el1, x13
- msr dbgbvr7_el1, x12
- msr dbgbvr6_el1, x11
- msr dbgbvr5_el1, x10
- msr dbgbvr4_el1, x9
- msr dbgbvr3_el1, x8
- msr dbgbvr2_el1, x7
- msr dbgbvr1_el1, x6
- msr dbgbvr0_el1, x5
-
- add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1)
-
- adr x26, 1f
- add x26, x26, x25, lsl #2
- br x26
-1:
- ldr x20, [x3, #(15 * 8)]
- ldr x19, [x3, #(14 * 8)]
- ldr x18, [x3, #(13 * 8)]
- ldr x17, [x3, #(12 * 8)]
- ldr x16, [x3, #(11 * 8)]
- ldr x15, [x3, #(10 * 8)]
- ldr x14, [x3, #(9 * 8)]
- ldr x13, [x3, #(8 * 8)]
- ldr x12, [x3, #(7 * 8)]
- ldr x11, [x3, #(6 * 8)]
- ldr x10, [x3, #(5 * 8)]
- ldr x9, [x3, #(4 * 8)]
- ldr x8, [x3, #(3 * 8)]
- ldr x7, [x3, #(2 * 8)]
- ldr x6, [x3, #(1 * 8)]
- ldr x5, [x3, #(0 * 8)]
-
- adr x26, 1f
- add x26, x26, x25, lsl #2
- br x26
-1:
- msr dbgwcr15_el1, x20
- msr dbgwcr14_el1, x19
- msr dbgwcr13_el1, x18
- msr dbgwcr12_el1, x17
- msr dbgwcr11_el1, x16
- msr dbgwcr10_el1, x15
- msr dbgwcr9_el1, x14
- msr dbgwcr8_el1, x13
- msr dbgwcr7_el1, x12
- msr dbgwcr6_el1, x11
- msr dbgwcr5_el1, x10
- msr dbgwcr4_el1, x9
- msr dbgwcr3_el1, x8
- msr dbgwcr2_el1, x7
- msr dbgwcr1_el1, x6
- msr dbgwcr0_el1, x5
-
- add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1)
-
- adr x26, 1f
- add x26, x26, x25, lsl #2
- br x26
-1:
- ldr x20, [x3, #(15 * 8)]
- ldr x19, [x3, #(14 * 8)]
- ldr x18, [x3, #(13 * 8)]
- ldr x17, [x3, #(12 * 8)]
- ldr x16, [x3, #(11 * 8)]
- ldr x15, [x3, #(10 * 8)]
- ldr x14, [x3, #(9 * 8)]
- ldr x13, [x3, #(8 * 8)]
- ldr x12, [x3, #(7 * 8)]
- ldr x11, [x3, #(6 * 8)]
- ldr x10, [x3, #(5 * 8)]
- ldr x9, [x3, #(4 * 8)]
- ldr x8, [x3, #(3 * 8)]
- ldr x7, [x3, #(2 * 8)]
- ldr x6, [x3, #(1 * 8)]
- ldr x5, [x3, #(0 * 8)]
-
- adr x26, 1f
- add x26, x26, x25, lsl #2
- br x26
-1:
- msr dbgwvr15_el1, x20
- msr dbgwvr14_el1, x19
- msr dbgwvr13_el1, x18
- msr dbgwvr12_el1, x17
- msr dbgwvr11_el1, x16
- msr dbgwvr10_el1, x15
- msr dbgwvr9_el1, x14
- msr dbgwvr8_el1, x13
- msr dbgwvr7_el1, x12
- msr dbgwvr6_el1, x11
- msr dbgwvr5_el1, x10
- msr dbgwvr4_el1, x9
- msr dbgwvr3_el1, x8
- msr dbgwvr2_el1, x7
- msr dbgwvr1_el1, x6
- msr dbgwvr0_el1, x5
-
- ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
- msr mdccint_el1, x21
+ msr \type\()15_el1, x21
+ msr \type\()14_el1, x20
+ msr \type\()13_el1, x19
+ msr \type\()12_el1, x18
+ msr \type\()11_el1, x17
+ msr \type\()10_el1, x16
+ msr \type\()9_el1, x15
+ msr \type\()8_el1, x14
+ msr \type\()7_el1, x13
+ msr \type\()6_el1, x12
+ msr \type\()5_el1, x11
+ msr \type\()4_el1, x10
+ msr \type\()3_el1, x9
+ msr \type\()2_el1, x8
+ msr \type\()1_el1, x7
+ msr \type\()0_el1, x6
.endm
.macro skip_32bit_state tmp, target
@@ -675,6 +385,14 @@
tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target
.endm
+/*
+ * Branch to target if CPTR_EL2.TFP bit is set (VFP/SIMD trapping enabled)
+ */
+.macro skip_fpsimd_state tmp, target
+ mrs \tmp, cptr_el2
+ tbnz \tmp, #CPTR_EL2_TFP_SHIFT, \target
+.endm
+
.macro compute_debug_state target
// Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY
// is set, we do a full save/restore cycle and disable trapping.
@@ -713,10 +431,12 @@
add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
mrs x4, dacr32_el2
mrs x5, ifsr32_el2
- mrs x6, fpexc32_el2
stp x4, x5, [x3]
- str x6, [x3, #16]
+ skip_fpsimd_state x8, 3f
+ mrs x6, fpexc32_el2
+ str x6, [x3, #16]
+3:
skip_debug_state x8, 2f
mrs x7, dbgvcr32_el2
str x7, [x3, #24]
@@ -743,10 +463,8 @@
add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
ldp x4, x5, [x3]
- ldr x6, [x3, #16]
msr dacr32_el2, x4
msr ifsr32_el2, x5
- msr fpexc32_el2, x6
skip_debug_state x8, 2f
ldr x7, [x3, #24]
@@ -763,31 +481,35 @@
.macro activate_traps
ldr x2, [x0, #VCPU_HCR_EL2]
+
+ /*
+ * We are about to set CPTR_EL2.TFP to trap all floating point
+ * register accesses to EL2, however, the ARM ARM clearly states that
+ * traps are only taken to EL2 if the operation would not otherwise
+ * trap to EL1. Therefore, always make sure that for 32-bit guests,
+ * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
+ */
+ tbnz x2, #HCR_RW_SHIFT, 99f // open code skip_32bit_state
+ mov x3, #(1 << 30)
+ msr fpexc32_el2, x3
+ isb
+99:
msr hcr_el2, x2
mov x2, #CPTR_EL2_TTA
+ orr x2, x2, #CPTR_EL2_TFP
msr cptr_el2, x2
mov x2, #(1 << 15) // Trap CP15 Cr=15
msr hstr_el2, x2
- mrs x2, mdcr_el2
- and x2, x2, #MDCR_EL2_HPMN_MASK
- orr x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR)
- orr x2, x2, #(MDCR_EL2_TDRA | MDCR_EL2_TDOSA)
-
- // Check for KVM_ARM64_DEBUG_DIRTY, and set debug to trap
- // if not dirty.
- ldr x3, [x0, #VCPU_DEBUG_FLAGS]
- tbnz x3, #KVM_ARM64_DEBUG_DIRTY_SHIFT, 1f
- orr x2, x2, #MDCR_EL2_TDA
-1:
+ // Monitor Debug Config - see kvm_arm_setup_debug()
+ ldr x2, [x0, #VCPU_MDCR_EL2]
msr mdcr_el2, x2
.endm
.macro deactivate_traps
mov x2, #HCR_RW
msr hcr_el2, x2
- msr cptr_el2, xzr
msr hstr_el2, xzr
mrs x2, mdcr_el2
@@ -900,21 +622,101 @@ __restore_sysregs:
restore_sysregs
ret
+/* Save debug state */
__save_debug:
- save_debug
+ // x2: ptr to CPU context
+ // x3: ptr to debug reg struct
+ // x4/x5/x6-22/x24-26: trashed
+
+ mrs x26, id_aa64dfr0_el1
+ ubfx x24, x26, #12, #4 // Extract BRPs
+ ubfx x25, x26, #20, #4 // Extract WRPs
+ mov w26, #15
+ sub w24, w26, w24 // How many BPs to skip
+ sub w25, w26, w25 // How many WPs to skip
+
+ mov x5, x24
+ add x4, x3, #DEBUG_BCR
+ save_debug dbgbcr
+ add x4, x3, #DEBUG_BVR
+ save_debug dbgbvr
+
+ mov x5, x25
+ add x4, x3, #DEBUG_WCR
+ save_debug dbgwcr
+ add x4, x3, #DEBUG_WVR
+ save_debug dbgwvr
+
+ mrs x21, mdccint_el1
+ str x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
ret
+/* Restore debug state */
__restore_debug:
- restore_debug
+ // x2: ptr to CPU context
+ // x3: ptr to debug reg struct
+ // x4/x5/x6-22/x24-26: trashed
+
+ mrs x26, id_aa64dfr0_el1
+ ubfx x24, x26, #12, #4 // Extract BRPs
+ ubfx x25, x26, #20, #4 // Extract WRPs
+ mov w26, #15
+ sub w24, w26, w24 // How many BPs to skip
+ sub w25, w26, w25 // How many WPs to skip
+
+ mov x5, x24
+ add x4, x3, #DEBUG_BCR
+ restore_debug dbgbcr
+ add x4, x3, #DEBUG_BVR
+ restore_debug dbgbvr
+
+ mov x5, x25
+ add x4, x3, #DEBUG_WCR
+ restore_debug dbgwcr
+ add x4, x3, #DEBUG_WVR
+ restore_debug dbgwvr
+
+ ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
+ msr mdccint_el1, x21
+
ret
__save_fpsimd:
+ skip_fpsimd_state x3, 1f
save_fpsimd
- ret
+1: ret
__restore_fpsimd:
+ skip_fpsimd_state x3, 1f
restore_fpsimd
- ret
+1: ret
+
+switch_to_guest_fpsimd:
+ push x4, lr
+
+ mrs x2, cptr_el2
+ bic x2, x2, #CPTR_EL2_TFP
+ msr cptr_el2, x2
+ isb
+
+ mrs x0, tpidr_el2
+
+ ldr x2, [x0, #VCPU_HOST_CONTEXT]
+ kern_hyp_va x2
+ bl __save_fpsimd
+
+ add x2, x0, #VCPU_CONTEXT
+ bl __restore_fpsimd
+
+ skip_32bit_state x3, 1f
+ ldr x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)]
+ msr fpexc32_el2, x4
+1:
+ pop x4, lr
+ pop x2, x3
+ pop x0, x1
+
+ eret
/*
* u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu);
@@ -936,10 +738,10 @@ ENTRY(__kvm_vcpu_run)
kern_hyp_va x2
save_host_regs
- bl __save_fpsimd
bl __save_sysregs
compute_debug_state 1f
+ add x3, x0, #VCPU_HOST_DEBUG_STATE
bl __save_debug
1:
activate_traps
@@ -952,9 +754,10 @@ ENTRY(__kvm_vcpu_run)
add x2, x0, #VCPU_CONTEXT
bl __restore_sysregs
- bl __restore_fpsimd
skip_debug_state x3, 1f
+ ldr x3, [x0, #VCPU_DEBUG_PTR]
+ kern_hyp_va x3
bl __restore_debug
1:
restore_guest_32bit_state
@@ -975,6 +778,8 @@ __kvm_vcpu_return:
bl __save_sysregs
skip_debug_state x3, 1f
+ ldr x3, [x0, #VCPU_DEBUG_PTR]
+ kern_hyp_va x3
bl __save_debug
1:
save_guest_32bit_state
@@ -991,12 +796,15 @@ __kvm_vcpu_return:
bl __restore_sysregs
bl __restore_fpsimd
+ /* Clear FPSIMD and Trace trapping */
+ msr cptr_el2, xzr
skip_debug_state x3, 1f
// Clear the dirty flag for the next run, as all the state has
// already been saved. Note that we nuke the whole 64bit word.
// If we ever add more flags, we'll have to be more careful...
str xzr, [x0, #VCPU_DEBUG_FLAGS]
+ add x3, x0, #VCPU_HOST_DEBUG_STATE
bl __restore_debug
1:
restore_host_regs
@@ -1199,6 +1007,11 @@ el1_trap:
* x1: ESR
* x2: ESR_EC
*/
+
+ /* Guest accessed VFP/SIMD registers, save host, restore Guest */
+ cmp x2, #ESR_ELx_EC_FP_ASIMD
+ b.eq switch_to_guest_fpsimd
+
cmp x2, #ESR_ELx_EC_DABT_LOW
mov x0, #ESR_ELx_EC_IABT_LOW
ccmp x2, x0, #4, ne
@@ -1293,4 +1106,10 @@ ENTRY(__kvm_hyp_vector)
ventry el1_error_invalid // Error 32-bit EL1
ENDPROC(__kvm_hyp_vector)
+
+ENTRY(__kvm_get_mdcr_el2)
+ mrs x0, mdcr_el2
+ ret
+ENDPROC(__kvm_get_mdcr_el2)
+
.popsection