From 1a82f6ab23659aa01a796d9d444ec9cc63ded26c Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Feb 2022 19:22:06 +0100 Subject: s390/uaccess: Add copy_from/to_user_key functions Add copy_from/to_user_key functions, which perform storage key checking. These functions can be used by KVM for emulating instructions that need to be key checked. These functions differ from their non _key counterparts in include/linux/uaccess.h only in the additional key argument and must be kept in sync with those. Since the existing uaccess implementation on s390 makes use of move instructions that support having an additional access key supplied, we can implement raw_copy_from/to_user_key by enhancing the existing implementation. Signed-off-by: Janis Schoetterl-Glausch Acked-by: Heiko Carstens Reviewed-by: Christian Borntraeger Acked-by: Janosch Frank Link: https://lore.kernel.org/r/20220211182215.2730017-2-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/uaccess.h | 22 +++++++++++ arch/s390/lib/uaccess.c | 81 ++++++++++++++++++++++++++++++++--------- 2 files changed, 85 insertions(+), 18 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index d74e26b48604..ba1bcb91af95 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -44,6 +44,28 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n); #define INLINE_COPY_TO_USER #endif +unsigned long __must_check +_copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key); + +static __always_inline unsigned long __must_check +copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key) +{ + if (likely(check_copy_size(to, n, false))) + n = _copy_from_user_key(to, from, n, key); + return n; +} + +unsigned long __must_check +_copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned long key); + +static __always_inline unsigned long __must_check +copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned long key) +{ + if (likely(check_copy_size(from, n, true))) + n = _copy_to_user_key(to, from, n, key); + return n; +} + int __put_user_bad(void) __attribute__((noreturn)); int __get_user_bad(void) __attribute__((noreturn)); diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index 8a5d21461889..b709239feb5d 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -59,11 +59,13 @@ static inline int copy_with_mvcos(void) #endif static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr, - unsigned long size) + unsigned long size, unsigned long key) { unsigned long tmp1, tmp2; union oac spec = { + .oac2.key = key, .oac2.as = PSW_BITS_AS_SECONDARY, + .oac2.k = 1, .oac2.a = 1, }; @@ -94,19 +96,19 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr } static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, - unsigned long size) + unsigned long size, unsigned long key) { unsigned long tmp1, tmp2; tmp1 = -256UL; asm volatile( " sacf 0\n" - "0: mvcp 0(%0,%2),0(%1),%3\n" + "0: mvcp 0(%0,%2),0(%1),%[key]\n" "7: jz 5f\n" "1: algr %0,%3\n" " la %1,256(%1)\n" " la %2,256(%2)\n" - "2: mvcp 0(%0,%2),0(%1),%3\n" + "2: mvcp 0(%0,%2),0(%1),%[key]\n" "8: jnz 1b\n" " j 5f\n" "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ @@ -115,7 +117,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, " slgr %4,%1\n" 
" clgr %0,%4\n" /* copy crosses next page boundary? */ " jnh 6f\n" - "4: mvcp 0(%4,%2),0(%1),%3\n" + "4: mvcp 0(%4,%2),0(%1),%[key]\n" "9: slgr %0,%4\n" " j 6f\n" "5: slgr %0,%0\n" @@ -123,24 +125,49 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b) EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : : "cc", "memory"); + : [key] "d" (key << 4) + : "cc", "memory"); return size; } -unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) +static unsigned long raw_copy_from_user_key(void *to, const void __user *from, + unsigned long n, unsigned long key) { if (copy_with_mvcos()) - return copy_from_user_mvcos(to, from, n); - return copy_from_user_mvcp(to, from, n); + return copy_from_user_mvcos(to, from, n, key); + return copy_from_user_mvcp(to, from, n, key); +} + +unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) +{ + return raw_copy_from_user_key(to, from, n, 0); } EXPORT_SYMBOL(raw_copy_from_user); +unsigned long _copy_from_user_key(void *to, const void __user *from, + unsigned long n, unsigned long key) +{ + unsigned long res = n; + + might_fault(); + if (!should_fail_usercopy()) { + instrument_copy_from_user(to, from, n); + res = raw_copy_from_user_key(to, from, n, key); + } + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; +} +EXPORT_SYMBOL(_copy_from_user_key); + static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, - unsigned long size) + unsigned long size, unsigned long key) { unsigned long tmp1, tmp2; union oac spec = { + .oac1.key = key, .oac1.as = PSW_BITS_AS_SECONDARY, + .oac1.k = 1, .oac1.a = 1, }; @@ -171,19 +198,19 @@ static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, } static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, - unsigned long size) + unsigned long size, unsigned long key) { unsigned long tmp1, tmp2; tmp1 = -256UL; asm volatile( " sacf 0\n" - "0: mvcs 0(%0,%1),0(%2),%3\n" + "0: mvcs 0(%0,%1),0(%2),%[key]\n" "7: jz 5f\n" "1: algr %0,%3\n" " la %1,256(%1)\n" " la %2,256(%2)\n" - "2: mvcs 0(%0,%1),0(%2),%3\n" + "2: mvcs 0(%0,%1),0(%2),%[key]\n" "8: jnz 1b\n" " j 5f\n" "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ @@ -192,7 +219,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, " slgr %4,%1\n" " clgr %0,%4\n" /* copy crosses next page boundary? 
*/ " jnh 6f\n" - "4: mvcs 0(%4,%1),0(%2),%3\n" + "4: mvcs 0(%4,%1),0(%2),%[key]\n" "9: slgr %0,%4\n" " j 6f\n" "5: slgr %0,%0\n" @@ -200,18 +227,36 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b) EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : : "cc", "memory"); + : [key] "d" (key << 4) + : "cc", "memory"); return size; } -unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) +static unsigned long raw_copy_to_user_key(void __user *to, const void *from, + unsigned long n, unsigned long key) { if (copy_with_mvcos()) - return copy_to_user_mvcos(to, from, n); - return copy_to_user_mvcs(to, from, n); + return copy_to_user_mvcos(to, from, n, key); + return copy_to_user_mvcs(to, from, n, key); +} + +unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) +{ + return raw_copy_to_user_key(to, from, n, 0); } EXPORT_SYMBOL(raw_copy_to_user); +unsigned long _copy_to_user_key(void __user *to, const void *from, + unsigned long n, unsigned long key) +{ + might_fault(); + if (should_fail_usercopy()) + return n; + instrument_copy_to_user(to, from, n); + return raw_copy_to_user_key(to, from, n, key); +} +EXPORT_SYMBOL(_copy_to_user_key); + static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size) { unsigned long tmp1, tmp2; -- cgit v1.2.3 From e613d83454d7da1c37d78edb278db9c20afb21a2 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Feb 2022 19:22:07 +0100 Subject: KVM: s390: Honor storage keys when accessing guest memory Storage key checking had not been implemented for instructions emulated by KVM. Implement it by enhancing the functions used for guest access, in particular those making use of access_guest which has been renamed to access_guest_with_key. Accesses via access_guest_real should not be key checked. For actual accesses, key checking is done by copy_from/to_user_key (which internally uses MVCOS/MVCP/MVCS). In cases where accessibility is checked without an actual access, this is performed by getting the storage key and checking if the access key matches. In both cases, if applicable, storage and fetch protection override are honored. 
Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Janosch Frank Reviewed-by: Christian Borntraeger Link: https://lore.kernel.org/r/20220211182215.2730017-3-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/ctl_reg.h | 2 + arch/s390/include/asm/page.h | 2 + arch/s390/kvm/gaccess.c | 187 +++++++++++++++++++++++++++++++++++++--- arch/s390/kvm/gaccess.h | 77 ++++++++++++++--- arch/s390/kvm/intercept.c | 12 +-- arch/s390/kvm/kvm-s390.c | 4 +- 6 files changed, 253 insertions(+), 31 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h index 04dc65f8901d..c800199a376b 100644 --- a/arch/s390/include/asm/ctl_reg.h +++ b/arch/s390/include/asm/ctl_reg.h @@ -12,6 +12,8 @@ #define CR0_CLOCK_COMPARATOR_SIGN BIT(63 - 10) #define CR0_LOW_ADDRESS_PROTECTION BIT(63 - 35) +#define CR0_FETCH_PROTECTION_OVERRIDE BIT(63 - 38) +#define CR0_STORAGE_PROTECTION_OVERRIDE BIT(63 - 39) #define CR0_EMERGENCY_SIGNAL_SUBMASK BIT(63 - 49) #define CR0_EXTERNAL_CALL_SUBMASK BIT(63 - 50) #define CR0_CLOCK_COMPARATOR_SUBMASK BIT(63 - 52) diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index d98d17a36c7b..cfc4d6fb2385 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -20,6 +20,8 @@ #define PAGE_SIZE _PAGE_SIZE #define PAGE_MASK _PAGE_MASK #define PAGE_DEFAULT_ACC 0 +/* storage-protection override */ +#define PAGE_SPO_ACC 9 #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4) #define HPAGE_SHIFT 20 diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 4460808c3b9a..7fca0cff4c12 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -10,6 +10,7 @@ #include <linux/mm_types.h> #include <linux/err.h> #include <linux/pgtable.h> +#include <linux/bitfield.h> #include <asm/gmap.h> #include "kvm-s390.h" @@ -794,6 +795,79 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, return 1; } +static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode, + union asce asce) +{ + psw_t *psw = &vcpu->arch.sie_block->gpsw; + unsigned long override; + + if (mode == GACC_FETCH || mode == GACC_IFETCH) { + /* check if fetch protection override enabled */ + override = vcpu->arch.sie_block->gcr[0]; + override &= CR0_FETCH_PROTECTION_OVERRIDE; + /* not applicable if subject to DAT && private space */ + override = override && !(psw_bits(*psw).dat && asce.p); + return override; + } + return false; +} + +static bool fetch_prot_override_applies(unsigned long ga, unsigned int len) +{ + return ga < 2048 && ga + len <= 2048; +} + +static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu) +{ + /* check if storage protection override enabled */ + return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE; +} + +static bool storage_prot_override_applies(u8 access_control) +{ + /* matches special storage protection override key (9) -> allow */ + return access_control == PAGE_SPO_ACC; +} + +static int vcpu_check_access_key(struct kvm_vcpu *vcpu, u8 access_key, + enum gacc_mode mode, union asce asce, gpa_t gpa, + unsigned long ga, unsigned int len) +{ + u8 storage_key, access_control; + unsigned long hva; + int r; + + /* access key 0 matches any storage key -> allow */ + if (access_key == 0) + return 0; + /* + * caller needs to ensure that gfn is accessible, so we can + * assume that this cannot fail + */ + hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gpa)); + mmap_read_lock(current->mm); + r = get_guest_storage_key(current->mm, hva, &storage_key); + mmap_read_unlock(current->mm); + if (r) + return r; + access_control = 
FIELD_GET(_PAGE_ACC_BITS, storage_key); + /* access key matches storage key -> allow */ + if (access_control == access_key) + return 0; + if (mode == GACC_FETCH || mode == GACC_IFETCH) { + /* it is a fetch and fetch protection is off -> allow */ + if (!(storage_key & _PAGE_FP_BIT)) + return 0; + if (fetch_prot_override_applicable(vcpu, mode, asce) && + fetch_prot_override_applies(ga, len)) + return 0; + } + if (storage_prot_override_applicable(vcpu) && + storage_prot_override_applies(access_control)) + return 0; + return PGM_PROTECTION; +} + /** * guest_range_to_gpas() - Calculate guest physical addresses of page fragments * covering a logical range @@ -804,6 +878,7 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, * @len: length of range in bytes * @asce: address-space-control element to use for translation * @mode: access mode + * @access_key: access key to match the range's storage keys against * * Translate a logical range to a series of guest absolute addresses, * such that the concatenation of page fragments starting at each gpa make up @@ -830,7 +905,8 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, */ static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, unsigned long *gpas, unsigned long len, - const union asce asce, enum gacc_mode mode) + const union asce asce, enum gacc_mode mode, + u8 access_key) { psw_t *psw = &vcpu->arch.sie_block->gpsw; unsigned int offset = offset_in_page(ga); @@ -857,6 +933,10 @@ static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, } if (rc) return trans_exc(vcpu, rc, ga, ar, mode, prot); + rc = vcpu_check_access_key(vcpu, access_key, mode, asce, gpa, ga, + fragment_len); + if (rc) + return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_KEYC); if (gpas) *gpas++ = gpa; offset = 0; @@ -880,16 +960,54 @@ static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa, return rc; } -int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, - unsigned long len, enum gacc_mode mode) +static int +access_guest_page_with_key(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa, + void *data, unsigned int len, u8 access_key) +{ + struct kvm_memory_slot *slot; + bool writable; + gfn_t gfn; + hva_t hva; + int rc; + + gfn = gpa >> PAGE_SHIFT; + slot = gfn_to_memslot(kvm, gfn); + hva = gfn_to_hva_memslot_prot(slot, gfn, &writable); + + if (kvm_is_error_hva(hva)) + return PGM_ADDRESSING; + /* + * Check if it's a ro memslot, even though that can't occur (they're unsupported). + * Don't try to actually handle that case. 
+ */ + if (!writable && mode == GACC_STORE) + return -EOPNOTSUPP; + hva += offset_in_page(gpa); + if (mode == GACC_STORE) + rc = copy_to_user_key((void __user *)hva, data, len, access_key); + else + rc = copy_from_user_key(data, (void __user *)hva, len, access_key); + if (rc) + return PGM_PROTECTION; + if (mode == GACC_STORE) + mark_page_dirty_in_slot(kvm, slot, gfn); + return 0; +} + +int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, + void *data, unsigned long len, enum gacc_mode mode, + u8 access_key) { psw_t *psw = &vcpu->arch.sie_block->gpsw; unsigned long nr_pages, idx; unsigned long gpa_array[2]; unsigned int fragment_len; unsigned long *gpas; + enum prot_type prot; int need_ipte_lock; union asce asce; + bool try_storage_prot_override; + bool try_fetch_prot_override; int rc; if (!len) @@ -904,16 +1022,47 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long))); if (!gpas) return -ENOMEM; + try_fetch_prot_override = fetch_prot_override_applicable(vcpu, mode, asce); + try_storage_prot_override = storage_prot_override_applicable(vcpu); need_ipte_lock = psw_bits(*psw).dat && !asce.r; if (need_ipte_lock) ipte_lock(vcpu); - rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode); - for (idx = 0; idx < nr_pages && !rc; idx++) { + /* + * Since we do the access further down ultimately via a move instruction + * that does key checking and returns an error in case of a protection + * violation, we don't need to do the check during address translation. + * Skip it by passing access key 0, which matches any storage key, + * obviating the need for any further checks. As a result the check is + * handled entirely in hardware on access; we only need to take care to + * forego key protection checking if fetch protection override applies or + * retry with the special key 9 in case of storage protection override. + */ + rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0); + if (rc) + goto out_unlock; + for (idx = 0; idx < nr_pages; idx++) { fragment_len = min(PAGE_SIZE - offset_in_page(gpas[idx]), len); + if (try_fetch_prot_override && fetch_prot_override_applies(ga, fragment_len)) { + rc = access_guest_page(vcpu->kvm, mode, gpas[idx], + data, fragment_len); + } else { + rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx], + data, fragment_len, access_key); + } + if (rc == PGM_PROTECTION && try_storage_prot_override) + rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx], + data, fragment_len, PAGE_SPO_ACC); + if (rc == PGM_PROTECTION) + prot = PROT_TYPE_KEYC; + if (rc) + break; len -= fragment_len; data += fragment_len; + ga = kvm_s390_logical_to_effective(vcpu, ga + fragment_len); } + if (rc > 0) + rc = trans_exc(vcpu, rc, ga, ar, mode, prot); +out_unlock: if (need_ipte_lock) ipte_unlock(vcpu); if (nr_pages > ARRAY_SIZE(gpa_array)) @@ -940,12 +1089,13 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, } /** - * guest_translate_address - translate guest logical into guest absolute address + * guest_translate_address_with_key - translate guest logical into guest absolute address * @vcpu: virtual cpu * @gva: Guest virtual address * @ar: Access register * @gpa: Guest physical address * @mode: Translation access mode + * @access_key: access key to match the storage key with * * Parameter semantics are the same as the ones from guest_translate. 
* The memory contents at the guest address are not changed. @@ -953,8 +1103,9 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, * Note: The IPTE lock is not taken during this function, so the caller * has to take care of this. */ -int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, - unsigned long *gpa, enum gacc_mode mode) +int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, + unsigned long *gpa, enum gacc_mode mode, + u8 access_key) { union asce asce; int rc; @@ -963,7 +1114,17 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode); if (rc) return rc; - return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode); + return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode, + access_key); +} + +int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, + unsigned long *gpa, enum gacc_mode mode) +{ + u8 access_key = psw_bits(vcpu->arch.sie_block->gpsw).key; + + return guest_translate_address_with_key(vcpu, gva, ar, gpa, mode, + access_key); } /** @@ -973,9 +1134,10 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, * @ar: Access register * @length: Length of test range * @mode: Translation access mode + * @access_key: access key to match the storage keys with */ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, - unsigned long length, enum gacc_mode mode) + unsigned long length, enum gacc_mode mode, u8 access_key) { union asce asce; int rc = 0; @@ -984,7 +1146,8 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, if (rc) return rc; ipte_lock(vcpu); - rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode); + rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode, + access_key); ipte_unlock(vcpu); return rc; diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 7c72a5e3449f..e5b2f56e7962 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -186,24 +186,31 @@ enum gacc_mode { GACC_IFETCH, }; +int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, + unsigned long *gpa, enum gacc_mode mode, + u8 access_key); + int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, unsigned long *gpa, enum gacc_mode mode); + int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, - unsigned long length, enum gacc_mode mode); + unsigned long length, enum gacc_mode mode, u8 access_key); -int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, - unsigned long len, enum gacc_mode mode); +int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, + void *data, unsigned long len, enum gacc_mode mode, + u8 access_key); int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data, unsigned long len, enum gacc_mode mode); /** - * write_guest - copy data from kernel space to guest space + * write_guest_with_key - copy data from kernel space to guest space * @vcpu: virtual cpu * @ga: guest address * @ar: access register * @data: source address in kernel space * @len: number of bytes to copy + * @access_key: access key the storage key needs to match * * Copy @len bytes from @data (kernel space) to @ga (guest address). 
* In order to copy data to guest space the PSW of the vcpu is inspected: @@ -214,8 +221,8 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, * The addressing mode of the PSW is also inspected, so that address wrap * around is taken into account for 24-, 31- and 64-bit addressing mode, * if the to be copied data crosses page boundaries in guest address space. - * In addition also low address and DAT protection are inspected before - * copying any data (key protection is currently not implemented). + * In addition low address, DAT and key protection checks are performed before + * copying any data. * * This function modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu. * In case of an access exception (e.g. protection exception) pgm will contain @@ -243,10 +250,53 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, * if data has been changed in guest space in case of an exception. */ static inline __must_check +int write_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, + void *data, unsigned long len, u8 access_key) +{ + return access_guest_with_key(vcpu, ga, ar, data, len, GACC_STORE, + access_key); +} + +/** + * write_guest - copy data from kernel space to guest space + * @vcpu: virtual cpu + * @ga: guest address + * @ar: access register + * @data: source address in kernel space + * @len: number of bytes to copy + * + * The behaviour of write_guest is identical to write_guest_with_key, except + * that the PSW access key is used instead of an explicit argument. + */ +static inline __must_check int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, unsigned long len) { - return access_guest(vcpu, ga, ar, data, len, GACC_STORE); + u8 access_key = psw_bits(vcpu->arch.sie_block->gpsw).key; + + return write_guest_with_key(vcpu, ga, ar, data, len, access_key); +} + +/** + * read_guest_with_key - copy data from guest space to kernel space + * @vcpu: virtual cpu + * @ga: guest address + * @ar: access register + * @data: destination address in kernel space + * @len: number of bytes to copy + * @access_key: access key the storage key needs to match + * + * Copy @len bytes from @ga (guest address) to @data (kernel space). + * + * The behaviour of read_guest_with_key is identical to write_guest_with_key, + * except that data will be copied from guest space to kernel space. + */ +static inline __must_check +int read_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, + void *data, unsigned long len, u8 access_key) +{ + return access_guest_with_key(vcpu, ga, ar, data, len, GACC_FETCH, + access_key); } /** @@ -259,14 +309,16 @@ int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, * * Copy @len bytes from @ga (guest address) to @data (kernel space). * - * The behaviour of read_guest is identical to write_guest, except that - * data will be copied from guest space to kernel space. + * The behaviour of read_guest is identical to read_guest_with_key, except + * that the PSW access key is used instead of an explicit argument. 
*/ static inline __must_check int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, unsigned long len) { - return access_guest(vcpu, ga, ar, data, len, GACC_FETCH); + u8 access_key = psw_bits(vcpu->arch.sie_block->gpsw).key; + + return read_guest_with_key(vcpu, ga, ar, data, len, access_key); } /** @@ -287,7 +339,10 @@ static inline __must_check int read_guest_instr(struct kvm_vcpu *vcpu, unsigned long ga, void *data, unsigned long len) { - return access_guest(vcpu, ga, 0, data, len, GACC_IFETCH); + u8 access_key = psw_bits(vcpu->arch.sie_block->gpsw).key; + + return access_guest_with_key(vcpu, ga, 0, data, len, GACC_IFETCH, + access_key); } /** diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index d07ff646d844..8bd42a20d924 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -331,18 +331,18 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) kvm_s390_get_regs_rre(vcpu, &reg1, &reg2); - /* Make sure that the source is paged-in */ - rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2], - reg2, &srcaddr, GACC_FETCH); + /* Ensure that the source is paged-in, no actual access -> no key checking */ + rc = guest_translate_address_with_key(vcpu, vcpu->run->s.regs.gprs[reg2], + reg2, &srcaddr, GACC_FETCH, 0); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0); if (rc != 0) return rc; - /* Make sure that the destination is paged-in */ - rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1], - reg1, &dstaddr, GACC_STORE); + /* Ensure that the destination is paged-in, no actual access -> no key checking */ + rc = guest_translate_address_with_key(vcpu, vcpu->run->s.regs.gprs[reg1], + reg1, &dstaddr, GACC_STORE, 0); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2296b1ff1e02..fdbd6c1dc709 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4713,7 +4713,7 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, case KVM_S390_MEMOP_LOGICAL_READ: if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { r = check_gva_range(vcpu, mop->gaddr, mop->ar, - mop->size, GACC_FETCH); + mop->size, GACC_FETCH, 0); break; } r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); @@ -4725,7 +4725,7 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, case KVM_S390_MEMOP_LOGICAL_WRITE: if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { r = check_gva_range(vcpu, mop->gaddr, mop->ar, - mop->size, GACC_STORE); + mop->size, GACC_STORE, 0); break; } if (copy_from_user(tmpbuf, uaddr, mop->size)) { -- cgit v1.2.3 From 61380a7adfce1524b8cd16c0ce4f46abce587f95 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Feb 2022 19:22:08 +0100 Subject: KVM: s390: handle_tprot: Honor storage keys Use the access key operand to check for key protection when translating guest addresses. Since the translation code checks for accessing exceptions/error hvas, we can remove the check here and simplify the control flow. Keep checking if the memory is read-only even if such memslots are currently not supported. handle_tprot was the last user of guest_translate_address, so remove it. 
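For reference, the condition-code selection implemented by the reworked handle_tprot below can be summarized as follows (an illustrative restatement of the diff; PGM_ADDRESSING and PGM_TRANSLATION_SPEC are injected as program interruptions instead of being mapped to a condition code):

/*
 * ret == 0 && writable   -> cc 0: fetching and storing permitted
 * ret == 0 && !writable  -> cc 1: fetching permitted, storing not permitted
 * ret == PGM_PROTECTION  -> cc 2: neither fetching nor storing permitted
 * other translation ret  -> cc 3: translation not available
 */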
Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Janosch Frank Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20220211182215.2730017-4-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/kvm/gaccess.c | 9 ------- arch/s390/kvm/gaccess.h | 3 --- arch/s390/kvm/priv.c | 66 ++++++++++++++++++++++++++----------------------- 3 files changed, 35 insertions(+), 43 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 7fca0cff4c12..37838f637707 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -1118,15 +1118,6 @@ int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u access_key); } -int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, - unsigned long *gpa, enum gacc_mode mode) -{ - u8 access_key = psw_bits(vcpu->arch.sie_block->gpsw).key; - - return guest_translate_address_with_key(vcpu, gva, ar, gpa, mode, - access_key); -} - /** * check_gva_range - test a range of guest virtual addresses for accessibility * @vcpu: virtual cpu diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index e5b2f56e7962..c5f2e7311b17 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -190,9 +190,6 @@ int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u unsigned long *gpa, enum gacc_mode mode, u8 access_key); -int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, - u8 ar, unsigned long *gpa, enum gacc_mode mode); - int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, unsigned long length, enum gacc_mode mode, u8 access_key); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 417154b314a6..30b24c42ef99 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -1443,10 +1443,11 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu) static int handle_tprot(struct kvm_vcpu *vcpu) { - u64 address1, address2; - unsigned long hva, gpa; - int ret = 0, cc = 0; + u64 address, operand2; + unsigned long gpa; + u8 access_key; bool writable; + int ret, cc; u8 ar; vcpu->stat.instruction_tprot++; @@ -1454,43 +1455,46 @@ static int handle_tprot(struct kvm_vcpu *vcpu) if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - kvm_s390_get_base_disp_sse(vcpu, &address1, &address2, &ar, NULL); + kvm_s390_get_base_disp_sse(vcpu, &address, &operand2, &ar, NULL); + access_key = (operand2 & 0xf0) >> 4; - /* we only handle the Linux memory detection case: - * access key == 0 - * everything else goes to userspace. */ - if (address2 & 0xf0) - return -EOPNOTSUPP; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) ipte_lock(vcpu); - ret = guest_translate_address(vcpu, address1, ar, &gpa, GACC_STORE); - if (ret == PGM_PROTECTION) { + + ret = guest_translate_address_with_key(vcpu, address, ar, &gpa, + GACC_STORE, access_key); + if (ret == 0) { + gfn_to_hva_prot(vcpu->kvm, gpa_to_gfn(gpa), &writable); + } else if (ret == PGM_PROTECTION) { + writable = false; /* Write protected? Try again with read-only... 
*/ - cc = 1; - ret = guest_translate_address(vcpu, address1, ar, &gpa, - GACC_FETCH); + ret = guest_translate_address_with_key(vcpu, address, ar, &gpa, + GACC_FETCH, access_key); } - if (ret) { - if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) { - ret = kvm_s390_inject_program_int(vcpu, ret); - } else if (ret > 0) { - /* Translation not available */ - kvm_s390_set_psw_cc(vcpu, 3); + if (ret >= 0) { + cc = -1; + + /* Fetching permitted; storing permitted */ + if (ret == 0 && writable) + cc = 0; + /* Fetching permitted; storing not permitted */ + else if (ret == 0 && !writable) + cc = 1; + /* Fetching not permitted; storing not permitted */ + else if (ret == PGM_PROTECTION) + cc = 2; + /* Translation not available */ + else if (ret != PGM_ADDRESSING && ret != PGM_TRANSLATION_SPEC) + cc = 3; + + if (cc != -1) { + kvm_s390_set_psw_cc(vcpu, cc); ret = 0; + } else { + ret = kvm_s390_inject_program_int(vcpu, ret); } - goto out_unlock; } - hva = gfn_to_hva_prot(vcpu->kvm, gpa_to_gfn(gpa), &writable); - if (kvm_is_error_hva(hva)) { - ret = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - } else { - if (!writable) - cc = 1; /* Write not permitted ==> read-only */ - kvm_s390_set_psw_cc(vcpu, cc); - /* Note: CC2 only occurs for storage keys (not supported yet) */ - } -out_unlock: if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) ipte_unlock(vcpu); return ret; -- cgit v1.2.3 From e9e9feebcbc14b174fef862842f8cc9a388e1db3 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Feb 2022 19:22:10 +0100 Subject: KVM: s390: Add optional storage key checking to MEMOP IOCTL User space needs a mechanism to perform key checked accesses when emulating instructions. The key can be passed as an additional argument. Having an additional argument is flexible, as user space can pass the guest PSW's key, in order to make an access the same way the CPU would, or pass another key if necessary. 
Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Claudio Imbrenda Reviewed-by: Christian Borntraeger Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20220211182215.2730017-6-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index fdbd6c1dc709..c31b40abfa23 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2359,6 +2359,11 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) return r; } +static bool access_key_invalid(u8 access_key) +{ + return access_key > 0xf; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -4692,17 +4697,21 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, void *tmpbuf = NULL; int r = 0; const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION - | KVM_S390_MEMOP_F_CHECK_ONLY; + | KVM_S390_MEMOP_F_CHECK_ONLY + | KVM_S390_MEMOP_F_SKEY_PROTECTION; if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size) return -EINVAL; - if (mop->size > MEM_OP_MAX_SIZE) return -E2BIG; - if (kvm_s390_pv_cpu_is_protected(vcpu)) return -EINVAL; - + if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { + if (access_key_invalid(mop->key)) + return -EINVAL; + } else { + mop->key = 0; + } if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { tmpbuf = vmalloc(mop->size); if (!tmpbuf) @@ -4712,11 +4721,12 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, switch (mop->op) { case KVM_S390_MEMOP_LOGICAL_READ: if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { - r = check_gva_range(vcpu, mop->gaddr, mop->ar, - mop->size, GACC_FETCH, 0); + r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, + GACC_FETCH, mop->key); break; } - r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); + r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf, + mop->size, mop->key); if (r == 0) { if (copy_to_user(uaddr, tmpbuf, mop->size)) r = -EFAULT; @@ -4724,15 +4734,16 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, break; case KVM_S390_MEMOP_LOGICAL_WRITE: if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { - r = check_gva_range(vcpu, mop->gaddr, mop->ar, - mop->size, GACC_STORE, 0); + r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, + GACC_STORE, mop->key); break; } if (copy_from_user(tmpbuf, uaddr, mop->size)) { r = -EFAULT; break; } - r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); + r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf, + mop->size, mop->key); break; } -- cgit v1.2.3 From ef11c9463ae006302ce170a401854a48ea0532ca Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Feb 2022 19:22:11 +0100 Subject: KVM: s390: Add vm IOCTL for key checked guest absolute memory access Channel I/O honors storage keys and is performed on absolute memory. For I/O emulation user space therefore needs to be able to do key checked accesses. The vm IOCTL supports read/write accesses, as well as checking if an access would succeed. Unlike an approach relying on KVM_S390_GET_SKEYS for key checking, the vm IOCTL performs the check in lockstep with the read or write by, ultimately, mapping the access to move instructions that support key protection checking with a supplied key. Fetch and storage protection override are not applicable to absolute accesses and so are not applied as they are when using the vcpu memop. 
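For illustration, an I/O emulation thread could then perform a key-checked write to guest absolute memory through the vm file descriptor (a sketch; vm_fd, absolute_addr, data, len and subchannel_key are placeholders):

struct kvm_s390_mem_op ksmo = {
	.gaddr = absolute_addr,			/* guest absolute address */
	.size  = len,
	.buf   = (__u64)(unsigned long)data,
	.op    = KVM_S390_MEMOP_ABSOLUTE_WRITE,
	.flags = KVM_S390_MEMOP_F_SKEY_PROTECTION,
	.key   = subchannel_key,		/* key the channel program runs under */
};
int rc = ioctl(vm_fd, KVM_S390_MEMOP, &ksmo);	/* > 0: PGM_* code, < 0: -errno */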
Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Christian Borntraeger Link: https://lore.kernel.org/r/20220211182215.2730017-7-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/kvm/gaccess.c | 72 ++++++++++++++++++++++++++++++++++++++++++ arch/s390/kvm/gaccess.h | 6 ++++ arch/s390/kvm/kvm-s390.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 159 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 37838f637707..d53a183c2005 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -795,6 +795,35 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, return 1; } +static int vm_check_access_key(struct kvm *kvm, u8 access_key, + enum gacc_mode mode, gpa_t gpa) +{ + u8 storage_key, access_control; + bool fetch_protected; + unsigned long hva; + int r; + + if (access_key == 0) + return 0; + + hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); + if (kvm_is_error_hva(hva)) + return PGM_ADDRESSING; + + mmap_read_lock(current->mm); + r = get_guest_storage_key(current->mm, hva, &storage_key); + mmap_read_unlock(current->mm); + if (r) + return r; + access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key); + if (access_control == access_key) + return 0; + fetch_protected = storage_key & _PAGE_FP_BIT; + if ((mode == GACC_FETCH || mode == GACC_IFETCH) && !fetch_protected) + return 0; + return PGM_PROTECTION; +} + static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode, union asce asce) { @@ -994,6 +1023,26 @@ access_guest_page_with_key(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa, return 0; } +int access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data, + unsigned long len, enum gacc_mode mode, u8 access_key) +{ + int offset = offset_in_page(gpa); + int fragment_len; + int rc; + + while (min(PAGE_SIZE - offset, len) > 0) { + fragment_len = min(PAGE_SIZE - offset, len); + rc = access_guest_page_with_key(kvm, mode, gpa, data, fragment_len, access_key); + if (rc) + return rc; + offset = 0; + len -= fragment_len; + data += fragment_len; + gpa += fragment_len; + } + return 0; +} + int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, unsigned long len, enum gacc_mode mode, u8 access_key) @@ -1144,6 +1193,29 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, return rc; } +/** + * check_gpa_range - test a range of guest physical addresses for accessibility + * @kvm: virtual machine instance + * @gpa: guest physical address + * @length: length of test range + * @mode: access mode to test, relevant for storage keys + * @access_key: access key to match the storage keys with + */ +int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length, + enum gacc_mode mode, u8 access_key) +{ + unsigned int fragment_len; + int rc = 0; + + while (length && !rc) { + fragment_len = min(PAGE_SIZE - offset_in_page(gpa), length); + rc = vm_check_access_key(kvm, access_key, mode, gpa); + length -= fragment_len; + gpa += fragment_len; + } + return rc; +} + /** * kvm_s390_check_low_addr_prot_real - check for low-address protection * @vcpu: virtual cpu diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index c5f2e7311b17..1124ff282012 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -193,6 +193,12 @@ int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, unsigned long length, enum 
gacc_mode mode, u8 access_key); +int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length, + enum gacc_mode mode, u8 access_key); + +int access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data, + unsigned long len, enum gacc_mode mode, u8 access_key); + int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, unsigned long len, enum gacc_mode mode, u8 access_key); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c31b40abfa23..36bc73b5f5de 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2364,6 +2364,78 @@ static bool access_key_invalid(u8 access_key) return access_key > 0xf; } +static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) +{ + void __user *uaddr = (void __user *)mop->buf; + u64 supported_flags; + void *tmpbuf = NULL; + int r, srcu_idx; + + supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION + | KVM_S390_MEMOP_F_CHECK_ONLY; + if (mop->flags & ~supported_flags) + return -EINVAL; + if (mop->size > MEM_OP_MAX_SIZE) + return -E2BIG; + if (kvm_s390_pv_is_protected(kvm)) + return -EINVAL; + if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { + if (access_key_invalid(mop->key)) + return -EINVAL; + } else { + mop->key = 0; + } + if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { + tmpbuf = vmalloc(mop->size); + if (!tmpbuf) + return -ENOMEM; + } + + srcu_idx = srcu_read_lock(&kvm->srcu); + + if (kvm_is_error_gpa(kvm, mop->gaddr)) { + r = PGM_ADDRESSING; + goto out_unlock; + } + + switch (mop->op) { + case KVM_S390_MEMOP_ABSOLUTE_READ: { + if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { + r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key); + } else { + r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, + mop->size, GACC_FETCH, mop->key); + if (r == 0) { + if (copy_to_user(uaddr, tmpbuf, mop->size)) + r = -EFAULT; + } + } + break; + } + case KVM_S390_MEMOP_ABSOLUTE_WRITE: { + if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { + r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key); + } else { + if (copy_from_user(tmpbuf, uaddr, mop->size)) { + r = -EFAULT; + break; + } + r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, + mop->size, GACC_STORE, mop->key); + } + break; + } + default: + r = -EINVAL; + } + +out_unlock: + srcu_read_unlock(&kvm->srcu, srcu_idx); + + vfree(tmpbuf); + return r; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2488,6 +2560,15 @@ long kvm_arch_vm_ioctl(struct file *filp, } break; } + case KVM_S390_MEM_OP: { + struct kvm_s390_mem_op mem_op; + + if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) + r = kvm_s390_vm_mem_op(kvm, &mem_op); + else + r = -EFAULT; + break; + } default: r = -ENOTTY; } -- cgit v1.2.3 From 0e1234c02b77ef22d9cf78f86b98347ceb170090 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Feb 2022 19:22:12 +0100 Subject: KVM: s390: Rename existing vcpu memop functions Makes the naming consistent, now that we also have a vm ioctl. 
Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Janosch Frank Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20220211182215.2730017-8-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 36bc73b5f5de..773bccdd446c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4741,8 +4741,8 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, return r; } -static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu, - struct kvm_s390_mem_op *mop) +static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu, + struct kvm_s390_mem_op *mop) { void __user *uaddr = (void __user *)mop->buf; int r = 0; @@ -4771,8 +4771,9 @@ static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu, } return r; } -static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, - struct kvm_s390_mem_op *mop) + +static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu, + struct kvm_s390_mem_op *mop) { void __user *uaddr = (void __user *)mop->buf; void *tmpbuf = NULL; @@ -4835,8 +4836,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, return r; } -static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu, - struct kvm_s390_mem_op *mop) +static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu, + struct kvm_s390_mem_op *mop) { int r, srcu_idx; @@ -4845,12 +4846,12 @@ static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu, switch (mop->op) { case KVM_S390_MEMOP_LOGICAL_READ: case KVM_S390_MEMOP_LOGICAL_WRITE: - r = kvm_s390_guest_mem_op(vcpu, mop); + r = kvm_s390_vcpu_mem_op(vcpu, mop); break; case KVM_S390_MEMOP_SIDA_READ: case KVM_S390_MEMOP_SIDA_WRITE: /* we are locked against sida going away by the vcpu->mutex */ - r = kvm_s390_guest_sida_op(vcpu, mop); + r = kvm_s390_vcpu_sida_op(vcpu, mop); break; default: r = -EINVAL; @@ -5013,7 +5014,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, struct kvm_s390_mem_op mem_op; if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) - r = kvm_s390_guest_memsida_op(vcpu, &mem_op); + r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op); else r = -EFAULT; break; -- cgit v1.2.3 From d004079edc166ff19605475211305923c708b4d5 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Feb 2022 19:22:13 +0100 Subject: KVM: s390: Add capability for storage key extension of MEM_OP IOCTL Availability of the KVM_CAP_S390_MEM_OP_EXTENSION capability signals that: * The vcpu MEM_OP IOCTL supports storage key checking. * The vm MEM_OP IOCTL exists. 
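A sketch of the corresponding user-space probe (vm_fd is a placeholder for an open VM file descriptor):

if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP_EXTENSION) > 0) {
	/* the vcpu MEMOP accepts KVM_S390_MEMOP_F_SKEY_PROTECTION
	 * and the vm MEMOP IOCTL is available */
}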
Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Janosch Frank Reviewed-by: Christian Borntraeger Link: https://lore.kernel.org/r/20220211182215.2730017-9-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 773bccdd446c..c2c26c2aad64 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -564,6 +564,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_VCPU_RESETS: case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_S390_DIAG318: + case KVM_CAP_S390_MEM_OP_EXTENSION: r = 1; break; case KVM_CAP_SET_GUEST_DEBUG2: -- cgit v1.2.3 From 3d9042f8b923810c169ece02d91c70ec498eff0b Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Mon, 21 Feb 2022 17:32:37 +0100 Subject: KVM: s390: Add missing vm MEM_OP size check Check that size is not zero, preventing the following warning: WARNING: CPU: 0 PID: 9692 at mm/vmalloc.c:3059 __vmalloc_node_range+0x528/0x648 Modules linked in: CPU: 0 PID: 9692 Comm: memop Not tainted 5.17.0-rc3-e4+ #80 Hardware name: IBM 8561 T01 701 (LPAR) Krnl PSW : 0704c00180000000 0000000082dc584c (__vmalloc_node_range+0x52c/0x648) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Krnl GPRS: 0000000000000083 ffffffffffffffff 0000000000000000 0000000000000001 0000038000000000 000003ff80000000 0000000000000cc0 000000008ebb8000 0000000087a8a700 000000004040aeb1 000003ffd9f7dec8 000000008ebb8000 000000009d9b8000 000000000102a1b4 00000380035afb68 00000380035afaa8 Krnl Code: 0000000082dc583e: d028a7f4ff80 trtr 2036(41,%r10),3968(%r15) 0000000082dc5844: af000000 mc 0,0 #0000000082dc5848: af000000 mc 0,0 >0000000082dc584c: a7d90000 lghi %r13,0 0000000082dc5850: b904002d lgr %r2,%r13 0000000082dc5854: eb6ff1080004 lmg %r6,%r15,264(%r15) 0000000082dc585a: 07fe bcr 15,%r14 0000000082dc585c: 47000700 bc 0,1792 Call Trace: [<0000000082dc584c>] __vmalloc_node_range+0x52c/0x648 [<0000000082dc5b62>] vmalloc+0x5a/0x68 [<000003ff8067f4ca>] kvm_arch_vm_ioctl+0x2da/0x2a30 [kvm] [<000003ff806705bc>] kvm_vm_ioctl+0x4ec/0x978 [kvm] [<0000000082e562fe>] __s390x_sys_ioctl+0xbe/0x100 [<000000008360a9bc>] __do_syscall+0x1d4/0x200 [<0000000083618bd2>] system_call+0x82/0xb0 Last Breaking-Event-Address: [<0000000082dc5348>] __vmalloc_node_range+0x28/0x648 Other than the warning, there is no ill effect from the missing check, the condition is detected by subsequent code and causes a return with ENOMEM. 
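A minimal user-space reproducer sketch (vm_fd is a placeholder): before this patch a size of zero passed the flag check and reached vmalloc(0), triggering the warning; with the patch it fails the combined flags/size check with -EINVAL:

struct kvm_s390_mem_op ksmo = {
	.gaddr = 0,
	.size  = 0,	/* now rejected up front */
	.op    = KVM_S390_MEMOP_ABSOLUTE_READ,
};
ioctl(vm_fd, KVM_S390_MEMOP, &ksmo);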
Fixes: ef11c9463ae0 ("KVM: s390: Add vm IOCTL for key checked guest absolute memory access") Signed-off-by: Janis Schoetterl-Glausch Link: https://lore.kernel.org/r/20220221163237.4122868-1-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c2c26c2aad64..e056ad86ccd2 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2374,7 +2374,7 @@ static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION | KVM_S390_MEMOP_F_CHECK_ONLY; - if (mop->flags & ~supported_flags) + if (mop->flags & ~supported_flags || !mop->size) return -EINVAL; if (mop->size > MEM_OP_MAX_SIZE) return -E2BIG; -- cgit v1.2.3 From ee6a569d3bf64c9676eee3eecb861fb01cc11311 Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Wed, 9 Feb 2022 16:22:17 +0100 Subject: KVM: s390: pv: make use of ultravisor AIV support This patch enables the ultravisor adapter interruption virtualization support indicated by UV feature BIT_UV_FEAT_AIV. This allows ISC interruption injection directly into the GISA IPM for PV kvm guests. Hardware that does not support this feature will continue to use the UV interruption interception method to deliver ISC interruptions to PV kvm guests. For this purpose, the ECA_AIV bit for all guest cpus will be cleared and the GISA will be disabled during PV CPU setup. In addition a check in __inject_io() has been removed. That reduces the required instructions for interruption handling for PV and traditional kvm guests. Signed-off-by: Michael Mueller Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20220209152217.1793281-2-mimu@linux.ibm.com Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/uv.h | 1 + arch/s390/kvm/interrupt.c | 54 ++++++++++++++++++++++++++++++++++++++++------ arch/s390/kvm/kvm-s390.c | 11 +++++++--- arch/s390/kvm/kvm-s390.h | 11 ++++++++++ 4 files changed, 68 insertions(+), 9 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index 86218382d29c..a2d376b8bce3 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -80,6 +80,7 @@ enum uv_cmds_inst { enum uv_feat_ind { BIT_UV_FEAT_MISC = 0, + BIT_UV_FEAT_AIV = 1, }; struct uv_cb_header { diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index db933c252dbc..9b30beac904d 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1901,13 +1901,12 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) isc = int_word_to_isc(inti->io.io_int_word); /* - * Do not make use of gisa in protected mode. We do not use the lock - * checking variant as this is just a performance optimization and we - * do not hold the lock here. This is ok as the code will pick - * interrupts from both "lists" for delivery. + * We do not use the lock checking variant as this is just a + * performance optimization and we do not hold the lock here. + * This is ok as the code will pick interrupts from both "lists" + * for delivery. 
*/ - if (!kvm_s390_pv_get_handle(kvm) && - gi->origin && inti->type & KVM_S390_INT_IO_AI_MASK) { + if (gi->origin && inti->type & KVM_S390_INT_IO_AI_MASK) { VM_EVENT(kvm, 4, "%s isc %1u", "inject: I/O (AI/gisa)", isc); gisa_set_ipm_gisc(gi->origin, isc); kfree(inti); @@ -3171,9 +3170,33 @@ void kvm_s390_gisa_init(struct kvm *kvm) VM_EVENT(kvm, 3, "gisa 0x%pK initialized", gi->origin); } +void kvm_s390_gisa_enable(struct kvm *kvm) +{ + struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; + struct kvm_vcpu *vcpu; + unsigned long i; + u32 gisa_desc; + + if (gi->origin) + return; + kvm_s390_gisa_init(kvm); + gisa_desc = kvm_s390_get_gisa_desc(kvm); + if (!gisa_desc) + return; + kvm_for_each_vcpu(i, vcpu, kvm) { + mutex_lock(&vcpu->mutex); + vcpu->arch.sie_block->gd = gisa_desc; + vcpu->arch.sie_block->eca |= ECA_AIV; + VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u", + vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id); + mutex_unlock(&vcpu->mutex); + } +} + void kvm_s390_gisa_destroy(struct kvm *kvm) { struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; + struct kvm_s390_gisa *gisa = gi->origin; if (!gi->origin) return; @@ -3184,6 +3207,25 @@ void kvm_s390_gisa_destroy(struct kvm *kvm) cpu_relax(); hrtimer_cancel(&gi->timer); gi->origin = NULL; + VM_EVENT(kvm, 3, "gisa 0x%pK destroyed", gisa); +} + +void kvm_s390_gisa_disable(struct kvm *kvm) +{ + struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; + struct kvm_vcpu *vcpu; + unsigned long i; + + if (!gi->origin) + return; + kvm_for_each_vcpu(i, vcpu, kvm) { + mutex_lock(&vcpu->mutex); + vcpu->arch.sie_block->eca &= ~ECA_AIV; + vcpu->arch.sie_block->gd = 0U; + mutex_unlock(&vcpu->mutex); + VCPU_EVENT(vcpu, 3, "AIV disabled for cpu %03u", vcpu->vcpu_id); + } + kvm_s390_gisa_destroy(kvm); } /** diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index e056ad86ccd2..b5ea95cf8686 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2195,6 +2195,9 @@ static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp) } mutex_unlock(&vcpu->mutex); } + /* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */ + if (use_gisa) + kvm_s390_gisa_enable(kvm); return ret; } @@ -2206,6 +2209,10 @@ static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc) struct kvm_vcpu *vcpu; + /* Disable the GISA if the ultravisor does not support AIV. 
*/ + if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications)) + kvm_s390_gisa_disable(kvm); + kvm_for_each_vcpu(i, vcpu, kvm) { mutex_lock(&vcpu->mutex); r = kvm_s390_pv_create_cpu(vcpu, rc, rrc); @@ -3350,9 +3357,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->icpua = vcpu->vcpu_id; spin_lock_init(&vcpu->arch.local_int.lock); - vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin; - if (vcpu->arch.sie_block->gd && sclp.has_gisaf) - vcpu->arch.sie_block->gd |= GISA_FORMAT1; + vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm); seqcount_init(&vcpu->arch.cputm_seqcount); vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 098831e815e6..4ba8fc30d87a 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -231,6 +231,15 @@ static inline unsigned long kvm_s390_get_gfn_end(struct kvm_memslots *slots) return ms->base_gfn + ms->npages; } +static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm) +{ + u32 gd = (u32)(u64)kvm->arch.gisa_int.origin; + + if (gd && sclp.has_gisaf) + gd |= GISA_FORMAT1; + return gd; +} + /* implemented in pv.c */ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); @@ -450,6 +459,8 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, void kvm_s390_gisa_init(struct kvm *kvm); void kvm_s390_gisa_clear(struct kvm *kvm); void kvm_s390_gisa_destroy(struct kvm *kvm); +void kvm_s390_gisa_disable(struct kvm *kvm); +void kvm_s390_gisa_enable(struct kvm *kvm); int kvm_s390_gib_init(u8 nisc); void kvm_s390_gib_destroy(void); -- cgit v1.2.3 From cc65c3a110db689abe06c41296cf4da8f0a69b35 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 25 Feb 2022 18:22:46 +0000 Subject: KVM: s390: Replace KVM_REQ_MMU_RELOAD usage with arch specific request Add an arch request, KVM_REQ_REFRESH_GUEST_PREFIX, to deal with guest prefix changes instead of piggybacking KVM_REQ_MMU_RELOAD. This will allow for the removal of the generic KVM_REQ_MMU_RELOAD, which isn't actually used by generic KVM. No functional change intended. 
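The request pattern instantiated here follows the usual producer/consumer scheme (a sketch; both call sites appear in the diff below):

/* producer, e.g. kvm_s390_set_prefix(): */
kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);

/* consumer, in the vcpu run loop: */
if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
	/* re-arm the ipte notifier for the guest prefix pages */
}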
Reviewed-by: Claudio Imbrenda Reviewed-by: Janosch Frank Signed-off-by: Sean Christopherson Message-Id: <20220225182248.3812651-6-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/s390/include/asm/kvm_host.h | 2 ++ arch/s390/kvm/kvm-s390.c | 8 ++++---- arch/s390/kvm/kvm-s390.h | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index a22c9266ea05..766028d54a3e 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -45,6 +45,8 @@ #define KVM_REQ_START_MIGRATION KVM_ARCH_REQ(3) #define KVM_REQ_STOP_MIGRATION KVM_ARCH_REQ(4) #define KVM_REQ_VSIE_RESTART KVM_ARCH_REQ(5) +#define KVM_REQ_REFRESH_GUEST_PREFIX \ + KVM_ARCH_REQ_FLAGS(6, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define SIGP_CTRL_C 0x80 #define SIGP_CTRL_SCN_MASK 0x3f diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index e056ad86ccd2..5b2b3cbf0d55 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3481,7 +3481,7 @@ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) { VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx", start, end); - kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu); + kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu); } } } @@ -3883,19 +3883,19 @@ retry: if (!kvm_request_pending(vcpu)) return 0; /* - * We use MMU_RELOAD just to re-arm the ipte notifier for the + * If the guest prefix changed, re-arm the ipte notifier for the * guest prefix page. gmap_mprotect_notify will wait on the ptl lock. * This ensures that the ipte instruction for this request has * already finished. We might race against a second unmapper that * wants to set the blocking bit. Lets just retry the request loop. */ - if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { + if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) { int rc; rc = gmap_mprotect_notify(vcpu->arch.gmap, kvm_s390_get_prefix(vcpu), PAGE_SIZE * 2, PROT_WRITE); if (rc) { - kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); + kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu); return rc; } goto retry; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 098831e815e6..45b7c1edd85f 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -105,7 +105,7 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) prefix); vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT; kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); - kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); + kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu); } static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, u8 *ar) -- cgit v1.2.3 From c0573ba5c5a2244dc02060b1f374d4593c1d20b7 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Tue, 1 Mar 2022 15:33:40 +0100 Subject: KVM: s390x: fix SCK locking When handling the SCK instruction, the kvm lock is taken, even though the vcpu lock is already being held. The normal locking order is kvm lock first and then vcpu lock. This can (and in some circumstances does) lead to deadlocks. The function kvm_s390_set_tod_clock is called both by the SCK handler and by some IOCTLs to set the clock. The IOCTLs will not hold the vcpu lock, so they can safely take the kvm lock. The SCK handler holds the vcpu lock, but will also somehow need to acquire the kvm lock without relinquishing the vcpu lock. 
The solution is to factor out the code to set the clock, and provide two wrappers. One is called like the original function and does the locking, the other is called kvm_s390_try_set_tod_clock and uses trylock to try to acquire the kvm lock. This new wrapper is then used in the SCK handler. If locking fails, -EAGAIN is returned, which is eventually propagated to userspace, thus also freeing the vcpu lock and allowing for forward progress. This is not the most efficient or elegant way to solve this issue, but the SCK instruction is deprecated and its performance is not critical. The goal of this patch is just to provide a simple but correct way to fix the bug. Fixes: 6a3f95a6b04c ("KVM: s390: Intercept SCK instruction") Signed-off-by: Claudio Imbrenda Reviewed-by: Christian Borntraeger Reviewed-by: Janis Schoetterl-Glausch Link: https://lore.kernel.org/r/20220301143340.111129-1-imbrenda@linux.ibm.com Cc: stable@vger.kernel.org Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 19 ++++++++++++++++--- arch/s390/kvm/kvm-s390.h | 4 ++-- arch/s390/kvm/priv.c | 15 ++++++++++++++- 3 files changed, 32 insertions(+), 6 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b5ea95cf8686..b53ff693b66e 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3961,14 +3961,12 @@ retry: return 0; } -void kvm_s390_set_tod_clock(struct kvm *kvm, - const struct kvm_s390_vm_tod_clock *gtod) +static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) { struct kvm_vcpu *vcpu; union tod_clock clk; unsigned long i; - mutex_lock(&kvm->lock); preempt_disable(); store_tod_clock_ext(&clk); @@ -3989,7 +3987,22 @@ void kvm_s390_set_tod_clock(struct kvm *kvm, kvm_s390_vcpu_unblock_all(kvm); preempt_enable(); +} + +void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) +{ + mutex_lock(&kvm->lock); + __kvm_s390_set_tod_clock(kvm, gtod); + mutex_unlock(&kvm->lock); +} + +int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) +{ + if (!mutex_trylock(&kvm->lock)) + return 0; + __kvm_s390_set_tod_clock(kvm, gtod); mutex_unlock(&kvm->lock); + return 1; } /** diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 4ba8fc30d87a..798955b62fa3 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -358,8 +358,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ -void kvm_s390_set_tod_clock(struct kvm *kvm, - const struct kvm_s390_vm_tod_clock *gtod); +void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); +int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 417154b314a6..5beb7a4a11b3 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -102,7 +102,20 @@ static int handle_set_clock(struct kvm_vcpu *vcpu) return kvm_s390_inject_prog_cond(vcpu, rc); VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", gtod.tod); - kvm_s390_set_tod_clock(vcpu->kvm, &gtod); + /* + * To set the TOD clock the kvm lock must be taken, but the vcpu lock + * is already held in 
handle_set_clock. The usual lock order is the + * opposite. As SCK is deprecated and should not be used in several + * cases, for example when the multiple epoch facility or TOD clock + * steering facility is installed (see Principles of Operation), a + * slow path can be used. If the lock can not be taken via try_lock, + * the instruction will be retried via -EAGAIN at a later point in + * time. + */ + if (!kvm_s390_try_set_tod_clock(vcpu->kvm, &gtod)) { + kvm_s390_retry_instr(vcpu); + return -EAGAIN; + } kvm_s390_set_psw_cc(vcpu, 0); return 0; -- cgit v1.2.3