diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-02 11:24:14 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-02 11:24:14 -0700 |
commit | d7e0a795bf37a13554c80cfc5ba97abedf53f391 (patch) | |
tree | 26f107fbe530b1bd0912a748b808cbe476bfbf49 /arch/s390 | |
parent | 44261f8e287d1b02a2e4bfbd7399fb8d37d1ee24 (diff) | |
parent | 52cf891d8dbd7592261fa30f373410b97f22b76c (diff) | |
download | linux-stable-d7e0a795bf37a13554c80cfc5ba97abedf53f391.tar.gz linux-stable-d7e0a795bf37a13554c80cfc5ba97abedf53f391.tar.bz2 linux-stable-d7e0a795bf37a13554c80cfc5ba97abedf53f391.zip |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
"ARM:
- More progress on the protected VM front, now with the full fixed
feature set as well as restrictions on some hypercalls after
initialisation.
- Cleanup of the RAZ/WI sysreg handling, which was pointlessly
complicated
- Fixes for the vgic placement in the IPA space, together with a
bunch of selftests
- More memcg accounting of the memory allocated on behalf of a guest
- Timer and vgic selftests
- Workarounds for the Apple M1 broken vgic implementation
- KConfig cleanups
- New kvmarm.mode=none option, for those who really dislike us
RISC-V:
- New KVM port.
x86:
- New API to control TSC offset from userspace
- TSC scaling for nested hypervisors on SVM
- Switch masterclock protection from raw_spin_lock to seqcount
- Clean up function prototypes in the page fault code and avoid
repeated memslot lookups
- Convey the exit reason to userspace on emulation failure
- Configure time between NX page recovery iterations
- Expose Predictive Store Forwarding Disable CPUID leaf
- Allocate page tracking data structures lazily (if the i915 KVM-GT
functionality is not compiled in)
- Cleanups, fixes and optimizations for the shadow MMU code
s390:
- SIGP Fixes
- initial preparations for lazy destroy of secure VMs
- storage key improvements/fixes
- Log the guest CPNC
Starting from this release, KVM-PPC patches will come from Michael
Ellerman's PPC tree"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (227 commits)
RISC-V: KVM: fix boolreturn.cocci warnings
RISC-V: KVM: remove unneeded semicolon
RISC-V: KVM: Fix GPA passed to __kvm_riscv_hfence_gvma_xyz() functions
RISC-V: KVM: Factor-out FP virtualization into separate sources
KVM: s390: add debug statement for diag 318 CPNC data
KVM: s390: pv: properly handle page flags for protected guests
KVM: s390: Fix handle_sske page fault handling
KVM: x86: SGX must obey the KVM_INTERNAL_ERROR_EMULATION protocol
KVM: x86: On emulation failure, convey the exit reason, etc. to userspace
KVM: x86: Get exit_reason as part of kvm_x86_ops.get_exit_info
KVM: x86: Clarify the kvm_run.emulation_failure structure layout
KVM: s390: Add a routine for setting userspace CPU state
KVM: s390: Simplify SIGP Set Arch handling
KVM: s390: pv: avoid stalls when making pages secure
KVM: s390: pv: avoid stalls for kvm_s390_pv_init_vm
KVM: s390: pv: avoid double free of sida page
KVM: s390: pv: add macros for UVC CC values
s390/mm: optimize reset_guest_reference_bit()
s390/mm: optimize set_guest_storage_key()
s390/mm: no need for pte_alloc_map_lock() if we know the pmd is present
...
Diffstat (limited to 'arch/s390')
-rw-r--r-- | arch/s390/include/asm/pgtable.h | 9 | ||||
-rw-r--r-- | arch/s390/include/asm/uv.h | 15 | ||||
-rw-r--r-- | arch/s390/kernel/uv.c | 65 | ||||
-rw-r--r-- | arch/s390/kvm/intercept.c | 5 | ||||
-rw-r--r-- | arch/s390/kvm/kvm-s390.c | 7 | ||||
-rw-r--r-- | arch/s390/kvm/kvm-s390.h | 9 | ||||
-rw-r--r-- | arch/s390/kvm/priv.c | 2 | ||||
-rw-r--r-- | arch/s390/kvm/pv.c | 21 | ||||
-rw-r--r-- | arch/s390/kvm/sigp.c | 14 | ||||
-rw-r--r-- | arch/s390/mm/gmap.c | 15 | ||||
-rw-r--r-- | arch/s390/mm/pgtable.c | 109 |
11 files changed, 196 insertions, 75 deletions
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index b61426c9ef17..e43416950245 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1074,8 +1074,9 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, pte_t res; res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID)); + /* At this point the reference through the mapping is still present */ if (mm_is_protected(mm) && pte_present(res)) - uv_convert_from_secure(pte_val(res) & PAGE_MASK); + uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK); return res; } @@ -1091,8 +1092,9 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, pte_t res; res = ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID)); + /* At this point the reference through the mapping is still present */ if (mm_is_protected(vma->vm_mm) && pte_present(res)) - uv_convert_from_secure(pte_val(res) & PAGE_MASK); + uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK); return res; } @@ -1116,8 +1118,9 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, } else { res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID)); } + /* At this point the reference through the mapping is still present */ if (mm_is_protected(mm) && pte_present(res)) - uv_convert_from_secure(pte_val(res) & PAGE_MASK); + uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK); return res; } diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index fe92a4caf5ec..72d3e49c2860 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -18,6 +18,11 @@ #include <asm/page.h> #include <asm/gmap.h> +#define UVC_CC_OK 0 +#define UVC_CC_ERROR 1 +#define UVC_CC_BUSY 2 +#define UVC_CC_PARTIAL 3 + #define UVC_RC_EXECUTED 0x0001 #define UVC_RC_INV_CMD 0x0002 #define UVC_RC_INV_STATE 0x0003 @@ -351,8 +356,9 @@ static inline int is_prot_virt_host(void) } int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb); -int 
uv_destroy_page(unsigned long paddr); +int uv_destroy_owned_page(unsigned long paddr); int uv_convert_from_secure(unsigned long paddr); +int uv_convert_owned_from_secure(unsigned long paddr); int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr); void setup_uv(void); @@ -360,7 +366,7 @@ void setup_uv(void); #define is_prot_virt_host() 0 static inline void setup_uv(void) {} -static inline int uv_destroy_page(unsigned long paddr) +static inline int uv_destroy_owned_page(unsigned long paddr) { return 0; } @@ -369,6 +375,11 @@ static inline int uv_convert_from_secure(unsigned long paddr) { return 0; } + +static inline int uv_convert_owned_from_secure(unsigned long paddr) +{ + return 0; +} #endif #endif /* _ASM_S390_UV_H */ diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 5a656c7b7a67..8b0e62507d62 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -100,7 +100,7 @@ static int uv_pin_shared(unsigned long paddr) * * @paddr: Absolute host address of page to be destroyed */ -int uv_destroy_page(unsigned long paddr) +static int uv_destroy_page(unsigned long paddr) { struct uv_cb_cfs uvcb = { .header.cmd = UVC_CMD_DESTR_SEC_STOR, @@ -121,6 +121,22 @@ int uv_destroy_page(unsigned long paddr) } /* + * The caller must already hold a reference to the page + */ +int uv_destroy_owned_page(unsigned long paddr) +{ + struct page *page = phys_to_page(paddr); + int rc; + + get_page(page); + rc = uv_destroy_page(paddr); + if (!rc) + clear_bit(PG_arch_1, &page->flags); + put_page(page); + return rc; +} + +/* * Requests the Ultravisor to encrypt a guest page and make it * accessible to the host for paging (export). 
* @@ -140,6 +156,22 @@ int uv_convert_from_secure(unsigned long paddr) } /* + * The caller must already hold a reference to the page + */ +int uv_convert_owned_from_secure(unsigned long paddr) +{ + struct page *page = phys_to_page(paddr); + int rc; + + get_page(page); + rc = uv_convert_from_secure(paddr); + if (!rc) + clear_bit(PG_arch_1, &page->flags); + put_page(page); + return rc; +} + +/* * Calculate the expected ref_count for a page that would otherwise have no * further pins. This was cribbed from similar functions in other places in * the kernel, but with some slight modifications. We know that a secure @@ -165,7 +197,7 @@ static int make_secure_pte(pte_t *ptep, unsigned long addr, { pte_t entry = READ_ONCE(*ptep); struct page *page; - int expected, rc = 0; + int expected, cc = 0; if (!pte_present(entry)) return -ENXIO; @@ -181,12 +213,25 @@ static int make_secure_pte(pte_t *ptep, unsigned long addr, if (!page_ref_freeze(page, expected)) return -EBUSY; set_bit(PG_arch_1, &page->flags); - rc = uv_call(0, (u64)uvcb); + /* + * If the UVC does not succeed or fail immediately, we don't want to + * loop for long, or we might get stall notifications. + * On the other hand, this is a complex scenario and we are holding a lot of + * locks, so we can't easily sleep and reschedule. We try only once, + * and if the UVC returned busy or partial completion, we return + * -EAGAIN and we let the callers deal with it. + */ + cc = __uv_call(0, (u64)uvcb); page_ref_unfreeze(page, expected); - /* Return -ENXIO if the page was not mapped, -EINVAL otherwise */ - if (rc) - rc = uvcb->rc == 0x10a ? -ENXIO : -EINVAL; - return rc; + /* + * Return -ENXIO if the page was not mapped, -EINVAL for other errors. + * If busy or partially completed, return -EAGAIN. + */ + if (cc == UVC_CC_OK) + return 0; + else if (cc == UVC_CC_BUSY || cc == UVC_CC_PARTIAL) + return -EAGAIN; + return uvcb->rc == 0x10a ? 
-ENXIO : -EINVAL; } /* @@ -212,7 +257,7 @@ again: uaddr = __gmap_translate(gmap, gaddr); if (IS_ERR_VALUE(uaddr)) goto out; - vma = find_vma(gmap->mm, uaddr); + vma = vma_lookup(gmap->mm, uaddr); if (!vma) goto out; /* @@ -239,6 +284,10 @@ out: mmap_read_unlock(gmap->mm); if (rc == -EAGAIN) { + /* + * If we are here because the UVC returned busy or partial + * completion, this is just a useless check, but it is safe. + */ wait_on_page_writeback(page); } else if (rc == -EBUSY) { /* diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 2bd8f854f1b4..d07ff646d844 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -518,6 +518,11 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu) */ if (rc == -EINVAL) return 0; + /* + * If we got -EAGAIN here, we simply return it. It will eventually + * get propagated all the way to userspace, which should then try + * again. + */ return rc; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 1c97493d21e1..c6257f625929 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2487,8 +2487,8 @@ long kvm_arch_vm_ioctl(struct file *filp, case KVM_S390_PV_COMMAND: { struct kvm_pv_cmd args; - /* protvirt means user sigp */ - kvm->arch.user_cpu_state_ctrl = 1; + /* protvirt means user cpu state */ + kvm_s390_set_user_cpu_state_ctrl(kvm); r = 0; if (!is_prot_virt_host()) { r = -EINVAL; @@ -3802,7 +3802,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, vcpu_load(vcpu); /* user space knows about this interface - let it control the state */ - vcpu->kvm->arch.user_cpu_state_ctrl = 1; + kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm); switch (mp_state->mp_state) { case KVM_MP_STATE_STOPPED: @@ -4255,6 +4255,7 @@ static void sync_regs_fmt2(struct kvm_vcpu *vcpu) if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) { vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318; vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc; + VCPU_EVENT(vcpu, 3, "setting cpnc to 
%d", vcpu->arch.diag318_info.cpnc); } /* * If userspace sets the riccb (e.g. after migration) to a valid state, diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 52bc8fbaa60a..c07a050d757d 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -208,6 +208,15 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm) return kvm->arch.user_cpu_state_ctrl != 0; } +static inline void kvm_s390_set_user_cpu_state_ctrl(struct kvm *kvm) +{ + if (kvm->arch.user_cpu_state_ctrl) + return; + + VM_EVENT(kvm, 3, "%s", "ENABLE: Userspace CPU state control"); + kvm->arch.user_cpu_state_ctrl = 1; +} + /* implemented in pv.c */ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 53da4ceb16a3..417154b314a6 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -397,6 +397,8 @@ static int handle_sske(struct kvm_vcpu *vcpu) mmap_read_unlock(current->mm); if (rc == -EFAULT) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + if (rc == -EAGAIN) + continue; if (rc < 0) return rc; start += PAGE_SIZE; diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index c8841f476e91..00d272d134c2 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -16,18 +16,17 @@ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) { - int cc = 0; + int cc; - if (kvm_s390_pv_cpu_get_handle(vcpu)) { - cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), - UVC_CMD_DESTROY_SEC_CPU, rc, rrc); + if (!kvm_s390_pv_cpu_get_handle(vcpu)) + return 0; + + cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), UVC_CMD_DESTROY_SEC_CPU, rc, rrc); + + KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x", + vcpu->vcpu_id, *rc, *rrc); + WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc); - KVM_UV_EVENT(vcpu->kvm, 3, - "PROTVIRT DESTROY VCPU %d: rc %x rrc %x", - 
vcpu->vcpu_id, *rc, *rrc); - WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", - *rc, *rrc); - } /* Intended memory leak for something that should never happen. */ if (!cc) free_pages(vcpu->arch.pv.stor_base, @@ -196,7 +195,7 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc) uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base; uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var; - cc = uv_call(0, (u64)&uvcb); + cc = uv_call_sched(0, (u64)&uvcb); *rc = uvcb.header.rc; *rrc = uvcb.header.rrc; KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x", diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 683036c1c92a..cf4de80bd541 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -151,22 +151,10 @@ static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu, static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter, u64 *status_reg) { - unsigned int i; - struct kvm_vcpu *v; - bool all_stopped = true; - - kvm_for_each_vcpu(i, v, vcpu->kvm) { - if (v == vcpu) - continue; - if (!is_vcpu_stopped(v)) - all_stopped = false; - } - *status_reg &= 0xffffffff00000000UL; /* Reject set arch order, with czam we're always in z/Arch mode. */ - *status_reg |= (all_stopped ? 
SIGP_STATUS_INVALID_PARAMETER : - SIGP_STATUS_INCORRECT_STATE); + *status_reg |= SIGP_STATUS_INVALID_PARAMETER; return SIGP_CC_STATUS_STORED; } diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 4d3b33ce81c6..dfee0ebb2fac 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -672,6 +672,7 @@ EXPORT_SYMBOL_GPL(gmap_fault); */ void __gmap_zap(struct gmap *gmap, unsigned long gaddr) { + struct vm_area_struct *vma; unsigned long vmaddr; spinlock_t *ptl; pte_t *ptep; @@ -681,11 +682,17 @@ void __gmap_zap(struct gmap *gmap, unsigned long gaddr) gaddr >> PMD_SHIFT); if (vmaddr) { vmaddr |= gaddr & ~PMD_MASK; + + vma = vma_lookup(gmap->mm, vmaddr); + if (!vma || is_vm_hugetlb_page(vma)) + return; + /* Get pointer to the page table entry */ ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); - if (likely(ptep)) + if (likely(ptep)) { ptep_zap_unused(gmap->mm, vmaddr, ptep, 0); - pte_unmap_unlock(ptep, ptl); + pte_unmap_unlock(ptep, ptl); + } } } EXPORT_SYMBOL_GPL(__gmap_zap); @@ -2677,8 +2684,10 @@ static int __s390_reset_acc(pte_t *ptep, unsigned long addr, { pte_t pte = READ_ONCE(*ptep); + /* There is a reference through the mapping */ if (pte_present(pte)) - WARN_ON_ONCE(uv_destroy_page(pte_val(pte) & PAGE_MASK)); + WARN_ON_ONCE(uv_destroy_owned_page(pte_val(pte) & PAGE_MASK)); + return 0; } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 034721a68d8f..c16232cd0ec5 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -429,22 +429,36 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, } #ifdef CONFIG_PGSTE -static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr) +static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp) { + struct vm_area_struct *vma; pgd_t *pgd; p4d_t *p4d; pud_t *pud; - pmd_t *pmd; + + /* We need a valid VMA, otherwise this is clearly a fault. 
*/ + vma = vma_lookup(mm, addr); + if (!vma) + return -EFAULT; pgd = pgd_offset(mm, addr); - p4d = p4d_alloc(mm, pgd, addr); - if (!p4d) - return NULL; - pud = pud_alloc(mm, p4d, addr); - if (!pud) - return NULL; - pmd = pmd_alloc(mm, pud, addr); - return pmd; + if (!pgd_present(*pgd)) + return -ENOENT; + + p4d = p4d_offset(pgd, addr); + if (!p4d_present(*p4d)) + return -ENOENT; + + pud = pud_offset(p4d, addr); + if (!pud_present(*pud)) + return -ENOENT; + + /* Large PUDs are not supported yet. */ + if (pud_large(*pud)) + return -EFAULT; + + *pmdp = pmd_offset(pud, addr); + return 0; } #endif @@ -778,14 +792,23 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp; pte_t *ptep; - pmdp = pmd_alloc_map(mm, addr); - if (unlikely(!pmdp)) + /* + * If we don't have a PTE table and if there is no huge page mapped, + * we can ignore attempts to set the key to 0, because it already is 0. + */ + switch (pmd_lookup(mm, addr, &pmdp)) { + case -ENOENT: + return key ? -EFAULT : 0; + case 0: + break; + default: return -EFAULT; + } ptl = pmd_lock(mm, pmdp); if (!pmd_present(*pmdp)) { spin_unlock(ptl); - return -EFAULT; + return key ? -EFAULT : 0; } if (pmd_large(*pmdp)) { @@ -801,10 +824,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, } spin_unlock(ptl); - ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); - if (unlikely(!ptep)) - return -EFAULT; - + ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); new = old = pgste_get_lock(ptep); pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | PGSTE_ACC_BITS | PGSTE_FP_BIT); @@ -881,14 +901,23 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) pte_t *ptep; int cc = 0; - pmdp = pmd_alloc_map(mm, addr); - if (unlikely(!pmdp)) + /* + * If we don't have a PTE table and if there is no huge page mapped, + * the storage key is 0 and there is nothing for us to do. 
+ */ + switch (pmd_lookup(mm, addr, &pmdp)) { + case -ENOENT: + return 0; + case 0: + break; + default: return -EFAULT; + } ptl = pmd_lock(mm, pmdp); if (!pmd_present(*pmdp)) { spin_unlock(ptl); - return -EFAULT; + return 0; } if (pmd_large(*pmdp)) { @@ -900,10 +929,7 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) } spin_unlock(ptl); - ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); - if (unlikely(!ptep)) - return -EFAULT; - + ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); new = old = pgste_get_lock(ptep); /* Reset guest reference bit only */ pgste_val(new) &= ~PGSTE_GR_BIT; @@ -935,15 +961,24 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp; pte_t *ptep; - pmdp = pmd_alloc_map(mm, addr); - if (unlikely(!pmdp)) + /* + * If we don't have a PTE table and if there is no huge page mapped, + * the storage key is 0. + */ + *key = 0; + + switch (pmd_lookup(mm, addr, &pmdp)) { + case -ENOENT: + return 0; + case 0: + break; + default: return -EFAULT; + } ptl = pmd_lock(mm, pmdp); if (!pmd_present(*pmdp)) { - /* Not yet mapped memory has a zero key */ spin_unlock(ptl); - *key = 0; return 0; } @@ -956,10 +991,7 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, } spin_unlock(ptl); - ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); - if (unlikely(!ptep)) - return -EFAULT; - + ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); pgste = pgste_get_lock(ptep); *key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; paddr = pte_val(*ptep) & PAGE_MASK; @@ -988,6 +1020,7 @@ EXPORT_SYMBOL(get_guest_storage_key); int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, unsigned long *oldpte, unsigned long *oldpgste) { + struct vm_area_struct *vma; unsigned long pgstev; spinlock_t *ptl; pgste_t pgste; @@ -997,6 +1030,10 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, WARN_ON_ONCE(orc > ESSA_MAX); if (unlikely(orc > ESSA_MAX)) return -EINVAL; + + vma 
= vma_lookup(mm, hva); + if (!vma || is_vm_hugetlb_page(vma)) + return -EFAULT; ptep = get_locked_pte(mm, hva, &ptl); if (unlikely(!ptep)) return -EFAULT; @@ -1089,10 +1126,14 @@ EXPORT_SYMBOL(pgste_perform_essa); int set_pgste_bits(struct mm_struct *mm, unsigned long hva, unsigned long bits, unsigned long value) { + struct vm_area_struct *vma; spinlock_t *ptl; pgste_t new; pte_t *ptep; + vma = vma_lookup(mm, hva); + if (!vma || is_vm_hugetlb_page(vma)) + return -EFAULT; ptep = get_locked_pte(mm, hva, &ptl); if (unlikely(!ptep)) return -EFAULT; @@ -1117,9 +1158,13 @@ EXPORT_SYMBOL(set_pgste_bits); */ int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep) { + struct vm_area_struct *vma; spinlock_t *ptl; pte_t *ptep; + vma = vma_lookup(mm, hva); + if (!vma || is_vm_hugetlb_page(vma)) + return -EFAULT; ptep = get_locked_pte(mm, hva, &ptl); if (unlikely(!ptep)) return -EFAULT; |