summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu/sgx/ioctl.c
diff options
context:
space:
mode:
authorJarkko Sakkinen <jarkko@kernel.org>2020-11-13 00:01:32 +0200
committerBorislav Petkov <bp@suse.de>2020-11-18 18:04:11 +0100
commit1728ab54b4be94aed89276eeb8e750a345659765 (patch)
treebf3fcf3943c20ea9cdf4d16dd7869d621fdf268b /arch/x86/kernel/cpu/sgx/ioctl.c
parent2adcba79e69d4a4c0ac3bb86f466d8b5df301608 (diff)
downloadlinux-stable-1728ab54b4be94aed89276eeb8e750a345659765.tar.gz
linux-stable-1728ab54b4be94aed89276eeb8e750a345659765.tar.bz2
linux-stable-1728ab54b4be94aed89276eeb8e750a345659765.zip
x86/sgx: Add a page reclaimer
Just like normal RAM, there is a limited amount of enclave memory available and overcommitting it is a very valuable tool to reduce resource use. Introduce a simple reclaim mechanism for enclave pages. In contrast to normal page reclaim, the kernel cannot directly access enclave memory. To get around this, the SGX architecture provides a set of functions to help. Among other things, these functions copy enclave memory to and from normal memory, encrypting it and protecting its integrity in the process. Implement a page reclaimer by using these functions. Picks victim pages in LRU fashion from all the enclaves running in the system. A new kernel thread (ksgxswapd) reclaims pages in the background based on watermarks, similar to normal kswapd. All enclave pages can be reclaimed, architecturally. But, there are some limits to this, such as the special SECS metadata page which must be reclaimed last. The page version array (used to mitigate replaying old reclaimed pages) is also architecturally reclaimable, but not yet implemented. The end result is that the vast majority of enclave pages are currently reclaimable. Co-developed-by: Sean Christopherson <sean.j.christopherson@intel.com> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com> Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org> Signed-off-by: Borislav Petkov <bp@suse.de> Acked-by: Jethro Beekman <jethro@fortanix.com> Link: https://lkml.kernel.org/r/20201112220135.165028-22-jarkko@kernel.org
Diffstat (limited to 'arch/x86/kernel/cpu/sgx/ioctl.c')
-rw-r--r--arch/x86/kernel/cpu/sgx/ioctl.c89
1 files changed, 85 insertions, 4 deletions
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 0ba0e670e2f0..6d37117ac8a0 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -16,20 +16,77 @@
#include "encl.h"
#include "encls.h"
+static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl)
+{
+ struct sgx_va_page *va_page = NULL;
+ void *err;
+
+ BUILD_BUG_ON(SGX_VA_SLOT_COUNT !=
+ (SGX_ENCL_PAGE_VA_OFFSET_MASK >> 3) + 1);
+
+ if (!(encl->page_cnt % SGX_VA_SLOT_COUNT)) {
+ va_page = kzalloc(sizeof(*va_page), GFP_KERNEL);
+ if (!va_page)
+ return ERR_PTR(-ENOMEM);
+
+ va_page->epc_page = sgx_alloc_va_page();
+ if (IS_ERR(va_page->epc_page)) {
+ err = ERR_CAST(va_page->epc_page);
+ kfree(va_page);
+ return err;
+ }
+
+ WARN_ON_ONCE(encl->page_cnt % SGX_VA_SLOT_COUNT);
+ }
+ encl->page_cnt++;
+ return va_page;
+}
+
+static void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
+{
+ encl->page_cnt--;
+
+ if (va_page) {
+ sgx_free_epc_page(va_page->epc_page);
+ list_del(&va_page->list);
+ kfree(va_page);
+ }
+}
+
static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
{
struct sgx_epc_page *secs_epc;
+ struct sgx_va_page *va_page;
struct sgx_pageinfo pginfo;
struct sgx_secinfo secinfo;
unsigned long encl_size;
+ struct file *backing;
long ret;
+ va_page = sgx_encl_grow(encl);
+ if (IS_ERR(va_page))
+ return PTR_ERR(va_page);
+ else if (va_page)
+ list_add(&va_page->list, &encl->va_pages);
+ /* else the tail page of the VA page list had free slots. */
+
/* The extra page goes to SECS. */
encl_size = secs->size + PAGE_SIZE;
- secs_epc = __sgx_alloc_epc_page();
- if (IS_ERR(secs_epc))
- return PTR_ERR(secs_epc);
+ backing = shmem_file_setup("SGX backing", encl_size + (encl_size >> 5),
+ VM_NORESERVE);
+ if (IS_ERR(backing)) {
+ ret = PTR_ERR(backing);
+ goto err_out_shrink;
+ }
+
+ encl->backing = backing;
+
+ secs_epc = sgx_alloc_epc_page(&encl->secs, true);
+ if (IS_ERR(secs_epc)) {
+ ret = PTR_ERR(secs_epc);
+ goto err_out_backing;
+ }
encl->secs.epc_page = secs_epc;
@@ -63,6 +120,13 @@ err_out:
sgx_free_epc_page(encl->secs.epc_page);
encl->secs.epc_page = NULL;
+err_out_backing:
+ fput(encl->backing);
+ encl->backing = NULL;
+
+err_out_shrink:
+ sgx_encl_shrink(encl, va_page);
+
return ret;
}
@@ -228,22 +292,36 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
{
struct sgx_encl_page *encl_page;
struct sgx_epc_page *epc_page;
+ struct sgx_va_page *va_page;
int ret;
encl_page = sgx_encl_page_alloc(encl, offset, secinfo->flags);
if (IS_ERR(encl_page))
return PTR_ERR(encl_page);
- epc_page = __sgx_alloc_epc_page();
+ epc_page = sgx_alloc_epc_page(encl_page, true);
if (IS_ERR(epc_page)) {
kfree(encl_page);
return PTR_ERR(epc_page);
}
+ va_page = sgx_encl_grow(encl);
+ if (IS_ERR(va_page)) {
+ ret = PTR_ERR(va_page);
+ goto err_out_free;
+ }
+
mmap_read_lock(current->mm);
mutex_lock(&encl->lock);
/*
+ * Adding to encl->va_pages must be done under encl->lock. Ditto for
+ * deleting (via sgx_encl_shrink()) in the error path.
+ */
+ if (va_page)
+ list_add(&va_page->list, &encl->va_pages);
+
+ /*
* Insert prior to EADD in case of OOM. EADD modifies MRENCLAVE, i.e.
* can't be gracefully unwound, while failure on EADD/EXTEND is limited
* to userspace errors (or kernel/hardware bugs).
@@ -273,6 +351,7 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
goto err_out;
}
+ sgx_mark_page_reclaimable(encl_page->epc_page);
mutex_unlock(&encl->lock);
mmap_read_unlock(current->mm);
return ret;
@@ -281,9 +360,11 @@ err_out:
xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));
err_out_unlock:
+ sgx_encl_shrink(encl, va_page);
mutex_unlock(&encl->lock);
mmap_read_unlock(current->mm);
+err_out_free:
sgx_free_epc_page(epc_page);
kfree(encl_page);