summaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/xen.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/xen.c')
-rw-r--r--arch/x86/kvm/xen.c290
1 files changed, 290 insertions, 0 deletions
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index af8f6562fce4..ae17250e1efe 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -11,9 +11,11 @@
#include "hyperv.h"
#include <linux/kvm_host.h>
+#include <linux/sched/stat.h>
#include <trace/events/kvm.h>
#include <xen/interface/xen.h>
+#include <xen/interface/vcpu.h>
#include "trace.h"
@@ -61,6 +63,132 @@ out:
return ret;
}
+static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
+{
+ struct kvm_vcpu_xen *vx = &v->arch.xen;
+ u64 now = get_kvmclock_ns(v->kvm);
+ u64 delta_ns = now - vx->runstate_entry_time;
+ u64 run_delay = current->sched_info.run_delay;
+
+ if (unlikely(!vx->runstate_entry_time))
+ vx->current_runstate = RUNSTATE_offline;
+
+ /*
+ * Time waiting for the scheduler isn't "stolen" if the
+ * vCPU wasn't running anyway.
+ */
+ if (vx->current_runstate == RUNSTATE_running) {
+ u64 steal_ns = run_delay - vx->last_steal;
+
+ delta_ns -= steal_ns;
+
+ vx->runstate_times[RUNSTATE_runnable] += steal_ns;
+ }
+ vx->last_steal = run_delay;
+
+ vx->runstate_times[vx->current_runstate] += delta_ns;
+ vx->current_runstate = state;
+ vx->runstate_entry_time = now;
+}
+
+void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
+{
+ struct kvm_vcpu_xen *vx = &v->arch.xen;
+ uint64_t state_entry_time;
+ unsigned int offset;
+
+ kvm_xen_update_runstate(v, state);
+
+ if (!vx->runstate_set)
+ return;
+
+ BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+
+ offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time);
+#ifdef CONFIG_X86_64
+ /*
+ * The only difference is alignment of uint64_t in 32-bit.
+ * So the first field 'state' is accessed directly using
+ * offsetof() (where its offset happens to be zero), while the
+ * remaining fields which are all uint64_t, start at 'offset'
+ * which we tweak here by adding 4.
+ */
+ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
+ offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
+ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
+ offsetof(struct compat_vcpu_runstate_info, time) + 4);
+
+ if (v->kvm->arch.xen.long_mode)
+ offset = offsetof(struct vcpu_runstate_info, state_entry_time);
+#endif
+ /*
+ * First write the updated state_entry_time at the appropriate
+ * location determined by 'offset'.
+ */
+ state_entry_time = vx->runstate_entry_time;
+ state_entry_time |= XEN_RUNSTATE_UPDATE;
+
+ BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state_entry_time) !=
+ sizeof(state_entry_time));
+ BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state_entry_time) !=
+ sizeof(state_entry_time));
+
+ if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+ &state_entry_time, offset,
+ sizeof(state_entry_time)))
+ return;
+ smp_wmb();
+
+ /*
+ * Next, write the new runstate. This is in the *same* place
+ * for 32-bit and 64-bit guests, asserted here for paranoia.
+ */
+ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
+ offsetof(struct compat_vcpu_runstate_info, state));
+ BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
+ sizeof(vx->current_runstate));
+ BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state) !=
+ sizeof(vx->current_runstate));
+
+ if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+ &vx->current_runstate,
+ offsetof(struct vcpu_runstate_info, state),
+ sizeof(vx->current_runstate)))
+ return;
+
+ /*
+ * Write the actual runstate times immediately after the
+ * runstate_entry_time.
+ */
+ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
+ offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
+ BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
+ offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
+ BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
+ sizeof(((struct compat_vcpu_runstate_info *)0)->time));
+ BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
+ sizeof(vx->runstate_times));
+
+ if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+ &vx->runstate_times[0],
+ offset + sizeof(u64),
+ sizeof(vx->runstate_times)))
+ return;
+
+ smp_wmb();
+
+ /*
+ * Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's
+ * runstate_entry_time field.
+ */
+
+ state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+ if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+ &state_entry_time, offset,
+ sizeof(state_entry_time)))
+ return;
+}
+
int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
{
u8 rc = 0;
@@ -187,9 +315,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
/* No compat necessary here. */
BUILD_BUG_ON(sizeof(struct vcpu_info) !=
sizeof(struct compat_vcpu_info));
+ BUILD_BUG_ON(offsetof(struct vcpu_info, time) !=
+ offsetof(struct compat_vcpu_info, time));
if (data->u.gpa == GPA_INVALID) {
vcpu->arch.xen.vcpu_info_set = false;
+ r = 0;
break;
}
@@ -206,6 +337,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
if (data->u.gpa == GPA_INVALID) {
vcpu->arch.xen.vcpu_time_info_set = false;
+ r = 0;
break;
}
@@ -219,6 +351,121 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
}
break;
+ case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
+ if (!sched_info_on()) {
+ r = -EOPNOTSUPP;
+ break;
+ }
+ if (data->u.gpa == GPA_INVALID) {
+ vcpu->arch.xen.runstate_set = false;
+ r = 0;
+ break;
+ }
+
+ r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
+ &vcpu->arch.xen.runstate_cache,
+ data->u.gpa,
+ sizeof(struct vcpu_runstate_info));
+ if (!r) {
+ vcpu->arch.xen.runstate_set = true;
+ }
+ break;
+
+ case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
+ if (!sched_info_on()) {
+ r = -EOPNOTSUPP;
+ break;
+ }
+ if (data->u.runstate.state > RUNSTATE_offline) {
+ r = -EINVAL;
+ break;
+ }
+
+ kvm_xen_update_runstate(vcpu, data->u.runstate.state);
+ r = 0;
+ break;
+
+ case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
+ if (!sched_info_on()) {
+ r = -EOPNOTSUPP;
+ break;
+ }
+ if (data->u.runstate.state > RUNSTATE_offline) {
+ r = -EINVAL;
+ break;
+ }
+ if (data->u.runstate.state_entry_time !=
+ (data->u.runstate.time_running +
+ data->u.runstate.time_runnable +
+ data->u.runstate.time_blocked +
+ data->u.runstate.time_offline)) {
+ r = -EINVAL;
+ break;
+ }
+ if (get_kvmclock_ns(vcpu->kvm) <
+ data->u.runstate.state_entry_time) {
+ r = -EINVAL;
+ break;
+ }
+
+ vcpu->arch.xen.current_runstate = data->u.runstate.state;
+ vcpu->arch.xen.runstate_entry_time =
+ data->u.runstate.state_entry_time;
+ vcpu->arch.xen.runstate_times[RUNSTATE_running] =
+ data->u.runstate.time_running;
+ vcpu->arch.xen.runstate_times[RUNSTATE_runnable] =
+ data->u.runstate.time_runnable;
+ vcpu->arch.xen.runstate_times[RUNSTATE_blocked] =
+ data->u.runstate.time_blocked;
+ vcpu->arch.xen.runstate_times[RUNSTATE_offline] =
+ data->u.runstate.time_offline;
+ vcpu->arch.xen.last_steal = current->sched_info.run_delay;
+ r = 0;
+ break;
+
+ case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
+ if (!sched_info_on()) {
+ r = -EOPNOTSUPP;
+ break;
+ }
+ if (data->u.runstate.state > RUNSTATE_offline &&
+ data->u.runstate.state != (u64)-1) {
+ r = -EINVAL;
+ break;
+ }
+ /* The adjustment must add up */
+ if (data->u.runstate.state_entry_time !=
+ (data->u.runstate.time_running +
+ data->u.runstate.time_runnable +
+ data->u.runstate.time_blocked +
+ data->u.runstate.time_offline)) {
+ r = -EINVAL;
+ break;
+ }
+
+ if (get_kvmclock_ns(vcpu->kvm) <
+ (vcpu->arch.xen.runstate_entry_time +
+ data->u.runstate.state_entry_time)) {
+ r = -EINVAL;
+ break;
+ }
+
+ vcpu->arch.xen.runstate_entry_time +=
+ data->u.runstate.state_entry_time;
+ vcpu->arch.xen.runstate_times[RUNSTATE_running] +=
+ data->u.runstate.time_running;
+ vcpu->arch.xen.runstate_times[RUNSTATE_runnable] +=
+ data->u.runstate.time_runnable;
+ vcpu->arch.xen.runstate_times[RUNSTATE_blocked] +=
+ data->u.runstate.time_blocked;
+ vcpu->arch.xen.runstate_times[RUNSTATE_offline] +=
+ data->u.runstate.time_offline;
+
+ if (data->u.runstate.state <= RUNSTATE_offline)
+ kvm_xen_update_runstate(vcpu, data->u.runstate.state);
+ r = 0;
+ break;
+
default:
break;
}
@@ -251,6 +498,49 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
r = 0;
break;
+ case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
+ if (!sched_info_on()) {
+ r = -EOPNOTSUPP;
+ break;
+ }
+ if (vcpu->arch.xen.runstate_set) {
+ data->u.gpa = vcpu->arch.xen.runstate_cache.gpa;
+ r = 0;
+ }
+ break;
+
+ case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
+ if (!sched_info_on()) {
+ r = -EOPNOTSUPP;
+ break;
+ }
+ data->u.runstate.state = vcpu->arch.xen.current_runstate;
+ r = 0;
+ break;
+
+ case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
+ if (!sched_info_on()) {
+ r = -EOPNOTSUPP;
+ break;
+ }
+ data->u.runstate.state = vcpu->arch.xen.current_runstate;
+ data->u.runstate.state_entry_time =
+ vcpu->arch.xen.runstate_entry_time;
+ data->u.runstate.time_running =
+ vcpu->arch.xen.runstate_times[RUNSTATE_running];
+ data->u.runstate.time_runnable =
+ vcpu->arch.xen.runstate_times[RUNSTATE_runnable];
+ data->u.runstate.time_blocked =
+ vcpu->arch.xen.runstate_times[RUNSTATE_blocked];
+ data->u.runstate.time_offline =
+ vcpu->arch.xen.runstate_times[RUNSTATE_offline];
+ r = 0;
+ break;
+
+ case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
+ r = -EINVAL;
+ break;
+
default:
break;
}