diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/hypervisor.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/irq_vectors.h | 3 | ||||
-rw-r--r-- | arch/x86/include/asm/kgdb.h | 20 | ||||
-rw-r--r-- | arch/x86/include/asm/setup.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/xen/hypercall.h | 6 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/hypervisor.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/entry_32.S | 3 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 3 | ||||
-rw-r--r-- | arch/x86/kernel/kgdb.c | 189 | ||||
-rw-r--r-- | arch/x86/xen/Kconfig | 5 | ||||
-rw-r--r-- | arch/x86/xen/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 197 | ||||
-rw-r--r-- | arch/x86/xen/mmu.c | 35 | ||||
-rw-r--r-- | arch/x86/xen/mmu.h | 1 | ||||
-rw-r--r-- | arch/x86/xen/platform-pci-unplug.c | 137 | ||||
-rw-r--r-- | arch/x86/xen/setup.c | 72 | ||||
-rw-r--r-- | arch/x86/xen/smp.c | 2 | ||||
-rw-r--r-- | arch/x86/xen/suspend.c | 12 | ||||
-rw-r--r-- | arch/x86/xen/time.c | 96 | ||||
-rw-r--r-- | arch/x86/xen/xen-ops.h | 13 |
20 files changed, 629 insertions, 173 deletions
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index 70abda7058c8..ff2546ce7178 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -45,5 +45,6 @@ extern const struct hypervisor_x86 *x86_hyper; /* Recognized hypervisors */ extern const struct hypervisor_x86 x86_hyper_vmware; extern const struct hypervisor_x86 x86_hyper_ms_hyperv; +extern const struct hypervisor_x86 x86_hyper_xen_hvm; #endif diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 8767d99c4f64..e2ca30092557 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -125,6 +125,9 @@ */ #define MCE_SELF_VECTOR 0xeb +/* Xen vector callback to receive events in a HVM domain */ +#define XEN_HVM_EVTCHN_CALLBACK 0xe9 + #define NR_VECTORS 256 #define FPU_IRQ 13 diff --git a/arch/x86/include/asm/kgdb.h b/arch/x86/include/asm/kgdb.h index 006da3687cdc..396f5b5fc4d7 100644 --- a/arch/x86/include/asm/kgdb.h +++ b/arch/x86/include/asm/kgdb.h @@ -39,9 +39,11 @@ enum regnames { GDB_FS, /* 14 */ GDB_GS, /* 15 */ }; +#define GDB_ORIG_AX 41 +#define DBG_MAX_REG_NUM 16 #define NUMREGBYTES ((GDB_GS+1)*4) #else /* ! CONFIG_X86_32 */ -enum regnames64 { +enum regnames { GDB_AX, /* 0 */ GDB_BX, /* 1 */ GDB_CX, /* 2 */ @@ -59,15 +61,15 @@ enum regnames64 { GDB_R14, /* 14 */ GDB_R15, /* 15 */ GDB_PC, /* 16 */ + GDB_PS, /* 17 */ + GDB_CS, /* 18 */ + GDB_SS, /* 19 */ }; - -enum regnames32 { - GDB_PS = 34, - GDB_CS, - GDB_SS, -}; -#define NUMREGBYTES ((GDB_SS+1)*4) -#endif /* CONFIG_X86_32 */ +#define GDB_ORIG_AX 57 +#define DBG_MAX_REG_NUM 20 +/* 17 64 bit regs and 3 32 bit regs */ +#define NUMREGBYTES ((17 * 8) + (3 * 4)) +#endif /* ! CONFIG_X86_32 */ static inline void arch_kgdb_breakpoint(void) { diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 86b1506f4179..ef292c792d74 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -82,7 +82,7 @@ void *extend_brk(size_t size, size_t align); * executable.) */ #define RESERVE_BRK(name,sz) \ - static void __section(.discard) __used \ + static void __section(.discard.text) __used \ __brk_reservation_fn_##name##__(void) { \ asm volatile ( \ ".pushsection .brk_reservation,\"aw\",@nobits;" \ diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 9c371e4a9fa6..7fda040a76cd 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -417,6 +417,12 @@ HYPERVISOR_nmi_op(unsigned long op, unsigned long arg) return _hypercall2(int, nmi_op, op, arg); } +static inline unsigned long __must_check +HYPERVISOR_hvm_op(int op, void *arg) +{ + return _hypercall2(unsigned long, hvm_op, op, arg); +} + static inline void MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) { diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index dd531cc56a8f..8095f8611f8a 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -34,6 +34,9 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] = { &x86_hyper_vmware, &x86_hyper_ms_hyperv, +#ifdef CONFIG_XEN_PVHVM + &x86_hyper_xen_hvm, +#endif }; const struct hypervisor_x86 *x86_hyper; diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index cd49141cf153..6b196834a0dd 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1166,6 +1166,9 @@ ENTRY(xen_failsafe_callback) .previous ENDPROC(xen_failsafe_callback) +BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK, + xen_evtchn_do_upcall) + #endif /* CONFIG_XEN */ #ifdef CONFIG_FUNCTION_TRACER diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 4db7c4d12ffa..649ed17f7009 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1329,6 +1329,9 @@ ENTRY(xen_failsafe_callback) CFI_ENDPROC END(xen_failsafe_callback) +apicinterrupt XEN_HVM_EVTCHN_CALLBACK \ + xen_hvm_callback_vector xen_evtchn_do_upcall + #endif /* CONFIG_XEN */ /* diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 01ab17ae2ae7..ef10940e1af0 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -49,55 +49,94 @@ #include <asm/system.h> #include <asm/apic.h> -/** - * pt_regs_to_gdb_regs - Convert ptrace regs to GDB regs - * @gdb_regs: A pointer to hold the registers in the order GDB wants. - * @regs: The &struct pt_regs of the current process. - * - * Convert the pt_regs in @regs into the format for registers that - * GDB expects, stored in @gdb_regs. - */ -void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) +struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { -#ifndef CONFIG_X86_32 - u32 *gdb_regs32 = (u32 *)gdb_regs; +#ifdef CONFIG_X86_32 + { "ax", 4, offsetof(struct pt_regs, ax) }, + { "cx", 4, offsetof(struct pt_regs, cx) }, + { "dx", 4, offsetof(struct pt_regs, dx) }, + { "bx", 4, offsetof(struct pt_regs, bx) }, + { "sp", 4, offsetof(struct pt_regs, sp) }, + { "bp", 4, offsetof(struct pt_regs, bp) }, + { "si", 4, offsetof(struct pt_regs, si) }, + { "di", 4, offsetof(struct pt_regs, di) }, + { "ip", 4, offsetof(struct pt_regs, ip) }, + { "flags", 4, offsetof(struct pt_regs, flags) }, + { "cs", 4, offsetof(struct pt_regs, cs) }, + { "ss", 4, offsetof(struct pt_regs, ss) }, + { "ds", 4, offsetof(struct pt_regs, ds) }, + { "es", 4, offsetof(struct pt_regs, es) }, + { "fs", 4, -1 }, + { "gs", 4, -1 }, +#else + { "ax", 8, offsetof(struct pt_regs, ax) }, + { "bx", 8, offsetof(struct pt_regs, bx) }, + { "cx", 8, offsetof(struct pt_regs, cx) }, + { "dx", 8, offsetof(struct pt_regs, dx) }, + { "si", 8, offsetof(struct pt_regs, dx) }, + { "di", 8, offsetof(struct pt_regs, di) }, + { "bp", 8, offsetof(struct pt_regs, bp) }, + { "sp", 8, offsetof(struct pt_regs, sp) }, + { "r8", 8, offsetof(struct pt_regs, r8) }, + { "r9", 8, offsetof(struct pt_regs, r9) }, + { "r10", 8, offsetof(struct pt_regs, r10) }, + { "r11", 8, offsetof(struct pt_regs, r11) }, + { "r12", 8, offsetof(struct pt_regs, r12) }, + { "r13", 8, offsetof(struct pt_regs, r13) }, + { "r14", 8, offsetof(struct pt_regs, r14) }, + { "r15", 8, offsetof(struct pt_regs, r15) }, + { "ip", 8, offsetof(struct pt_regs, ip) }, + { "flags", 4, offsetof(struct pt_regs, flags) }, + { "cs", 4, offsetof(struct pt_regs, cs) }, + { "ss", 4, offsetof(struct pt_regs, ss) }, #endif - gdb_regs[GDB_AX] = regs->ax; - gdb_regs[GDB_BX] = regs->bx; - gdb_regs[GDB_CX] = regs->cx; - gdb_regs[GDB_DX] = regs->dx; - gdb_regs[GDB_SI] = regs->si; - gdb_regs[GDB_DI] = regs->di; - gdb_regs[GDB_BP] = regs->bp; - gdb_regs[GDB_PC] = regs->ip; +}; + +int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) +{ + if ( #ifdef CONFIG_X86_32 - gdb_regs[GDB_PS] = regs->flags; - gdb_regs[GDB_DS] = regs->ds; - gdb_regs[GDB_ES] = regs->es; - gdb_regs[GDB_CS] = regs->cs; - gdb_regs[GDB_FS] = 0xFFFF; - gdb_regs[GDB_GS] = 0xFFFF; - if (user_mode_vm(regs)) { - gdb_regs[GDB_SS] = regs->ss; - gdb_regs[GDB_SP] = regs->sp; - } else { - gdb_regs[GDB_SS] = __KERNEL_DS; - gdb_regs[GDB_SP] = kernel_stack_pointer(regs); + regno == GDB_SS || regno == GDB_FS || regno == GDB_GS || +#endif + regno == GDB_SP || regno == GDB_ORIG_AX) + return 0; + + if (dbg_reg_def[regno].offset != -1) + memcpy((void *)regs + dbg_reg_def[regno].offset, mem, + dbg_reg_def[regno].size); + return 0; +} + +char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) +{ + if (regno == GDB_ORIG_AX) { + memcpy(mem, ®s->orig_ax, sizeof(regs->orig_ax)); + return "orig_ax"; } -#else - gdb_regs[GDB_R8] = regs->r8; - gdb_regs[GDB_R9] = regs->r9; - gdb_regs[GDB_R10] = regs->r10; - gdb_regs[GDB_R11] = regs->r11; - gdb_regs[GDB_R12] = regs->r12; - gdb_regs[GDB_R13] = regs->r13; - gdb_regs[GDB_R14] = regs->r14; - gdb_regs[GDB_R15] = regs->r15; - gdb_regs32[GDB_PS] = regs->flags; - gdb_regs32[GDB_CS] = regs->cs; - gdb_regs32[GDB_SS] = regs->ss; - gdb_regs[GDB_SP] = kernel_stack_pointer(regs); + if (regno >= DBG_MAX_REG_NUM || regno < 0) + return NULL; + + if (dbg_reg_def[regno].offset != -1) + memcpy(mem, (void *)regs + dbg_reg_def[regno].offset, + dbg_reg_def[regno].size); + + switch (regno) { +#ifdef CONFIG_X86_32 + case GDB_SS: + if (!user_mode_vm(regs)) + *(unsigned long *)mem = __KERNEL_DS; + break; + case GDB_SP: + if (!user_mode_vm(regs)) + *(unsigned long *)mem = kernel_stack_pointer(regs); + break; + case GDB_GS: + case GDB_FS: + *(unsigned long *)mem = 0xFFFF; + break; #endif + } + return dbg_reg_def[regno].name; } /** @@ -150,54 +189,13 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) gdb_regs[GDB_SP] = p->thread.sp; } -/** - * gdb_regs_to_pt_regs - Convert GDB regs to ptrace regs. - * @gdb_regs: A pointer to hold the registers we've received from GDB. - * @regs: A pointer to a &struct pt_regs to hold these values in. - * - * Convert the GDB regs in @gdb_regs into the pt_regs, and store them - * in @regs. - */ -void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) -{ -#ifndef CONFIG_X86_32 - u32 *gdb_regs32 = (u32 *)gdb_regs; -#endif - regs->ax = gdb_regs[GDB_AX]; - regs->bx = gdb_regs[GDB_BX]; - regs->cx = gdb_regs[GDB_CX]; - regs->dx = gdb_regs[GDB_DX]; - regs->si = gdb_regs[GDB_SI]; - regs->di = gdb_regs[GDB_DI]; - regs->bp = gdb_regs[GDB_BP]; - regs->ip = gdb_regs[GDB_PC]; -#ifdef CONFIG_X86_32 - regs->flags = gdb_regs[GDB_PS]; - regs->ds = gdb_regs[GDB_DS]; - regs->es = gdb_regs[GDB_ES]; - regs->cs = gdb_regs[GDB_CS]; -#else - regs->r8 = gdb_regs[GDB_R8]; - regs->r9 = gdb_regs[GDB_R9]; - regs->r10 = gdb_regs[GDB_R10]; - regs->r11 = gdb_regs[GDB_R11]; - regs->r12 = gdb_regs[GDB_R12]; - regs->r13 = gdb_regs[GDB_R13]; - regs->r14 = gdb_regs[GDB_R14]; - regs->r15 = gdb_regs[GDB_R15]; - regs->flags = gdb_regs32[GDB_PS]; - regs->cs = gdb_regs32[GDB_CS]; - regs->ss = gdb_regs32[GDB_SS]; -#endif -} - static struct hw_breakpoint { unsigned enabled; unsigned long addr; int len; int type; struct perf_event **pev; -} breakinfo[4]; +} breakinfo[HBP_NUM]; static unsigned long early_dr7; @@ -205,7 +203,7 @@ static void kgdb_correct_hw_break(void) { int breakno; - for (breakno = 0; breakno < 4; breakno++) { + for (breakno = 0; breakno < HBP_NUM; breakno++) { struct perf_event *bp; struct arch_hw_breakpoint *info; int val; @@ -292,10 +290,10 @@ kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) { int i; - for (i = 0; i < 4; i++) + for (i = 0; i < HBP_NUM; i++) if (breakinfo[i].addr == addr && breakinfo[i].enabled) break; - if (i == 4) + if (i == HBP_NUM) return -1; if (hw_break_release_slot(i)) { @@ -313,7 +311,7 @@ static void kgdb_remove_all_hw_break(void) int cpu = raw_smp_processor_id(); struct perf_event *bp; - for (i = 0; i < 4; i++) { + for (i = 0; i < HBP_NUM; i++) { if (!breakinfo[i].enabled) continue; bp = *per_cpu_ptr(breakinfo[i].pev, cpu); @@ -333,10 +331,10 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) { int i; - for (i = 0; i < 4; i++) + for (i = 0; i < HBP_NUM; i++) if (!breakinfo[i].enabled) break; - if (i == 4) + if (i == HBP_NUM) return -1; switch (bptype) { @@ -397,7 +395,7 @@ void kgdb_disable_hw_debug(struct pt_regs *regs) /* Disable hardware debugging while we are in kgdb: */ set_debugreg(0UL, 7); - for (i = 0; i < 4; i++) { + for (i = 0; i < HBP_NUM; i++) { if (!breakinfo[i].enabled) continue; if (dbg_is_early) { @@ -458,7 +456,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, { unsigned long addr; char *ptr; - int newPC; switch (remcomInBuffer[0]) { case 'c': @@ -469,8 +466,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, linux_regs->ip = addr; case 'D': case 'k': - newPC = linux_regs->ip; - /* clear the trace bit */ linux_regs->flags &= ~X86_EFLAGS_TF; atomic_set(&kgdb_cpu_doing_single_step, -1); @@ -645,7 +640,7 @@ void kgdb_arch_late(void) attr.bp_len = HW_BREAKPOINT_LEN_1; attr.bp_type = HW_BREAKPOINT_W; attr.disabled = 1; - for (i = 0; i < 4; i++) { + for (i = 0; i < HBP_NUM; i++) { if (breakinfo[i].pev) continue; breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL); diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index b83e119fbeb0..68128a1b401a 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -13,6 +13,11 @@ config XEN kernel to boot in a paravirtualized environment under the Xen hypervisor. +config XEN_PVHVM + def_bool y + depends on XEN + depends on X86_LOCAL_APIC + config XEN_MAX_DOMAIN_MEMORY int "Maximum allowed size of a domain in gigabytes" default 8 if X86_32 diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 3bb4fc21f4f2..930954685980 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -12,7 +12,7 @@ CFLAGS_mmu.o := $(nostackp) obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ time.o xen-asm.o xen-asm_$(BITS).o \ - grant-table.o suspend.o + grant-table.o suspend.o platform-pci-unplug.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 65d8d79b46a8..d4ff5e83621d 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -11,6 +11,7 @@ * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 */ +#include <linux/cpu.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/smp.h> @@ -35,8 +36,10 @@ #include <xen/interface/version.h> #include <xen/interface/physdev.h> #include <xen/interface/vcpu.h> +#include <xen/interface/memory.h> #include <xen/features.h> #include <xen/page.h> +#include <xen/hvm.h> #include <xen/hvc-console.h> #include <asm/paravirt.h> @@ -55,7 +58,9 @@ #include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/reboot.h> +#include <asm/setup.h> #include <asm/stackprotector.h> +#include <asm/hypervisor.h> #include "xen-ops.h" #include "mmu.h" @@ -76,6 +81,10 @@ struct shared_info xen_dummy_shared_info; void *xen_initial_gdt; +RESERVE_BRK(shared_info_page_brk, PAGE_SIZE); +__read_mostly int xen_have_vector_callback; +EXPORT_SYMBOL_GPL(xen_have_vector_callback); + /* * Point at some empty memory to start with. We map the real shared_info * page as soon as fixmap is up and running. @@ -97,6 +106,14 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; */ static int have_vcpu_info_placement = 1; +static void clamp_max_cpus(void) +{ +#ifdef CONFIG_SMP + if (setup_max_cpus > MAX_VIRT_CPUS) + setup_max_cpus = MAX_VIRT_CPUS; +#endif +} + static void xen_vcpu_setup(int cpu) { struct vcpu_register_vcpu_info info; @@ -104,13 +121,17 @@ static void xen_vcpu_setup(int cpu) struct vcpu_info *vcpup; BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); - per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; - if (!have_vcpu_info_placement) - return; /* already tested, not available */ + if (cpu < MAX_VIRT_CPUS) + per_cpu(xen_vcpu,cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; - vcpup = &per_cpu(xen_vcpu_info, cpu); + if (!have_vcpu_info_placement) { + if (cpu >= MAX_VIRT_CPUS) + clamp_max_cpus(); + return; + } + vcpup = &per_cpu(xen_vcpu_info, cpu); info.mfn = arbitrary_virt_to_mfn(vcpup); info.offset = offset_in_page(vcpup); @@ -125,6 +146,7 @@ static void xen_vcpu_setup(int cpu) if (err) { printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err); have_vcpu_info_placement = 0; + clamp_max_cpus(); } else { /* This cpu is using the registered vcpu info, even if later ones fail to. */ @@ -731,7 +753,6 @@ static void set_xen_basic_apic_ops(void) #endif - static void xen_clts(void) { struct multicall_space mcs; @@ -926,10 +947,6 @@ static const struct pv_init_ops xen_init_ops __initdata = { .patch = xen_patch, }; -static const struct pv_time_ops xen_time_ops __initdata = { - .sched_clock = xen_sched_clock, -}; - static const struct pv_cpu_ops xen_cpu_ops __initdata = { .cpuid = xen_cpuid, @@ -1028,6 +1045,23 @@ static void xen_crash_shutdown(struct pt_regs *regs) xen_reboot(SHUTDOWN_crash); } +static int +xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + xen_reboot(SHUTDOWN_crash); + return NOTIFY_DONE; +} + +static struct notifier_block xen_panic_block = { + .notifier_call= xen_panic_event, +}; + +int xen_panic_handler_init(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block); + return 0; +} + static const struct machine_ops __initdata xen_machine_ops = { .restart = xen_restart, .halt = xen_machine_halt, @@ -1067,7 +1101,6 @@ asmlinkage void __init xen_start_kernel(void) /* Install Xen paravirt ops */ pv_info = xen_info; pv_init_ops = xen_init_ops; - pv_time_ops = xen_time_ops; pv_cpu_ops = xen_cpu_ops; pv_apic_ops = xen_apic_ops; @@ -1075,13 +1108,7 @@ asmlinkage void __init xen_start_kernel(void) x86_init.oem.arch_setup = xen_arch_setup; x86_init.oem.banner = xen_banner; - x86_init.timers.timer_init = xen_time_init; - x86_init.timers.setup_percpu_clockev = x86_init_noop; - x86_cpuinit.setup_percpu_clockev = x86_init_noop; - - x86_platform.calibrate_tsc = xen_tsc_khz; - x86_platform.get_wallclock = xen_get_wallclock; - x86_platform.set_wallclock = xen_set_wallclock; + xen_init_time_ops(); /* * Set up some pagetable state before starting to set any ptes. @@ -1206,3 +1233,139 @@ asmlinkage void __init xen_start_kernel(void) x86_64_start_reservations((char *)__pa_symbol(&boot_params)); #endif } + +static uint32_t xen_cpuid_base(void) +{ + uint32_t base, eax, ebx, ecx, edx; + char signature[13]; + + for (base = 0x40000000; base < 0x40010000; base += 0x100) { + cpuid(base, &eax, &ebx, &ecx, &edx); + *(uint32_t *)(signature + 0) = ebx; + *(uint32_t *)(signature + 4) = ecx; + *(uint32_t *)(signature + 8) = edx; + signature[12] = 0; + + if (!strcmp("XenVMMXenVMM", signature) && ((eax - base) >= 2)) + return base; + } + + return 0; +} + +static int init_hvm_pv_info(int *major, int *minor) +{ + uint32_t eax, ebx, ecx, edx, pages, msr, base; + u64 pfn; + + base = xen_cpuid_base(); + cpuid(base + 1, &eax, &ebx, &ecx, &edx); + + *major = eax >> 16; + *minor = eax & 0xffff; + printk(KERN_INFO "Xen version %d.%d.\n", *major, *minor); + + cpuid(base + 2, &pages, &msr, &ecx, &edx); + + pfn = __pa(hypercall_page); + wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); + + xen_setup_features(); + + pv_info = xen_info; + pv_info.kernel_rpl = 0; + + xen_domain_type = XEN_HVM_DOMAIN; + + return 0; +} + +void xen_hvm_init_shared_info(void) +{ + int cpu; + struct xen_add_to_physmap xatp; + static struct shared_info *shared_info_page = 0; + + if (!shared_info_page) + shared_info_page = (struct shared_info *) + extend_brk(PAGE_SIZE, PAGE_SIZE); + xatp.domid = DOMID_SELF; + xatp.idx = 0; + xatp.space = XENMAPSPACE_shared_info; + xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; + if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) + BUG(); + + HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; + + /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info + * page, we use it in the event channel upcall and in some pvclock + * related functions. We don't need the vcpu_info placement + * optimizations because we don't use any pv_mmu or pv_irq op on + * HVM. + * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is + * online but xen_hvm_init_shared_info is run at resume time too and + * in that case multiple vcpus might be online. */ + for_each_online_cpu(cpu) { + per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; + } +} + +#ifdef CONFIG_XEN_PVHVM +static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + int cpu = (long)hcpu; + switch (action) { + case CPU_UP_PREPARE: + per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata xen_hvm_cpu_notifier = { + .notifier_call = xen_hvm_cpu_notify, +}; + +static void __init xen_hvm_guest_init(void) +{ + int r; + int major, minor; + + r = init_hvm_pv_info(&major, &minor); + if (r < 0) + return; + + xen_hvm_init_shared_info(); + + if (xen_feature(XENFEAT_hvm_callback_vector)) + xen_have_vector_callback = 1; + register_cpu_notifier(&xen_hvm_cpu_notifier); + xen_unplug_emulated_devices(); + have_vcpu_info_placement = 0; + x86_init.irqs.intr_init = xen_init_IRQ; + xen_hvm_init_time_ops(); + xen_hvm_init_mmu_ops(); +} + +static bool __init xen_hvm_platform(void) +{ + if (xen_pv_domain()) + return false; + + if (!xen_cpuid_base()) + return false; + + return true; +} + +const __refconst struct hypervisor_x86 x86_hyper_xen_hvm = { + .name = "Xen HVM", + .detect = xen_hvm_platform, + .init_platform = xen_hvm_guest_init, +}; +EXPORT_SYMBOL(x86_hyper_xen_hvm); +#endif diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 914f04695ce5..413b19b3d0fe 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -58,6 +58,7 @@ #include <xen/page.h> #include <xen/interface/xen.h> +#include <xen/interface/hvm/hvm_op.h> #include <xen/interface/version.h> #include <xen/hvc-console.h> @@ -1941,6 +1942,40 @@ void __init xen_init_mmu_ops(void) pv_mmu_ops = xen_mmu_ops; } +#ifdef CONFIG_XEN_PVHVM +static void xen_hvm_exit_mmap(struct mm_struct *mm) +{ + struct xen_hvm_pagetable_dying a; + int rc; + + a.domid = DOMID_SELF; + a.gpa = __pa(mm->pgd); + rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a); + WARN_ON_ONCE(rc < 0); +} + +static int is_pagetable_dying_supported(void) +{ + struct xen_hvm_pagetable_dying a; + int rc = 0; + + a.domid = DOMID_SELF; + a.gpa = 0x00; + rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a); + if (rc < 0) { + printk(KERN_DEBUG "HVMOP_pagetable_dying not supported\n"); + return 0; + } + return 1; +} + +void __init xen_hvm_init_mmu_ops(void) +{ + if (is_pagetable_dying_supported()) + pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap; +} +#endif + #ifdef CONFIG_XEN_DEBUG_FS static struct dentry *d_mmu_debug; diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 5fe6bc7f5ecf..fa938c4aa2f7 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -60,4 +60,5 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, unsigned long xen_read_cr2_direct(void); extern void xen_init_mmu_ops(void); +extern void xen_hvm_init_mmu_ops(void); #endif /* _XEN_MMU_H */ diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c new file mode 100644 index 000000000000..554c002a1e1a --- /dev/null +++ b/arch/x86/xen/platform-pci-unplug.c @@ -0,0 +1,137 @@ +/****************************************************************************** + * platform-pci-unplug.c + * + * Xen platform PCI device driver + * Copyright (c) 2010, Citrix + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include <linux/init.h> +#include <linux/io.h> +#include <linux/module.h> + +#include <xen/platform_pci.h> + +#define XEN_PLATFORM_ERR_MAGIC -1 +#define XEN_PLATFORM_ERR_PROTOCOL -2 +#define XEN_PLATFORM_ERR_BLACKLIST -3 + +/* store the value of xen_emul_unplug after the unplug is done */ +int xen_platform_pci_unplug; +EXPORT_SYMBOL_GPL(xen_platform_pci_unplug); +#ifdef CONFIG_XEN_PVHVM +static int xen_emul_unplug; + +static int __init check_platform_magic(void) +{ + short magic; + char protocol; + + magic = inw(XEN_IOPORT_MAGIC); + if (magic != XEN_IOPORT_MAGIC_VAL) { + printk(KERN_ERR "Xen Platform PCI: unrecognised magic value\n"); + return XEN_PLATFORM_ERR_MAGIC; + } + + protocol = inb(XEN_IOPORT_PROTOVER); + + printk(KERN_DEBUG "Xen Platform PCI: I/O protocol version %d\n", + protocol); + + switch (protocol) { + case 1: + outw(XEN_IOPORT_LINUX_PRODNUM, XEN_IOPORT_PRODNUM); + outl(XEN_IOPORT_LINUX_DRVVER, XEN_IOPORT_DRVVER); + if (inw(XEN_IOPORT_MAGIC) != XEN_IOPORT_MAGIC_VAL) { + printk(KERN_ERR "Xen Platform: blacklisted by host\n"); + return XEN_PLATFORM_ERR_BLACKLIST; + } + break; + default: + printk(KERN_WARNING "Xen Platform PCI: unknown I/O protocol version"); + return XEN_PLATFORM_ERR_PROTOCOL; + } + + return 0; +} + +void __init xen_unplug_emulated_devices(void) +{ + int r; + + /* check the version of the xen platform PCI device */ + r = check_platform_magic(); + /* If the version matches enable the Xen platform PCI driver. + * Also enable the Xen platform PCI driver if the version is really old + * and the user told us to ignore it. */ + if (r && !(r == XEN_PLATFORM_ERR_MAGIC && + (xen_emul_unplug & XEN_UNPLUG_IGNORE))) + return; + /* Set the default value of xen_emul_unplug depending on whether or + * not the Xen PV frontends and the Xen platform PCI driver have + * been compiled for this kernel (modules or built-in are both OK). */ + if (!xen_emul_unplug) { + if (xen_must_unplug_nics()) { + printk(KERN_INFO "Netfront and the Xen platform PCI driver have " + "been compiled for this kernel: unplug emulated NICs.\n"); + xen_emul_unplug |= XEN_UNPLUG_ALL_NICS; + } + if (xen_must_unplug_disks()) { + printk(KERN_INFO "Blkfront and the Xen platform PCI driver have " + "been compiled for this kernel: unplug emulated disks.\n" + "You might have to change the root device\n" + "from /dev/hd[a-d] to /dev/xvd[a-d]\n" + "in your root= kernel command line option\n"); + xen_emul_unplug |= XEN_UNPLUG_ALL_IDE_DISKS; + } + } + /* Now unplug the emulated devices */ + if (!(xen_emul_unplug & XEN_UNPLUG_IGNORE)) + outw(xen_emul_unplug, XEN_IOPORT_UNPLUG); + xen_platform_pci_unplug = xen_emul_unplug; +} + +static int __init parse_xen_emul_unplug(char *arg) +{ + char *p, *q; + int l; + + for (p = arg; p; p = q) { + q = strchr(p, ','); + if (q) { + l = q - p; + q++; + } else { + l = strlen(p); + } + if (!strncmp(p, "all", l)) + xen_emul_unplug |= XEN_UNPLUG_ALL; + else if (!strncmp(p, "ide-disks", l)) + xen_emul_unplug |= XEN_UNPLUG_ALL_IDE_DISKS; + else if (!strncmp(p, "aux-ide-disks", l)) + xen_emul_unplug |= XEN_UNPLUG_AUX_IDE_DISKS; + else if (!strncmp(p, "nics", l)) + xen_emul_unplug |= XEN_UNPLUG_ALL_NICS; + else if (!strncmp(p, "ignore", l)) + xen_emul_unplug |= XEN_UNPLUG_IGNORE; + else + printk(KERN_WARNING "unrecognised option '%s' " + "in parameter 'xen_emul_unplug'\n", p); + } + return 0; +} +early_param("xen_emul_unplug", parse_xen_emul_unplug); +#endif diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index ad0047f47cd4..328b00305426 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -20,6 +20,7 @@ #include <xen/page.h> #include <xen/interface/callback.h> #include <xen/interface/physdev.h> +#include <xen/interface/memory.h> #include <xen/features.h> #include "xen-ops.h" @@ -32,6 +33,73 @@ extern void xen_sysenter_target(void); extern void xen_syscall_target(void); extern void xen_syscall32_target(void); +static unsigned long __init xen_release_chunk(phys_addr_t start_addr, + phys_addr_t end_addr) +{ + struct xen_memory_reservation reservation = { + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + unsigned long start, end; + unsigned long len = 0; + unsigned long pfn; + int ret; + + start = PFN_UP(start_addr); + end = PFN_DOWN(end_addr); + + if (end <= start) + return 0; + + printk(KERN_INFO "xen_release_chunk: looking at area pfn %lx-%lx: ", + start, end); + for(pfn = start; pfn < end; pfn++) { + unsigned long mfn = pfn_to_mfn(pfn); + + /* Make sure pfn exists to start with */ + if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn) + continue; + + set_xen_guest_handle(reservation.extent_start, &mfn); + reservation.nr_extents = 1; + + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, + &reservation); + WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n", + start, end, ret); + if (ret == 1) { + set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + len++; + } + } + printk(KERN_CONT "%ld pages freed\n", len); + + return len; +} + +static unsigned long __init xen_return_unused_memory(unsigned long max_pfn, + const struct e820map *e820) +{ + phys_addr_t max_addr = PFN_PHYS(max_pfn); + phys_addr_t last_end = 0; + unsigned long released = 0; + int i; + + for (i = 0; i < e820->nr_map && last_end < max_addr; i++) { + phys_addr_t end = e820->map[i].addr; + end = min(max_addr, end); + + released += xen_release_chunk(last_end, end); + last_end = e820->map[i].addr + e820->map[i].size; + } + + if (last_end < max_addr) + released += xen_release_chunk(last_end, max_addr); + + printk(KERN_INFO "released %ld pages of unused memory\n", released); + return released; +} /** * machine_specific_memory_setup - Hook for machine specific memory setup. @@ -67,6 +135,8 @@ char * __init xen_memory_setup(void) sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); + xen_return_unused_memory(xen_start_info->nr_pages, &e820); + return "Xen"; } @@ -156,6 +226,8 @@ void __init xen_arch_setup(void) struct physdev_set_iopl set_iopl; int rc; + xen_panic_handler_init(); + HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index a29693fd3138..25f232b18a82 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -394,6 +394,8 @@ static void stop_self(void *v) load_cr3(swapper_pg_dir); /* should set up a minimal gdt */ + set_cpu_online(cpu, false); + HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL); BUG(); } diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index a9c661108034..1d789d56877c 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -26,6 +26,18 @@ void xen_pre_suspend(void) BUG(); } +void xen_hvm_post_suspend(int suspend_cancelled) +{ + int cpu; + xen_hvm_init_shared_info(); + xen_callback_vector(); + if (xen_feature(XENFEAT_hvm_safe_pvclock)) { + for_each_online_cpu(cpu) { + xen_setup_runstate_info(cpu); + } + } +} + void xen_post_suspend(int suspend_cancelled) { xen_build_mfn_list_list(); diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index b3c6c59ed302..1a5353a753fc 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -20,6 +20,7 @@ #include <asm/xen/hypercall.h> #include <xen/events.h> +#include <xen/features.h> #include <xen/interface/xen.h> #include <xen/interface/vcpu.h> @@ -155,47 +156,8 @@ static void do_stolen_accounting(void) account_idle_ticks(ticks); } -/* - * Xen sched_clock implementation. Returns the number of unstolen - * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED - * states. - */ -unsigned long long xen_sched_clock(void) -{ - struct vcpu_runstate_info state; - cycle_t now; - u64 ret; - s64 offset; - - /* - * Ideally sched_clock should be called on a per-cpu basis - * anyway, so preempt should already be disabled, but that's - * not current practice at the moment. - */ - preempt_disable(); - - now = xen_clocksource_read(); - - get_runstate_snapshot(&state); - - WARN_ON(state.state != RUNSTATE_running); - - offset = now - state.state_entry_time; - if (offset < 0) - offset = 0; - - ret = state.time[RUNSTATE_blocked] + - state.time[RUNSTATE_running] + - offset; - - preempt_enable(); - - return ret; -} - - /* Get the TSC speed from Xen */ -unsigned long xen_tsc_khz(void) +static unsigned long xen_tsc_khz(void) { struct pvclock_vcpu_time_info *info = &HYPERVISOR_shared_info->vcpu_info[0].time; @@ -230,7 +192,7 @@ static void xen_read_wallclock(struct timespec *ts) put_cpu_var(xen_vcpu); } -unsigned long xen_get_wallclock(void) +static unsigned long xen_get_wallclock(void) { struct timespec ts; @@ -238,7 +200,7 @@ unsigned long xen_get_wallclock(void) return ts.tv_sec; } -int xen_set_wallclock(unsigned long now) +static int xen_set_wallclock(unsigned long now) { /* do nothing for domU */ return -1; @@ -473,7 +435,11 @@ void xen_timer_resume(void) } } -__init void xen_time_init(void) +static const struct pv_time_ops xen_time_ops __initdata = { + .sched_clock = xen_clocksource_read, +}; + +static __init void xen_time_init(void) { int cpu = smp_processor_id(); struct timespec tp; @@ -497,3 +463,47 @@ __init void xen_time_init(void) xen_setup_timer(cpu); xen_setup_cpu_clockevents(); } + +__init void xen_init_time_ops(void) +{ + pv_time_ops = xen_time_ops; + + x86_init.timers.timer_init = xen_time_init; + x86_init.timers.setup_percpu_clockev = x86_init_noop; + x86_cpuinit.setup_percpu_clockev = x86_init_noop; + + x86_platform.calibrate_tsc = xen_tsc_khz; + x86_platform.get_wallclock = xen_get_wallclock; + x86_platform.set_wallclock = xen_set_wallclock; +} + +#ifdef CONFIG_XEN_PVHVM +static void xen_hvm_setup_cpu_clockevents(void) +{ + int cpu = smp_processor_id(); + xen_setup_runstate_info(cpu); + xen_setup_timer(cpu); + xen_setup_cpu_clockevents(); +} + +__init void xen_hvm_init_time_ops(void) +{ + /* vector callback is needed otherwise we cannot receive interrupts + * on cpu > 0 */ + if (!xen_have_vector_callback && num_present_cpus() > 1) + return; + if (!xen_feature(XENFEAT_hvm_safe_pvclock)) { + printk(KERN_INFO "Xen doesn't support pvclock on HVM," + "disable pv timer\n"); + return; + } + + pv_time_ops = xen_time_ops; + x86_init.timers.setup_percpu_clockev = xen_time_init; + x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents; + + x86_platform.calibrate_tsc = xen_tsc_khz; + x86_platform.get_wallclock = xen_get_wallclock; + x86_platform.set_wallclock = xen_set_wallclock; +} +#endif diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index f9153a300bce..7c8ab86163e9 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -38,6 +38,10 @@ void xen_enable_sysenter(void); void xen_enable_syscall(void); void xen_vcpu_restore(void); +void xen_callback_vector(void); +void xen_hvm_init_shared_info(void); +void __init xen_unplug_emulated_devices(void); + void __init xen_build_dynamic_phys_to_machine(void); void xen_init_irq_ops(void); @@ -46,11 +50,8 @@ void xen_setup_runstate_info(int cpu); void xen_teardown_timer(int cpu); cycle_t xen_clocksource_read(void); void xen_setup_cpu_clockevents(void); -unsigned long xen_tsc_khz(void); -void __init xen_time_init(void); -unsigned long xen_get_wallclock(void); -int xen_set_wallclock(unsigned long time); -unsigned long long xen_sched_clock(void); +void __init xen_init_time_ops(void); +void __init xen_hvm_init_time_ops(void); irqreturn_t xen_debug_interrupt(int irq, void *dev_id); @@ -101,4 +102,6 @@ void xen_sysret32(void); void xen_sysret64(void); void xen_adjust_exception_frame(void); +extern int xen_panic_handler_init(void); + #endif /* XEN_OPS_H */ |