Diffstat (limited to 'arch/um/kernel')
 arch/um/kernel/Makefile              |   4
 arch/um/kernel/dtb.c                 |   3
 arch/um/kernel/dyn.lds.S             |   5
 arch/um/kernel/exec.c                |  12
 arch/um/kernel/initrd.c              |   1
 arch/um/kernel/irq.c                 | 272
 arch/um/kernel/kmsg_dump.c           |   4
 arch/um/kernel/ksyms.c               |   2
 arch/um/kernel/load_file.c           |   4
 arch/um/kernel/maccess.c             |  19
 arch/um/kernel/mem.c                 |  65
 arch/um/kernel/physmem.c             |  44
 arch/um/kernel/process.c             | 159
 arch/um/kernel/ptrace.c              |   3
 arch/um/kernel/reboot.c              |  18
 arch/um/kernel/sigio.c               |  26
 arch/um/kernel/skas/.gitignore       |   2
 arch/um/kernel/skas/Makefile         |  41
 arch/um/kernel/skas/clone.c          |  48
 arch/um/kernel/skas/mmu.c            |  39
 arch/um/kernel/skas/process.c        |  31
 arch/um/kernel/skas/stub.c           |  59
 arch/um/kernel/skas/stub_exe.c       |  95
 arch/um/kernel/skas/stub_exe_embed.S |  11
 arch/um/kernel/skas/syscall.c        |  45
 arch/um/kernel/sysrq.c               |   9
 arch/um/kernel/time.c                | 216
 arch/um/kernel/tlb.c                 | 572
 arch/um/kernel/trap.c                |  59
 arch/um/kernel/um_arch.c             |  95
 arch/um/kernel/um_arch.h             |   2
 arch/um/kernel/uml.lds.S             |   2
 32 files changed, 866 insertions(+), 1101 deletions(-)
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index 811188be954c..4df1cd0d2017 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -17,7 +17,7 @@ extra-y := vmlinux.lds
obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \
physmem.o process.o ptrace.o reboot.o sigio.o \
signal.o sysrq.o time.o tlb.o trap.o \
- um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/
+ um_arch.o umid.o kmsg_dump.o capflags.o skas/
obj-y += load_file.o
obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o
@@ -47,7 +47,7 @@ $(obj)/config.c: $(src)/config.c.in $(obj)/config.tmp FORCE
$(call if_changed,quote2)
quiet_cmd_mkcapflags = MKCAP $@
- cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/../../x86/kernel/cpu/mkcapflags.sh $@ $^
+ cmd_mkcapflags = $(CONFIG_SHELL) $(src)/../../x86/kernel/cpu/mkcapflags.sh $@ $^
cpufeature = $(src)/../../x86/include/asm/cpufeatures.h
vmxfeature = $(src)/../../x86/include/asm/vmxfeatures.h
diff --git a/arch/um/kernel/dtb.c b/arch/um/kernel/dtb.c
index 4954188a6a09..15c342426489 100644
--- a/arch/um/kernel/dtb.c
+++ b/arch/um/kernel/dtb.c
@@ -17,7 +17,7 @@ void uml_dtb_init(void)
area = uml_load_file(dtb, &size);
if (area) {
- if (!early_init_dt_scan(area)) {
+ if (!early_init_dt_scan(area, __pa(area))) {
pr_err("invalid DTB %s\n", dtb);
memblock_free(area, size);
return;
@@ -31,6 +31,7 @@ void uml_dtb_init(void)
static int __init uml_dtb_setup(char *line, int *add)
{
+ *add = 0;
dtb = line;
return 0;
}
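
A note on the new "*add = 0" assignment: __uml_setup() handlers receive an "add"
flag which, when left set, causes the consumed option to also be appended to the
kernel command line; handlers that fully consume their argument clear it. A
minimal sketch of the handler plus its registration (the help text here is
illustrative, not quoted from the file):

    static int __init uml_dtb_setup(char *line, int *add)
    {
            *add = 0;       /* fully consumed, don't re-add to cmdline */
            dtb = line;
            return 0;
    }

    __uml_setup("dtb=", uml_dtb_setup,
    "dtb=<filename>\n"
    "    Boot the kernel with the devicetree blob from <filename>.\n\n");
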
diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S
index 3385d653ebd0..a36b7918a011 100644
--- a/arch/um/kernel/dyn.lds.S
+++ b/arch/um/kernel/dyn.lds.S
@@ -116,8 +116,6 @@ SECTIONS
.fini_array : { *(.fini_array) }
.data : {
INIT_TASK_DATA(KERNEL_STACK_SIZE)
- . = ALIGN(KERNEL_STACK_SIZE);
- *(.data..init_irqstack)
DATA_DATA
*(.data.* .gnu.linkonce.d.*)
SORT(CONSTRUCTORS)
@@ -178,3 +176,6 @@ SECTIONS
DISCARDS
}
+
+ASSERT(__syscall_stub_end - __syscall_stub_start <= PAGE_SIZE,
+ "STUB code must not be larger than one page");
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index 827a0d3fa589..cb8b5cd9285c 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -22,17 +22,8 @@
void flush_thread(void)
{
- void *data = NULL;
- int ret;
-
arch_flush_thread(&current->thread.arch);
- ret = unmap(&current->mm->context.id, 0, TASK_SIZE, 1, &data);
- if (ret) {
- printk(KERN_ERR "%s - clearing address space failed, err = %d\n",
- __func__, ret);
- force_sig(SIGKILL);
- }
get_safe_registers(current_pt_regs()->regs.gp,
current_pt_regs()->regs.fp);
@@ -44,8 +35,5 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp)
PT_REGS_IP(regs) = eip;
PT_REGS_SP(regs) = esp;
clear_thread_flag(TIF_SINGLESTEP);
-#ifdef SUBARCH_EXECVE1
- SUBARCH_EXECVE1(regs->regs);
-#endif
}
EXPORT_SYMBOL(start_thread);
diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c
index 47b8cb1a1156..99dba827461c 100644
--- a/arch/um/kernel/initrd.c
+++ b/arch/um/kernel/initrd.c
@@ -34,6 +34,7 @@ int __init read_initrd(void)
static int __init uml_initrd_setup(char *line, int *add)
{
+ *add = 0;
initrd = line;
return 0;
}
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 635d44606bfe..abe8f30a521c 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -37,7 +37,7 @@ struct irq_reg {
bool pending;
bool wakeup;
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
- bool pending_on_resume;
+ bool pending_event;
void (*timetravel_handler)(int, int, void *,
struct time_travel_event *);
struct time_travel_event event;
@@ -52,10 +52,13 @@ struct irq_entry {
bool sigio_workaround;
};
-static DEFINE_SPINLOCK(irq_lock);
+static DEFINE_RAW_SPINLOCK(irq_lock);
static LIST_HEAD(active_fds);
static DECLARE_BITMAP(irqs_allocated, UM_LAST_SIGNAL_IRQ);
static bool irqs_suspended;
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+static bool irqs_pending;
+#endif
static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs)
{
@@ -84,9 +87,12 @@ static void irq_event_handler(struct time_travel_event *ev)
{
struct irq_reg *reg = container_of(ev, struct irq_reg, event);
- /* do nothing if suspended - just to cause a wakeup */
- if (irqs_suspended)
+ /* do nothing if suspended; just cause a wakeup and mark as pending */
+ if (irqs_suspended) {
+ irqs_pending = true;
+ reg->pending_event = true;
return;
+ }
generic_handle_irq(reg->irq);
}
@@ -110,16 +116,47 @@ static bool irq_do_timetravel_handler(struct irq_entry *entry,
if (!reg->event.pending)
return false;
- if (irqs_suspended)
- reg->pending_on_resume = true;
return true;
}
+
+static void irq_do_pending_events(bool timetravel_handlers_only)
+{
+ struct irq_entry *entry;
+
+ if (!irqs_pending || timetravel_handlers_only)
+ return;
+
+ irqs_pending = false;
+
+ list_for_each_entry(entry, &active_fds, list) {
+ enum um_irq_type t;
+
+ for (t = 0; t < NUM_IRQ_TYPES; t++) {
+ struct irq_reg *reg = &entry->reg[t];
+
+ /*
+ * Any timetravel_handler was invoked already, just
+ * directly run the IRQ.
+ */
+ if (reg->pending_event) {
+ irq_enter();
+ generic_handle_irq(reg->irq);
+ irq_exit();
+ reg->pending_event = false;
+ }
+ }
+ }
+}
#else
static bool irq_do_timetravel_handler(struct irq_entry *entry,
enum um_irq_type t)
{
return false;
}
+
+static void irq_do_pending_events(bool timetravel_handlers_only)
+{
+}
#endif
static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type t,
@@ -145,6 +182,8 @@ static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type
*/
if (timetravel_handlers_only) {
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+ reg->pending_event = true;
+ irqs_pending = true;
mark_sigio_pending();
#endif
return;
@@ -162,6 +201,10 @@ static void _sigio_handler(struct uml_pt_regs *regs,
if (timetravel_handlers_only && !um_irq_timetravel_handler_used())
return;
+ /* Flush out pending events that were ignored due to time-travel. */
+ if (!irqs_suspended)
+ irq_do_pending_events(timetravel_handlers_only);
+
while (1) {
/* This is now lockless - epoll keeps back-referencesto the irqs
* which have trigger it so there is no need to walk the irq
@@ -193,9 +236,12 @@ static void _sigio_handler(struct uml_pt_regs *regs,
free_irqs();
}
-void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
+void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs,
+ void *mc)
{
+ preempt_disable();
_sigio_handler(regs, irqs_suspended);
+ preempt_enable();
}
static struct irq_entry *get_irq_entry_by_fd(int fd)
@@ -212,7 +258,7 @@ static struct irq_entry *get_irq_entry_by_fd(int fd)
return NULL;
}
-static void free_irq_entry(struct irq_entry *to_free, bool remove)
+static void remove_irq_entry(struct irq_entry *to_free, bool remove)
{
if (!to_free)
return;
@@ -220,7 +266,6 @@ static void free_irq_entry(struct irq_entry *to_free, bool remove)
if (remove)
os_del_epoll_fd(to_free->fd);
list_del(&to_free->list);
- kfree(to_free);
}
static bool update_irq_entry(struct irq_entry *entry)
@@ -241,17 +286,19 @@ static bool update_irq_entry(struct irq_entry *entry)
return false;
}
-static void update_or_free_irq_entry(struct irq_entry *entry)
+static struct irq_entry *update_or_remove_irq_entry(struct irq_entry *entry)
{
- if (!update_irq_entry(entry))
- free_irq_entry(entry, false);
+ if (update_irq_entry(entry))
+ return NULL;
+ remove_irq_entry(entry, false);
+ return entry;
}
static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id,
void (*timetravel_handler)(int, int, void *,
struct time_travel_event *))
{
- struct irq_entry *irq_entry;
+ struct irq_entry *irq_entry, *to_free = NULL;
int err, events = os_event_mask(type);
unsigned long flags;
@@ -259,9 +306,10 @@ static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id,
if (err < 0)
goto out;
- spin_lock_irqsave(&irq_lock, flags);
+ raw_spin_lock_irqsave(&irq_lock, flags);
irq_entry = get_irq_entry_by_fd(fd);
if (irq_entry) {
+already:
/* cannot register the same FD twice with the same type */
if (WARN_ON(irq_entry->reg[type].events)) {
err = -EALREADY;
@@ -271,11 +319,22 @@ static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id,
/* temporarily disable to avoid IRQ-side locking */
os_del_epoll_fd(fd);
} else {
- irq_entry = kzalloc(sizeof(*irq_entry), GFP_ATOMIC);
- if (!irq_entry) {
- err = -ENOMEM;
- goto out_unlock;
+ struct irq_entry *new;
+
+ /* don't restore interrupts */
+ raw_spin_unlock(&irq_lock);
+ new = kzalloc(sizeof(*irq_entry), GFP_ATOMIC);
+ if (!new) {
+ local_irq_restore(flags);
+ return -ENOMEM;
+ }
+ raw_spin_lock(&irq_lock);
+ irq_entry = get_irq_entry_by_fd(fd);
+ if (irq_entry) {
+ to_free = new;
+ goto already;
}
+ irq_entry = new;
irq_entry->fd = fd;
list_add_tail(&irq_entry->list, &active_fds);
maybe_sigio_broken(fd);
@@ -294,12 +353,11 @@ static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id,
#endif
WARN_ON(!update_irq_entry(irq_entry));
- spin_unlock_irqrestore(&irq_lock, flags);
-
- return 0;
+ err = 0;
out_unlock:
- spin_unlock_irqrestore(&irq_lock, flags);
+ raw_spin_unlock_irqrestore(&irq_lock, flags);
out:
+ kfree(to_free);
return err;
}
@@ -313,19 +371,20 @@ void free_irq_by_fd(int fd)
struct irq_entry *to_free;
unsigned long flags;
- spin_lock_irqsave(&irq_lock, flags);
+ raw_spin_lock_irqsave(&irq_lock, flags);
to_free = get_irq_entry_by_fd(fd);
- free_irq_entry(to_free, true);
- spin_unlock_irqrestore(&irq_lock, flags);
+ remove_irq_entry(to_free, true);
+ raw_spin_unlock_irqrestore(&irq_lock, flags);
+ kfree(to_free);
}
EXPORT_SYMBOL(free_irq_by_fd);
static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
{
- struct irq_entry *entry;
+ struct irq_entry *entry, *to_free = NULL;
unsigned long flags;
- spin_lock_irqsave(&irq_lock, flags);
+ raw_spin_lock_irqsave(&irq_lock, flags);
list_for_each_entry(entry, &active_fds, list) {
enum um_irq_type i;
@@ -341,12 +400,13 @@ static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
os_del_epoll_fd(entry->fd);
reg->events = 0;
- update_or_free_irq_entry(entry);
+ to_free = update_or_remove_irq_entry(entry);
goto out;
}
}
out:
- spin_unlock_irqrestore(&irq_lock, flags);
+ raw_spin_unlock_irqrestore(&irq_lock, flags);
+ kfree(to_free);
}
void deactivate_fd(int fd, int irqnum)
@@ -357,7 +417,7 @@ void deactivate_fd(int fd, int irqnum)
os_del_epoll_fd(fd);
- spin_lock_irqsave(&irq_lock, flags);
+ raw_spin_lock_irqsave(&irq_lock, flags);
entry = get_irq_entry_by_fd(fd);
if (!entry)
goto out;
@@ -369,9 +429,10 @@ void deactivate_fd(int fd, int irqnum)
entry->reg[i].events = 0;
}
- update_or_free_irq_entry(entry);
+ entry = update_or_remove_irq_entry(entry);
out:
- spin_unlock_irqrestore(&irq_lock, flags);
+ raw_spin_unlock_irqrestore(&irq_lock, flags);
+ kfree(entry);
ignore_sigio_fd(fd);
}
@@ -501,7 +562,7 @@ void um_irqs_suspend(void)
irqs_suspended = true;
- spin_lock_irqsave(&irq_lock, flags);
+ raw_spin_lock_irqsave(&irq_lock, flags);
list_for_each_entry(entry, &active_fds, list) {
enum um_irq_type t;
bool clear = true;
@@ -534,7 +595,7 @@ void um_irqs_suspend(void)
!__ignore_sigio_fd(entry->fd);
}
}
- spin_unlock_irqrestore(&irq_lock, flags);
+ raw_spin_unlock_irqrestore(&irq_lock, flags);
}
void um_irqs_resume(void)
@@ -543,30 +604,7 @@ void um_irqs_resume(void)
unsigned long flags;
- local_irq_save(flags);
-#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
- /*
- * We don't need to lock anything here since we're in resume
- * and nothing else is running, but have disabled IRQs so we
- * don't try anything else with the interrupt list from there.
- */
- list_for_each_entry(entry, &active_fds, list) {
- enum um_irq_type t;
-
- for (t = 0; t < NUM_IRQ_TYPES; t++) {
- struct irq_reg *reg = &entry->reg[t];
-
- if (reg->pending_on_resume) {
- irq_enter();
- generic_handle_irq(reg->irq);
- irq_exit();
- reg->pending_on_resume = false;
- }
- }
- }
-#endif
-
- spin_lock(&irq_lock);
+ raw_spin_lock_irqsave(&irq_lock, flags);
list_for_each_entry(entry, &active_fds, list) {
if (entry->suspended) {
int err = os_set_fd_async(entry->fd);
@@ -580,7 +618,7 @@ void um_irqs_resume(void)
}
}
}
- spin_unlock_irqrestore(&irq_lock, flags);
+ raw_spin_unlock_irqrestore(&irq_lock, flags);
irqs_suspended = false;
send_sigio_to_self();
@@ -591,7 +629,7 @@ static int normal_irq_set_wake(struct irq_data *d, unsigned int on)
struct irq_entry *entry;
unsigned long flags;
- spin_lock_irqsave(&irq_lock, flags);
+ raw_spin_lock_irqsave(&irq_lock, flags);
list_for_each_entry(entry, &active_fds, list) {
enum um_irq_type t;
@@ -606,7 +644,7 @@ static int normal_irq_set_wake(struct irq_data *d, unsigned int on)
}
}
unlock:
- spin_unlock_irqrestore(&irq_lock, flags);
+ raw_spin_unlock_irqrestore(&irq_lock, flags);
return 0;
}
#else
@@ -652,115 +690,3 @@ void __init init_IRQ(void)
/* Initialize EPOLL Loop */
os_setup_epoll();
}
-
-/*
- * IRQ stack entry and exit:
- *
- * Unlike i386, UML doesn't receive IRQs on the normal kernel stack
- * and switch over to the IRQ stack after some preparation. We use
- * sigaltstack to receive signals on a separate stack from the start.
- * These two functions make sure the rest of the kernel won't be too
- * upset by being on a different stack. The IRQ stack has a
- * thread_info structure at the bottom so that current et al continue
- * to work.
- *
- * to_irq_stack copies the current task's thread_info to the IRQ stack
- * thread_info and sets the tasks's stack to point to the IRQ stack.
- *
- * from_irq_stack copies the thread_info struct back (flags may have
- * been modified) and resets the task's stack pointer.
- *
- * Tricky bits -
- *
- * What happens when two signals race each other? UML doesn't block
- * signals with sigprocmask, SA_DEFER, or sa_mask, so a second signal
- * could arrive while a previous one is still setting up the
- * thread_info.
- *
- * There are three cases -
- * The first interrupt on the stack - sets up the thread_info and
- * handles the interrupt
- * A nested interrupt interrupting the copying of the thread_info -
- * can't handle the interrupt, as the stack is in an unknown state
- * A nested interrupt not interrupting the copying of the
- * thread_info - doesn't do any setup, just handles the interrupt
- *
- * The first job is to figure out whether we interrupted stack setup.
- * This is done by xchging the signal mask with thread_info->pending.
- * If the value that comes back is zero, then there is no setup in
- * progress, and the interrupt can be handled. If the value is
- * non-zero, then there is stack setup in progress. In order to have
- * the interrupt handled, we leave our signal in the mask, and it will
- * be handled by the upper handler after it has set up the stack.
- *
- * Next is to figure out whether we are the outer handler or a nested
- * one. As part of setting up the stack, thread_info->real_thread is
- * set to non-NULL (and is reset to NULL on exit). This is the
- * nesting indicator. If it is non-NULL, then the stack is already
- * set up and the handler can run.
- */
-
-static unsigned long pending_mask;
-
-unsigned long to_irq_stack(unsigned long *mask_out)
-{
- struct thread_info *ti;
- unsigned long mask, old;
- int nested;
-
- mask = xchg(&pending_mask, *mask_out);
- if (mask != 0) {
- /*
- * If any interrupts come in at this point, we want to
- * make sure that their bits aren't lost by our
- * putting our bit in. So, this loop accumulates bits
- * until xchg returns the same value that we put in.
- * When that happens, there were no new interrupts,
- * and pending_mask contains a bit for each interrupt
- * that came in.
- */
- old = *mask_out;
- do {
- old |= mask;
- mask = xchg(&pending_mask, old);
- } while (mask != old);
- return 1;
- }
-
- ti = current_thread_info();
- nested = (ti->real_thread != NULL);
- if (!nested) {
- struct task_struct *task;
- struct thread_info *tti;
-
- task = cpu_tasks[ti->cpu].task;
- tti = task_thread_info(task);
-
- *ti = *tti;
- ti->real_thread = tti;
- task->stack = ti;
- }
-
- mask = xchg(&pending_mask, 0);
- *mask_out |= mask | nested;
- return 0;
-}
-
-unsigned long from_irq_stack(int nested)
-{
- struct thread_info *ti, *to;
- unsigned long mask;
-
- ti = current_thread_info();
-
- pending_mask = 1;
-
- to = ti->real_thread;
- current->stack = to;
- ti->real_thread = NULL;
- *to = *ti;
-
- mask = xchg(&pending_mask, 0);
- return mask & ~1;
-}
-
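
The replay of interrupts deferred during suspend moved out of um_irqs_resume()
and into the sigio path above. The full deferred-delivery flow, assembled from
these hunks (no new API, only the fields introduced in this patch):

    /* signal context while irqs_suspended: only record the event */
    reg->pending_event = true;
    irqs_pending = true;

    /* later, from _sigio_handler() once no longer suspended: */
    if (reg->pending_event) {
            irq_enter();
            generic_handle_irq(reg->irq);
            irq_exit();
            reg->pending_event = false;
    }
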
diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c
index 427dd5a61a38..419021175272 100644
--- a/arch/um/kernel/kmsg_dump.c
+++ b/arch/um/kernel/kmsg_dump.c
@@ -8,7 +8,7 @@
#include <os.h>
static void kmsg_dumper_stdout(struct kmsg_dumper *dumper,
- enum kmsg_dump_reason reason)
+ struct kmsg_dump_detail *detail)
{
static struct kmsg_dump_iter iter;
static DEFINE_SPINLOCK(lock);
@@ -57,7 +57,7 @@ static struct kmsg_dumper kmsg_dumper = {
.dump = kmsg_dumper_stdout
};
-int __init kmsg_dumper_stdout_init(void)
+static int __init kmsg_dumper_stdout_init(void)
{
return kmsg_dump_register(&kmsg_dumper);
}
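
The dump callback's second argument changed from a bare reason to a
struct kmsg_dump_detail; the reason is now reached through that struct. A
sketch of what a reason check looks like under the new signature (the filter
itself is illustrative; this dumper does not filter):

    static void kmsg_dumper_stdout(struct kmsg_dumper *dumper,
                                   struct kmsg_dump_detail *detail)
    {
            if (detail->reason != KMSG_DUMP_PANIC)  /* illustrative only */
                    return;
            /* ... existing iter/lock dump logic unchanged ... */
    }
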
diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
index 3a85bde3e173..f2fb77da08cf 100644
--- a/arch/um/kernel/ksyms.c
+++ b/arch/um/kernel/ksyms.c
@@ -33,7 +33,7 @@ EXPORT_SYMBOL(os_shutdown_socket);
EXPORT_SYMBOL(os_create_unix_socket);
EXPORT_SYMBOL(os_connect_socket);
EXPORT_SYMBOL(os_accept_connection);
-EXPORT_SYMBOL(os_rcv_fd);
+EXPORT_SYMBOL(os_rcv_fd_msg);
EXPORT_SYMBOL(run_helper);
EXPORT_SYMBOL(os_major);
EXPORT_SYMBOL(os_minor);
diff --git a/arch/um/kernel/load_file.c b/arch/um/kernel/load_file.c
index 5cecd0e291fb..cb9d178ab7d8 100644
--- a/arch/um/kernel/load_file.c
+++ b/arch/um/kernel/load_file.c
@@ -48,9 +48,7 @@ void *uml_load_file(const char *filename, unsigned long long *size)
return NULL;
}
- area = memblock_alloc(*size, SMP_CACHE_BYTES);
- if (!area)
- panic("%s: Failed to allocate %llu bytes\n", __func__, *size);
+ area = memblock_alloc_or_panic(*size, SMP_CACHE_BYTES);
if (__uml_load_file(filename, area, *size)) {
memblock_free(area, *size);
diff --git a/arch/um/kernel/maccess.c b/arch/um/kernel/maccess.c
deleted file mode 100644
index 8ccd56813f68..000000000000
--- a/arch/um/kernel/maccess.c
+++ /dev/null
@@ -1,19 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2013 Richard Weinberger <richrd@nod.at>
- */
-
-#include <linux/uaccess.h>
-#include <linux/kernel.h>
-#include <os.h>
-
-bool copy_from_kernel_nofault_allowed(const void *src, size_t size)
-{
- void *psrc = (void *)rounddown((unsigned long)src, PAGE_SIZE);
-
- if ((unsigned long)src < PAGE_SIZE || size <= 0)
- return false;
- if (os_mincore(psrc, size + src - psrc) <= 0)
- return false;
- return true;
-}
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 38d5a71a579b..76bec7de81b5 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -6,18 +6,20 @@
#include <linux/stddef.h>
#include <linux/module.h>
#include <linux/memblock.h>
-#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/slab.h>
-#include <asm/fixmap.h>
+#include <linux/init.h>
+#include <asm/sections.h>
#include <asm/page.h>
+#include <asm/pgalloc.h>
#include <as-layout.h>
#include <init.h>
#include <kern.h>
#include <kern_util.h>
#include <mem_user.h>
#include <os.h>
+#include <um_malloc.h>
#include <linux/sched/task.h>
#ifdef CONFIG_KASAN
@@ -49,14 +51,12 @@ EXPORT_SYMBOL(empty_zero_page);
pgd_t swapper_pg_dir[PTRS_PER_PGD];
/* Initialized at boot time, and readonly after that */
-unsigned long long highmem;
-EXPORT_SYMBOL(highmem);
int kmalloc_ok = 0;
/* Used during early boot */
static unsigned long brk_end;
-void __init mem_init(void)
+void __init arch_mm_preinit(void)
{
/* clear the zero-page */
memset(empty_zero_page, 0, PAGE_SIZE);
@@ -68,14 +68,16 @@ void __init mem_init(void)
map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0);
memblock_free((void *)brk_end, uml_reserved - brk_end);
uml_reserved = brk_end;
-
- /* this will put all low memory onto the freelists */
- memblock_free_all();
- max_low_pfn = totalram_pages();
+ min_low_pfn = PFN_UP(__pa(uml_reserved));
max_pfn = max_low_pfn;
+}
+
+void __init mem_init(void)
+{
kmalloc_ok = 1;
}
+#if IS_ENABLED(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA)
/*
* Create a page table and place a pointer to it in a middle page
* directory entry.
@@ -97,7 +99,7 @@ static void __init one_page_table_init(pmd_t *pmd)
static void __init one_md_table_init(pud_t *pud)
{
-#ifdef CONFIG_3_LEVEL_PGTABLES
+#if CONFIG_PGTABLE_LEVELS > 2
pmd_t *pmd_table = (pmd_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
if (!pmd_table)
panic("%s: Failed to allocate %lu bytes align=%lx\n",
@@ -108,6 +110,19 @@ static void __init one_md_table_init(pud_t *pud)
#endif
}
+static void __init one_ud_table_init(p4d_t *p4d)
+{
+#if CONFIG_PGTABLE_LEVELS > 3
+ pud_t *pud_table = (pud_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
+ if (!pud_table)
+ panic("%s: Failed to allocate %lu bytes align=%lx\n",
+ __func__, PAGE_SIZE, PAGE_SIZE);
+
+ set_p4d(p4d, __p4d(_KERNPG_TABLE + (unsigned long) __pa(pud_table)));
+ BUG_ON(pud_table != pud_offset(p4d, 0));
+#endif
+}
+
static void __init fixrange_init(unsigned long start, unsigned long end,
pgd_t *pgd_base)
{
@@ -125,6 +140,8 @@ static void __init fixrange_init(unsigned long start, unsigned long end,
for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) {
p4d = p4d_offset(pgd, vaddr);
+ if (p4d_none(*p4d))
+ one_ud_table_init(p4d);
pud = pud_offset(p4d, vaddr);
if (pud_none(*pud))
one_md_table_init(pud);
@@ -139,7 +156,6 @@ static void __init fixrange_init(unsigned long start, unsigned long end,
static void __init fixaddr_user_init( void)
{
-#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA
long size = FIXADDR_USER_END - FIXADDR_USER_START;
pte_t *pte;
phys_t p;
@@ -161,13 +177,12 @@ static void __init fixaddr_user_init( void)
pte = virt_to_kpte(vaddr);
pte_set_val(*pte, p, PAGE_READONLY);
}
-#endif
}
+#endif
void __init paging_init(void)
{
unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 };
- unsigned long vaddr;
empty_zero_page = (unsigned long *) memblock_alloc_low(PAGE_SIZE,
PAGE_SIZE);
@@ -178,14 +193,9 @@ void __init paging_init(void)
max_zone_pfn[ZONE_NORMAL] = end_iomem >> PAGE_SHIFT;
free_area_init(max_zone_pfn);
- /*
- * Fixed mappings, only the page table structure has to be
- * created - mappings will be set by set_fixmap():
- */
- vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
- fixrange_init(vaddr, FIXADDR_TOP, swapper_pg_dir);
-
+#if IS_ENABLED(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA)
fixaddr_user_init();
+#endif
}
/*
@@ -201,14 +211,13 @@ void free_initmem(void)
pgd_t *pgd_alloc(struct mm_struct *mm)
{
- pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
+ pgd_t *pgd = __pgd_alloc(mm, 0);
- if (pgd) {
- memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
+ if (pgd)
memcpy(pgd + USER_PTRS_PER_PGD,
swapper_pg_dir + USER_PTRS_PER_PGD,
(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
- }
+
return pgd;
}
@@ -236,3 +245,11 @@ static const pgprot_t protection_map[16] = {
[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED
};
DECLARE_VM_GET_PAGE_PROT
+
+void mark_rodata_ro(void)
+{
+ unsigned long rodata_start = PFN_ALIGN(__start_rodata);
+ unsigned long rodata_end = PFN_ALIGN(__end_rodata);
+
+ os_protect_memory((void *)rodata_start, rodata_end - rodata_start, 1, 0, 0);
+}
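
The new mark_rodata_ro() delegates to the host: os_protect_memory() is UML's
wrapper around host-side mprotect(), so the call above is conceptually (sketch
of the host-side effect only):

    /* effect of os_protect_memory(addr, len, 1, 0, 0) on the host: */
    mprotect((void *)rodata_start, rodata_end - rodata_start, PROT_READ);
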
diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
index 91485119ae67..af02b5f9911d 100644
--- a/arch/um/kernel/physmem.c
+++ b/arch/um/kernel/physmem.c
@@ -12,6 +12,7 @@
#include <as-layout.h>
#include <init.h>
#include <kern.h>
+#include <kern_util.h>
#include <mem_user.h>
#include <os.h>
@@ -21,23 +22,6 @@ static int physmem_fd = -1;
unsigned long high_physmem;
EXPORT_SYMBOL(high_physmem);
-extern unsigned long long physmem_size;
-
-void __init mem_total_pages(unsigned long physmem, unsigned long iomem,
- unsigned long highmem)
-{
- unsigned long phys_pages, highmem_pages;
- unsigned long iomem_pages, total_pages;
-
- phys_pages = physmem >> PAGE_SHIFT;
- iomem_pages = iomem >> PAGE_SHIFT;
- highmem_pages = highmem >> PAGE_SHIFT;
-
- total_pages = phys_pages + iomem_pages + highmem_pages;
-
- max_mapnr = total_pages;
-}
-
void map_memory(unsigned long virt, unsigned long phys, unsigned long len,
int r, int w, int x)
{
@@ -63,13 +47,12 @@ void map_memory(unsigned long virt, unsigned long phys, unsigned long len,
* @reserve_end: end address of the physical kernel memory.
* @len: Length of total physical memory that should be mapped/made
* available, in bytes.
- * @highmem: Number of highmem bytes that should be mapped/made available.
*
- * Creates an unlinked temporary file of size (len + highmem) and memory maps
+ * Creates an unlinked temporary file of size (len) and memory maps
* it on the last executable image address (uml_reserved).
*
* The offset is needed as the length of the total physical memory
- * (len + highmem) includes the size of the memory used be the executable image,
+ * (len) includes the size of the memory used be the executable image,
* but the mapped-to address is the last address of the executable image
* (uml_reserved == end address of executable image).
*
@@ -77,24 +60,24 @@ void map_memory(unsigned long virt, unsigned long phys, unsigned long len,
* of all user space processes/kernel tasks.
*/
void __init setup_physmem(unsigned long start, unsigned long reserve_end,
- unsigned long len, unsigned long long highmem)
+ unsigned long len)
{
unsigned long reserve = reserve_end - start;
- long map_size = len - reserve;
+ unsigned long map_size = len - reserve;
int err;
- if(map_size <= 0) {
+ if (len <= reserve) {
os_warn("Too few physical memory! Needed=%lu, given=%lu\n",
reserve, len);
exit(1);
}
- physmem_fd = create_mem_file(len + highmem);
+ physmem_fd = create_mem_file(len);
err = os_map_memory((void *) reserve_end, physmem_fd, reserve,
map_size, 1, 1, 1);
if (err < 0) {
- os_warn("setup_physmem - mapping %ld bytes of memory at 0x%p "
+ os_warn("setup_physmem - mapping %lu bytes of memory at 0x%p "
"failed - errno = %d\n", map_size,
(void *) reserve_end, err);
exit(1);
@@ -106,9 +89,8 @@ void __init setup_physmem(unsigned long start, unsigned long reserve_end,
*/
os_seek_file(physmem_fd, __pa(__syscall_stub_start));
os_write_file(physmem_fd, __syscall_stub_start, PAGE_SIZE);
- os_fsync_file(physmem_fd);
- memblock_add(__pa(start), len + highmem);
+ memblock_add(__pa(start), len);
memblock_reserve(__pa(start), reserve);
min_low_pfn = PFN_UP(__pa(reserve_end));
@@ -136,10 +118,6 @@ int phys_mapping(unsigned long phys, unsigned long long *offset_out)
region = region->next;
}
}
- else if (phys < __pa(end_iomem) + highmem) {
- fd = physmem_fd;
- *offset_out = phys - iomem_size;
- }
return fd;
}
@@ -148,6 +126,8 @@ EXPORT_SYMBOL(phys_mapping);
static int __init uml_mem_setup(char *line, int *add)
{
char *retptr;
+
+ *add = 0;
physmem_size = memparse(line,&retptr);
return 0;
}
@@ -161,8 +141,6 @@ __uml_setup("mem=", uml_mem_setup,
" Example: mem=64M\n\n"
);
-extern int __init parse_iomem(char *str, int *add);
-
__uml_setup("iomem=", parse_iomem,
"iomem=<name>,<file>\n"
" Configure <file> as an IO memory region named <name>.\n\n"
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index ab95648e93e1..0cd6fad3d908 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -15,6 +15,7 @@
#include <linux/proc_fs.h>
#include <linux/ptrace.h>
#include <linux/random.h>
+#include <linux/cpu.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
@@ -26,6 +27,8 @@
#include <linux/resume_user_mode.h>
#include <asm/current.h>
#include <asm/mmu_context.h>
+#include <asm/switch_to.h>
+#include <asm/exec.h>
#include <linux/uaccess.h>
#include <as-layout.h>
#include <kern_util.h>
@@ -40,24 +43,8 @@
* cares about its entry, so it's OK if another processor is modifying its
* entry.
*/
-struct cpu_task cpu_tasks[NR_CPUS] = { [0 ... NR_CPUS - 1] = { -1, NULL } };
-
-static inline int external_pid(void)
-{
- /* FIXME: Need to look up userspace_pid by cpu */
- return userspace_pid[0];
-}
-
-int pid_to_processor_id(int pid)
-{
- int i;
-
- for (i = 0; i < ncpus; i++) {
- if (cpu_tasks[i].pid == pid)
- return i;
- }
- return -1;
-}
+struct task_struct *cpu_tasks[NR_CPUS];
+EXPORT_SYMBOL(cpu_tasks);
void free_stack(unsigned long stack, int order)
{
@@ -78,13 +65,10 @@ unsigned long alloc_stack(int order, int atomic)
static inline void set_current(struct task_struct *task)
{
- cpu_tasks[task_thread_info(task)->cpu] = ((struct cpu_task)
- { external_pid(), task });
+ cpu_tasks[task_thread_info(task)->cpu] = task;
}
-extern void arch_switch_to(struct task_struct *to);
-
-void *__switch_to(struct task_struct *from, struct task_struct *to)
+struct task_struct *__switch_to(struct task_struct *from, struct task_struct *to)
{
to->thread.prev_sched = from;
set_current(to);
@@ -119,28 +103,26 @@ int get_current_pid(void)
*/
void new_thread_handler(void)
{
- int (*fn)(void *), n;
+ int (*fn)(void *);
void *arg;
if (current->thread.prev_sched != NULL)
schedule_tail(current->thread.prev_sched);
current->thread.prev_sched = NULL;
- fn = current->thread.request.u.thread.proc;
- arg = current->thread.request.u.thread.arg;
+ fn = current->thread.request.thread.proc;
+ arg = current->thread.request.thread.arg;
/*
* callback returns only if the kernel thread execs a process
*/
- n = fn(arg);
- userspace(&current->thread.regs.regs, current_thread_info()->aux_fp_regs);
+ fn(arg);
+ userspace(&current->thread.regs.regs);
}
/* Called magically, see new_thread_handler above */
-void fork_handler(void)
+static void fork_handler(void)
{
- force_flush_all();
-
schedule_tail(current->thread.prev_sched);
/*
@@ -152,7 +134,7 @@ void fork_handler(void)
current->thread.prev_sched = NULL;
- userspace(&current->thread.regs.regs, current_thread_info()->aux_fp_regs);
+ userspace(&current->thread.regs.regs);
}
int copy_thread(struct task_struct * p, const struct kernel_clone_args *args)
@@ -177,8 +159,8 @@ int copy_thread(struct task_struct * p, const struct kernel_clone_args *args)
arch_copy_thread(&current->thread.arch, &p->thread.arch);
} else {
get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp);
- p->thread.request.u.thread.proc = args->fn;
- p->thread.request.u.thread.arg = args->fn_arg;
+ p->thread.request.thread.proc = args->fn;
+ p->thread.request.thread.arg = args->fn_arg;
handler = new_thread_handler;
}
@@ -206,6 +188,21 @@ void initial_thread_cb(void (*proc)(void *), void *arg)
kmalloc_ok = save_kmalloc_ok;
}
+int arch_dup_task_struct(struct task_struct *dst,
+ struct task_struct *src)
+{
+ /* init_task is not dynamically sized (missing FPU state) */
+ if (unlikely(src == &init_task)) {
+ memcpy(dst, src, sizeof(init_task));
+ memset((void *)dst + sizeof(init_task), 0,
+ arch_task_struct_size - sizeof(init_task));
+ } else {
+ memcpy(dst, src, arch_task_struct_size);
+ }
+
+ return 0;
+}
+
void um_idle_sleep(void)
{
if (time_travel_mode != TT_MODE_OFF)
@@ -216,7 +213,6 @@ void um_idle_sleep(void)
void arch_cpu_idle(void)
{
- cpu_tasks[current_thread_info()->cpu].pid = os_getpid();
um_idle_sleep();
}
@@ -225,14 +221,6 @@ int __uml_cant_sleep(void) {
/* Is in_interrupt() really needed? */
}
-int user_context(unsigned long sp)
-{
- unsigned long stack;
-
- stack = sp & (PAGE_MASK << CONFIG_KERNEL_STACK_ORDER);
- return stack != (unsigned long) current_thread_info();
-}
-
extern exitcall_t __uml_exitcall_begin, __uml_exitcall_end;
void do_uml_exitcalls(void)
@@ -250,88 +238,11 @@ char *uml_strdup(const char *string)
}
EXPORT_SYMBOL(uml_strdup);
-int copy_to_user_proc(void __user *to, void *from, int size)
-{
- return copy_to_user(to, from, size);
-}
-
int copy_from_user_proc(void *to, void __user *from, int size)
{
return copy_from_user(to, from, size);
}
-int clear_user_proc(void __user *buf, int size)
-{
- return clear_user(buf, size);
-}
-
-static atomic_t using_sysemu = ATOMIC_INIT(0);
-int sysemu_supported;
-
-void set_using_sysemu(int value)
-{
- if (value > sysemu_supported)
- return;
- atomic_set(&using_sysemu, value);
-}
-
-int get_using_sysemu(void)
-{
- return atomic_read(&using_sysemu);
-}
-
-static int sysemu_proc_show(struct seq_file *m, void *v)
-{
- seq_printf(m, "%d\n", get_using_sysemu());
- return 0;
-}
-
-static int sysemu_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, sysemu_proc_show, NULL);
-}
-
-static ssize_t sysemu_proc_write(struct file *file, const char __user *buf,
- size_t count, loff_t *pos)
-{
- char tmp[2];
-
- if (copy_from_user(tmp, buf, 1))
- return -EFAULT;
-
- if (tmp[0] >= '0' && tmp[0] <= '2')
- set_using_sysemu(tmp[0] - '0');
- /* We use the first char, but pretend to write everything */
- return count;
-}
-
-static const struct proc_ops sysemu_proc_ops = {
- .proc_open = sysemu_proc_open,
- .proc_read = seq_read,
- .proc_lseek = seq_lseek,
- .proc_release = single_release,
- .proc_write = sysemu_proc_write,
-};
-
-int __init make_proc_sysemu(void)
-{
- struct proc_dir_entry *ent;
- if (!sysemu_supported)
- return 0;
-
- ent = proc_create("sysemu", 0600, NULL, &sysemu_proc_ops);
-
- if (ent == NULL)
- {
- printk(KERN_WARNING "Failed to register /proc/sysemu\n");
- return 0;
- }
-
- return 0;
-}
-
-late_initcall(make_proc_sysemu);
-
int singlestepping(void)
{
return test_thread_flag(TIF_SINGLESTEP);
@@ -384,11 +295,3 @@ unsigned long __get_wchan(struct task_struct *p)
return 0;
}
-
-int elf_core_copy_task_fpregs(struct task_struct *t, elf_fpregset_t *fpu)
-{
- int cpu = current_thread_info()->cpu;
-
- return save_i387_registers(userspace_pid[cpu], (unsigned long *) fpu);
-}
-
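
The new arch_dup_task_struct() exists because init_task is allocated
statically, without the dynamically sized FPU state that follows a normal
task_struct. The resulting destination layout, as a comment (sketch):

    /*
     * dst layout when src == &init_task:
     *   [0, sizeof(init_task))                       copied from init_task
     *   [sizeof(init_task), arch_task_struct_size)   zero-filled FPU area
     */
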
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
index 6600a2782796..2124624b7817 100644
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -35,9 +35,6 @@ void ptrace_disable(struct task_struct *child)
user_disable_single_step(child);
}
-extern int peek_user(struct task_struct * child, long addr, long data);
-extern int poke_user(struct task_struct * child, long addr, long data);
-
long arch_ptrace(struct task_struct *child, long request,
unsigned long addr, unsigned long data)
{
diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c
index 48c0610d506e..680bce4bd8fa 100644
--- a/arch/um/kernel/reboot.c
+++ b/arch/um/kernel/reboot.c
@@ -9,6 +9,7 @@
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/oom.h>
+#include <linux/reboot.h>
#include <kern_util.h>
#include <os.h>
#include <skas.h>
@@ -28,7 +29,7 @@ static void kill_off_processes(void)
t = find_lock_task_mm(p);
if (!t)
continue;
- pid = t->mm->context.id.u.pid;
+ pid = t->mm->context.id.pid;
task_unlock(t);
os_kill_ptraced_process(pid, 1);
}
@@ -58,3 +59,18 @@ void machine_halt(void)
{
machine_power_off();
}
+
+static int sys_power_off_handler(struct sys_off_data *data)
+{
+ machine_power_off();
+ return 0;
+}
+
+static int register_power_off(void)
+{
+ register_sys_off_handler(SYS_OFF_MODE_POWER_OFF,
+ SYS_OFF_PRIO_DEFAULT,
+ sys_power_off_handler, NULL);
+ return 0;
+}
+__initcall(register_power_off);
diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c
index 5085a50c3b8c..4fc04742048a 100644
--- a/arch/um/kernel/sigio.c
+++ b/arch/um/kernel/sigio.c
@@ -8,32 +8,6 @@
#include <os.h>
#include <sigio.h>
-/* Protected by sigio_lock() called from write_sigio_workaround */
-static int sigio_irq_fd = -1;
-
-static irqreturn_t sigio_interrupt(int irq, void *data)
-{
- char c;
-
- os_read_file(sigio_irq_fd, &c, sizeof(c));
- return IRQ_HANDLED;
-}
-
-int write_sigio_irq(int fd)
-{
- int err;
-
- err = um_request_irq(SIGIO_WRITE_IRQ, fd, IRQ_READ, sigio_interrupt,
- 0, "write sigio", NULL);
- if (err < 0) {
- printk(KERN_ERR "write_sigio_irq : um_request_irq failed, "
- "err = %d\n", err);
- return -1;
- }
- sigio_irq_fd = fd;
- return 0;
-}
-
/* These are called from os-Linux/sigio.c to protect its pollfds arrays. */
static DEFINE_MUTEX(sigio_mutex);
diff --git a/arch/um/kernel/skas/.gitignore b/arch/um/kernel/skas/.gitignore
new file mode 100644
index 000000000000..c3409ced0f38
--- /dev/null
+++ b/arch/um/kernel/skas/.gitignore
@@ -0,0 +1,2 @@
+stub_exe
+stub_exe.dbg
diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile
index f93972a25765..3384be42691f 100644
--- a/arch/um/kernel/skas/Makefile
+++ b/arch/um/kernel/skas/Makefile
@@ -3,15 +3,48 @@
# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
#
-obj-y := clone.o mmu.o process.o syscall.o uaccess.o
+obj-y := stub.o mmu.o process.o syscall.o uaccess.o \
+ stub_exe_embed.o
-# clone.o is in the stub, so it can't be built with profiling
+# Stub executable
+
+stub_exe_objs-y := stub_exe.o
+
+stub_exe_objs := $(foreach F,$(stub_exe_objs-y),$(obj)/$F)
+
+# Object file containing the ELF executable
+$(obj)/stub_exe_embed.o: $(src)/stub_exe_embed.S $(obj)/stub_exe
+
+$(obj)/stub_exe.dbg: $(stub_exe_objs) FORCE
+ $(call if_changed,stub_exe)
+
+$(obj)/stub_exe: OBJCOPYFLAGS := -S
+$(obj)/stub_exe: $(obj)/stub_exe.dbg FORCE
+ $(call if_changed,objcopy)
+
+quiet_cmd_stub_exe = STUB_EXE $@
+ cmd_stub_exe = $(CC) -nostdlib -o $@ \
+ $(filter-out $(UM_GPROF_OPT) $(UM_GCOV_OPT),$(KBUILD_CFLAGS)) $(STUB_EXE_LDFLAGS) \
+ $(filter %.o,$^)
+
+STUB_EXE_LDFLAGS = -Wl,-n -static
+
+targets += stub_exe.dbg stub_exe $(stub_exe_objs-y)
+
+# end
+
+# stub.o is in the stub, so it can't be built with profiling
# GCC hardened also auto-enables -fpic, but we need %ebx so it can't work ->
# disable it
-CFLAGS_clone.o := $(CFLAGS_NO_HARDENING)
-UNPROFILE_OBJS := clone.o
+CFLAGS_stub.o := $(CFLAGS_NO_HARDENING)
+CFLAGS_stub_exe.o := $(CFLAGS_NO_HARDENING)
+
+# Clang will call memset() from __builtin_alloca() when stack variable
+# initialization is enabled, which is used in stub_exe.c.
+CFLAGS_stub_exe.o += $(call cc-option, -ftrivial-auto-var-init=uninitialized)
+UNPROFILE_OBJS := stub.o stub_exe.o
KCOV_INSTRUMENT := n
include $(srctree)/arch/um/scripts/Makefile.rules
diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c
deleted file mode 100644
index 62435187dda4..000000000000
--- a/arch/um/kernel/skas/clone.c
+++ /dev/null
@@ -1,48 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
- * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#include <signal.h>
-#include <sched.h>
-#include <asm/unistd.h>
-#include <sys/time.h>
-#include <as-layout.h>
-#include <ptrace_user.h>
-#include <stub-data.h>
-#include <sysdep/stub.h>
-
-/*
- * This is in a separate file because it needs to be compiled with any
- * extraneous gcc flags (-pg, -fprofile-arcs, -ftest-coverage) disabled
- *
- * Use UM_KERN_PAGE_SIZE instead of PAGE_SIZE because that calls getpagesize
- * on some systems.
- */
-
-void __attribute__ ((__section__ (".__syscall_stub")))
-stub_clone_handler(void)
-{
- struct stub_data *data = get_stub_data();
- long err;
-
- err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD,
- (unsigned long)data +
- STUB_DATA_PAGES * UM_KERN_PAGE_SIZE / 2);
- if (err) {
- data->parent_err = err;
- goto done;
- }
-
- err = stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0);
- if (err) {
- data->child_err = err;
- goto done;
- }
-
- remap_stack_and_trap();
-
- done:
- trap_myself();
-}
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 656fe16c9b63..0eb5a1d3ba70 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -10,14 +10,18 @@
#include <asm/pgalloc.h>
#include <asm/sections.h>
+#include <asm/mmu_context.h>
#include <as-layout.h>
#include <os.h>
#include <skas.h>
+#include <stub-data.h>
+
+/* Ensure the stub_data struct covers the allocated area */
+static_assert(sizeof(struct stub_data) == STUB_DATA_PAGES * UM_KERN_PAGE_SIZE);
int init_new_context(struct task_struct *task, struct mm_struct *mm)
{
- struct mm_context *from_mm = NULL;
- struct mm_context *to_mm = &mm->context;
+ struct mm_id *new_id = &mm->context.id;
unsigned long stack = 0;
int ret = -ENOMEM;
@@ -25,34 +29,24 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
if (stack == 0)
goto out;
- to_mm->id.stack = stack;
- if (current->mm != NULL && current->mm != &init_mm)
- from_mm = &current->mm->context;
+ new_id->stack = stack;
block_signals_trace();
- if (from_mm)
- to_mm->id.u.pid = copy_context_skas0(stack,
- from_mm->id.u.pid);
- else to_mm->id.u.pid = start_userspace(stack);
+ new_id->pid = start_userspace(stack);
unblock_signals_trace();
- if (to_mm->id.u.pid < 0) {
- ret = to_mm->id.u.pid;
+ if (new_id->pid < 0) {
+ ret = new_id->pid;
goto out_free;
}
- ret = init_new_ldt(to_mm, from_mm);
- if (ret < 0) {
- printk(KERN_ERR "init_new_context_skas - init_ldt"
- " failed, errno = %d\n", ret);
- goto out_free;
- }
+ /* Ensure the new MM is clean and nothing unwanted is mapped */
+ unmap(new_id, 0, STUB_START);
return 0;
out_free:
- if (to_mm->id.stack != 0)
- free_pages(to_mm->id.stack, ilog2(STUB_DATA_PAGES));
+ free_pages(new_id->stack, ilog2(STUB_DATA_PAGES));
out:
return ret;
}
@@ -67,13 +61,12 @@ void destroy_context(struct mm_struct *mm)
* whole UML suddenly dying. Also, cover negative and
* 1 cases, since they shouldn't happen either.
*/
- if (mmu->id.u.pid < 2) {
+ if (mmu->id.pid < 2) {
printk(KERN_ERR "corrupt mm_context - pid = %d\n",
- mmu->id.u.pid);
+ mmu->id.pid);
return;
}
- os_kill_ptraced_process(mmu->id.u.pid, 1);
+ os_kill_ptraced_process(mmu->id.pid, 1);
free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES));
- free_ldt(mmu);
}
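
init_new_context() no longer clones a parent context (copy_context_skas0() and
the LDT setup are gone): every new MM gets a fresh userspace process and is
scrubbed below the stub. The lifecycle, reduced to the calls above (sketch):

    new_id->pid = start_userspace(stack);  /* fresh host process */
    unmap(new_id, 0, STUB_START);          /* nothing mapped below the stub */

    /* ... and on destroy_context(): */
    os_kill_ptraced_process(mmu->id.pid, 1);
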
diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c
index f2ac134c9752..05dcdc057af9 100644
--- a/arch/um/kernel/skas/process.c
+++ b/arch/um/kernel/skas/process.c
@@ -8,22 +8,19 @@
#include <linux/sched/task_stack.h>
#include <linux/sched/task.h>
+#include <asm/tlbflush.h>
+
#include <as-layout.h>
#include <kern.h>
#include <os.h>
#include <skas.h>
+#include <kern_util.h>
extern void start_kernel(void);
static int __init start_kernel_proc(void *unused)
{
- int pid;
-
block_signals_trace();
- pid = os_getpid();
-
- cpu_tasks[0].pid = pid;
- cpu_tasks[0].task = current;
start_kernel();
return 0;
@@ -31,7 +28,7 @@ static int __init start_kernel_proc(void *unused)
extern int userspace_pid[];
-extern char cpu0_irqstack[];
+static char cpu0_irqstack[THREAD_SIZE] __aligned(THREAD_SIZE);
int __init start_uml(void)
{
@@ -40,8 +37,8 @@ int __init start_uml(void)
init_new_thread_signals();
- init_task.thread.request.u.thread.proc = start_kernel_proc;
- init_task.thread.request.u.thread.arg = NULL;
+ init_task.thread.request.thread.proc = start_kernel_proc;
+ init_task.thread.request.thread.arg = NULL;
return start_idle_thread(task_stack_page(&init_task),
&init_task.thread.switch_buf);
}
@@ -53,3 +50,19 @@ unsigned long current_stub_stack(void)
return current->mm->context.id.stack;
}
+
+struct mm_id *current_mm_id(void)
+{
+ if (current->mm == NULL)
+ return NULL;
+
+ return &current->mm->context.id;
+}
+
+void current_mm_sync(void)
+{
+ if (current->mm == NULL)
+ return;
+
+ um_tlb_sync(current->mm);
+}
diff --git a/arch/um/kernel/skas/stub.c b/arch/um/kernel/skas/stub.c
new file mode 100644
index 000000000000..796fc266d3bb
--- /dev/null
+++ b/arch/um/kernel/skas/stub.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
+ */
+
+#include <sysdep/stub.h>
+
+static __always_inline int syscall_handler(struct stub_data *d)
+{
+ int i;
+ unsigned long res;
+
+ for (i = 0; i < d->syscall_data_len; i++) {
+ struct stub_syscall *sc = &d->syscall_data[i];
+
+ switch (sc->syscall) {
+ case STUB_SYSCALL_MMAP:
+ res = stub_syscall6(STUB_MMAP_NR,
+ sc->mem.addr, sc->mem.length,
+ sc->mem.prot,
+ MAP_SHARED | MAP_FIXED,
+ sc->mem.fd, sc->mem.offset);
+ if (res != sc->mem.addr) {
+ d->err = res;
+ d->syscall_data_len = i;
+ return -1;
+ }
+ break;
+ case STUB_SYSCALL_MUNMAP:
+ res = stub_syscall2(__NR_munmap,
+ sc->mem.addr, sc->mem.length);
+ if (res) {
+ d->err = res;
+ d->syscall_data_len = i;
+ return -1;
+ }
+ break;
+ default:
+ d->err = -95; /* EOPNOTSUPP */
+ d->syscall_data_len = i;
+ return -1;
+ }
+ }
+
+ d->err = 0;
+ d->syscall_data_len = 0;
+
+ return 0;
+}
+
+void __section(".__syscall_stub")
+stub_syscall_handler(void)
+{
+ struct stub_data *d = get_stub_data();
+
+ syscall_handler(d);
+
+ trap_myself();
+}
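
The stub drains a batch of requests that the kernel queues in stub_data before
switching to the stub. A hypothetical producer-side snippet using the same
fields the handler reads (for illustration only; the real producer lives
elsewhere in the tree):

    struct stub_syscall *sc = &d->syscall_data[d->syscall_data_len++];

    sc->syscall    = STUB_SYSCALL_MMAP;
    sc->mem.addr   = addr;    /* must equal the mmap() return, see check */
    sc->mem.length = length;
    sc->mem.prot   = prot;
    sc->mem.fd     = fd;
    sc->mem.offset = offset;
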
diff --git a/arch/um/kernel/skas/stub_exe.c b/arch/um/kernel/skas/stub_exe.c
new file mode 100644
index 000000000000..23c99b285e82
--- /dev/null
+++ b/arch/um/kernel/skas/stub_exe.c
@@ -0,0 +1,95 @@
+#include <sys/ptrace.h>
+#include <sys/prctl.h>
+#include <asm/unistd.h>
+#include <sysdep/stub.h>
+#include <stub-data.h>
+
+void _start(void);
+
+noinline static void real_init(void)
+{
+ struct stub_init_data init_data;
+ unsigned long res;
+ struct {
+ void *ss_sp;
+ int ss_flags;
+ size_t ss_size;
+ } stack = {
+ .ss_size = STUB_DATA_PAGES * UM_KERN_PAGE_SIZE,
+ };
+ struct {
+ void *sa_handler_;
+ unsigned long sa_flags;
+ void *sa_restorer;
+ unsigned long long sa_mask;
+ } sa = {
+ /* Need to set SA_RESTORER (but the handler never returns) */
+ .sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO | 0x04000000,
+ /* no need to mask any signals */
+ .sa_mask = 0,
+ };
+
+ /* set a nice name */
+ stub_syscall2(__NR_prctl, PR_SET_NAME, (unsigned long)"uml-userspace");
+
+ /* Make sure this process dies if the kernel dies */
+ stub_syscall2(__NR_prctl, PR_SET_PDEATHSIG, SIGKILL);
+
+ /* read information from STDIN and close it */
+ res = stub_syscall3(__NR_read, 0,
+ (unsigned long)&init_data, sizeof(init_data));
+ if (res != sizeof(init_data))
+ stub_syscall1(__NR_exit, 10);
+
+ stub_syscall1(__NR_close, 0);
+
+ /* map stub code + data */
+ res = stub_syscall6(STUB_MMAP_NR,
+ init_data.stub_start, UM_KERN_PAGE_SIZE,
+ PROT_READ | PROT_EXEC, MAP_FIXED | MAP_SHARED,
+ init_data.stub_code_fd, init_data.stub_code_offset);
+ if (res != init_data.stub_start)
+ stub_syscall1(__NR_exit, 11);
+
+ res = stub_syscall6(STUB_MMAP_NR,
+ init_data.stub_start + UM_KERN_PAGE_SIZE,
+ STUB_DATA_PAGES * UM_KERN_PAGE_SIZE,
+ PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED,
+ init_data.stub_data_fd, init_data.stub_data_offset);
+ if (res != init_data.stub_start + UM_KERN_PAGE_SIZE)
+ stub_syscall1(__NR_exit, 12);
+
+ /* setup signal stack inside stub data */
+ stack.ss_sp = (void *)init_data.stub_start + UM_KERN_PAGE_SIZE;
+ stub_syscall2(__NR_sigaltstack, (unsigned long)&stack, 0);
+
+ /* register SIGSEGV handler */
+ sa.sa_handler_ = (void *) init_data.segv_handler;
+ res = stub_syscall4(__NR_rt_sigaction, SIGSEGV, (unsigned long)&sa, 0,
+ sizeof(sa.sa_mask));
+ if (res != 0)
+ stub_syscall1(__NR_exit, 13);
+
+ stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0);
+
+ stub_syscall2(__NR_kill, stub_syscall0(__NR_getpid), SIGSTOP);
+
+ stub_syscall1(__NR_exit, 14);
+
+ __builtin_unreachable();
+}
+
+__attribute__((naked)) void _start(void)
+{
+ /*
+ * Since the stack after exec() starts at the top-most address,
+ * but that's exactly where we also want to map the stub data
+ * and code, this must:
+ * - push the stack by 1 code and STUB_DATA_PAGES data pages
+ * - call real_init()
+ * This way, real_init() can use the stack normally, while the
+ * original stack further down (higher address) will become
+ * inaccessible after the mmap() calls above.
+ */
+ stub_start(real_init);
+}
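
Resulting address-space layout once real_init() has run, per the two mmap()
calls above (as a comment):

    /*
     * init_data.stub_start                       1 page, r-x (stub code)
     * init_data.stub_start + UM_KERN_PAGE_SIZE   STUB_DATA_PAGES pages, rw-
     *                                            (stub data; also serves as
     *                                             the SIGSEGV sigaltstack)
     */
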
diff --git a/arch/um/kernel/skas/stub_exe_embed.S b/arch/um/kernel/skas/stub_exe_embed.S
new file mode 100644
index 000000000000..6d8914fbe8f1
--- /dev/null
+++ b/arch/um/kernel/skas/stub_exe_embed.S
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/init.h>
+#include <linux/linkage.h>
+
+__INITDATA
+
+SYM_DATA_START(stub_exe_start)
+ .incbin "arch/um/kernel/skas/stub_exe"
+SYM_DATA_END_LABEL(stub_exe_start, SYM_L_GLOBAL, stub_exe_end)
+
+__FINIT
diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c
index 9ee19e566da3..a5beaea2967e 100644
--- a/arch/um/kernel/skas/syscall.c
+++ b/arch/um/kernel/skas/syscall.c
@@ -12,23 +12,13 @@
#include <sysdep/syscalls.h>
#include <linux/time-internal.h>
#include <asm/unistd.h>
+#include <asm/delay.h>
void handle_syscall(struct uml_pt_regs *r)
{
struct pt_regs *regs = container_of(r, struct pt_regs, regs);
int syscall;
- /*
- * If we have infinite CPU resources, then make every syscall also a
- * preemption point, since we don't have any other preemption in this
- * case, and kernel threads would basically never run until userspace
- * went to sleep, even if said userspace interacts with the kernel in
- * various ways.
- */
- if (time_travel_mode == TT_MODE_INFCPU ||
- time_travel_mode == TT_MODE_EXTERNAL)
- schedule();
-
/* Initialize the syscall number and default return value. */
UPT_SYSCALL_NR(r) = PT_SYSCALL_NR(r->gp);
PT_REGS_SET_SYSCALL_RETURN(regs, -ENOSYS);
@@ -41,9 +31,36 @@ void handle_syscall(struct uml_pt_regs *r)
goto out;
syscall = UPT_SYSCALL_NR(r);
- if (syscall >= 0 && syscall < __NR_syscalls)
- PT_REGS_SET_SYSCALL_RETURN(regs,
- EXECUTE_SYSCALL(syscall, regs));
+
+ /*
+ * If no time passes, then sched_yield may not actually yield, causing
+ * broken spinlock implementations in userspace (ASAN) to hang for long
+ * periods of time.
+ */
+ if ((time_travel_mode == TT_MODE_INFCPU ||
+ time_travel_mode == TT_MODE_EXTERNAL) &&
+ syscall == __NR_sched_yield)
+ tt_extra_sched_jiffies += 1;
+
+ if (syscall >= 0 && syscall < __NR_syscalls) {
+ unsigned long ret = EXECUTE_SYSCALL(syscall, regs);
+
+ PT_REGS_SET_SYSCALL_RETURN(regs, ret);
+
+ /*
+ * An error value here can be some form of -ERESTARTSYS
+ * and then we'd just loop. Make any error syscalls take
+ * some time, so that it won't just loop if something is
+ * not ready, and hopefully other things will make some
+ * progress.
+ */
+ if (IS_ERR_VALUE(ret) &&
+ (time_travel_mode == TT_MODE_INFCPU ||
+ time_travel_mode == TT_MODE_EXTERNAL)) {
+ um_udelay(1);
+ schedule();
+ }
+ }
out:
syscall_trace_leave(regs);
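
The sched_yield bump works together with the sched_clock() definition added in
the time.c hunk below: each tt_extra_sched_jiffies increment advances virtual
time by one jiffy, so a spinning yield loop sees time move forward. Worked
numbers (assuming HZ=100):

    /*
     * One tt_extra_sched_jiffies increment advances sched_clock() by
     *   NSEC_PER_SEC / HZ = 1e9 / 100 = 10,000,000 ns = 10 ms
     * of virtual time per sched_yield() in TT_MODE_INFCPU/EXTERNAL.
     */
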
diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c
index 746715379f12..13ee5666668d 100644
--- a/arch/um/kernel/sysrq.c
+++ b/arch/um/kernel/sysrq.c
@@ -11,7 +11,6 @@
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
-#include <asm/sysrq.h>
#include <asm/stacktrace.h>
#include <os.h>
@@ -33,12 +32,6 @@ void show_stack(struct task_struct *task, unsigned long *stack,
struct pt_regs *segv_regs = current->thread.segv_regs;
int i;
- if (!segv_regs && os_is_signal_stack()) {
- pr_err("Received SIGSEGV in SIGSEGV handler,"
- " aborting stack trace!\n");
- return;
- }
-
if (!stack)
stack = get_stack_pointer(task, segv_regs);
@@ -53,5 +46,5 @@ void show_stack(struct task_struct *task, unsigned long *stack,
}
printk("%sCall Trace:\n", loglvl);
- dump_trace(current, &stackops, (void *)loglvl);
+ dump_trace(task ?: current, &stackops, (void *)loglvl);
}
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 3e270da6b6f6..1394568c0210 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -19,17 +19,21 @@
#include <asm/param.h>
#include <kern_util.h>
#include <os.h>
+#include <linux/delay.h>
#include <linux/time-internal.h>
#include <linux/um_timetravel.h>
#include <shared/init.h>
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+#include <linux/sched/clock.h>
+
enum time_travel_mode time_travel_mode;
EXPORT_SYMBOL_GPL(time_travel_mode);
static bool time_travel_start_set;
static unsigned long long time_travel_start;
static unsigned long long time_travel_time;
+static unsigned long long time_travel_shm_offset;
static LIST_HEAD(time_travel_events);
static LIST_HEAD(time_travel_irqs);
static unsigned long long time_travel_timer_interval;
@@ -39,8 +43,20 @@ static int time_travel_ext_fd = -1;
static unsigned int time_travel_ext_waiting;
static bool time_travel_ext_prev_request_valid;
static unsigned long long time_travel_ext_prev_request;
-static bool time_travel_ext_free_until_valid;
-static unsigned long long time_travel_ext_free_until;
+static unsigned long long *time_travel_ext_free_until;
+static unsigned long long _time_travel_ext_free_until;
+static u16 time_travel_shm_id;
+static struct um_timetravel_schedshm *time_travel_shm;
+static union um_timetravel_schedshm_client *time_travel_shm_client;
+
+unsigned long tt_extra_sched_jiffies;
+
+notrace unsigned long long sched_clock(void)
+{
+ return (unsigned long long)(jiffies - INITIAL_JIFFIES +
+ tt_extra_sched_jiffies)
+ * (NSEC_PER_SEC / HZ);
+}
static void time_travel_set_time(unsigned long long ns)
{
@@ -57,8 +73,52 @@ enum time_travel_message_handling {
TTMH_IDLE,
TTMH_POLL,
TTMH_READ,
+ TTMH_READ_START_ACK,
};
+static u64 bc_message;
+int time_travel_should_print_bc_msg;
+
+void _time_travel_print_bc_msg(void)
+{
+ time_travel_should_print_bc_msg = 0;
+ printk(KERN_INFO "time-travel: received broadcast 0x%llx\n", bc_message);
+}
+
+static void time_travel_setup_shm(int fd, u16 id)
+{
+ u32 len;
+
+ time_travel_shm = os_mmap_rw_shared(fd, sizeof(*time_travel_shm));
+
+ if (!time_travel_shm)
+ goto out;
+
+ len = time_travel_shm->len;
+
+ if (time_travel_shm->version != UM_TIMETRAVEL_SCHEDSHM_VERSION ||
+ len < struct_size(time_travel_shm, clients, id + 1)) {
+ os_unmap_memory(time_travel_shm, sizeof(*time_travel_shm));
+ time_travel_shm = NULL;
+ goto out;
+ }
+
+ time_travel_shm = os_mremap_rw_shared(time_travel_shm,
+ sizeof(*time_travel_shm),
+ len);
+ if (!time_travel_shm)
+ goto out;
+
+ time_travel_shm_offset = time_travel_shm->current_time;
+ time_travel_shm_client = &time_travel_shm->clients[id];
+ time_travel_shm_client->capa |= UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE;
+ time_travel_shm_id = id;
+ /* always look at that free_until from now on */
+ time_travel_ext_free_until = &time_travel_shm->free_until;
+out:
+ os_close_file(fd);
+}
+
static void time_travel_handle_message(struct um_timetravel_msg *msg,
enum time_travel_message_handling mode)
{
@@ -79,7 +139,20 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg,
}
}
- ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
+ if (unlikely(mode == TTMH_READ_START_ACK)) {
+ int fd[UM_TIMETRAVEL_SHARED_MAX_FDS];
+
+ ret = os_rcv_fd_msg(time_travel_ext_fd, fd,
+ ARRAY_SIZE(fd), msg, sizeof(*msg));
+ if (ret == sizeof(*msg)) {
+ time_travel_setup_shm(fd[UM_TIMETRAVEL_SHARED_MEMFD],
+ msg->time & UM_TIMETRAVEL_START_ACK_ID);
+ /* we don't use the logging for now */
+ os_close_file(fd[UM_TIMETRAVEL_SHARED_LOGFD]);
+ }
+ } else {
+ ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
+ }
if (ret == 0)
panic("time-travel external link is broken\n");
@@ -95,10 +168,24 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg,
return;
case UM_TIMETRAVEL_RUN:
time_travel_set_time(msg->time);
+ if (time_travel_shm) {
+ /* no request right now since we're running */
+ time_travel_shm_client->flags &=
+ ~UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
+ /* no ack for shared memory RUN */
+ return;
+ }
break;
case UM_TIMETRAVEL_FREE_UNTIL:
- time_travel_ext_free_until_valid = true;
- time_travel_ext_free_until = msg->time;
+ /* not supposed to get this with shm, but ignore it */
+ if (time_travel_shm)
+ break;
+ time_travel_ext_free_until = &_time_travel_ext_free_until;
+ _time_travel_ext_free_until = msg->time;
+ break;
+ case UM_TIMETRAVEL_BROADCAST:
+ bc_message = msg->time;
+ time_travel_should_print_bc_msg = 1;
break;
}
@@ -135,8 +222,15 @@ static u64 time_travel_ext_req(u32 op, u64 time)
block_signals_hard();
os_write_file(time_travel_ext_fd, &msg, sizeof(msg));
+ /* no ACK expected for WAIT in shared memory mode */
+ if (msg.op == UM_TIMETRAVEL_WAIT && time_travel_shm)
+ goto done;
+
while (msg.op != UM_TIMETRAVEL_ACK)
- time_travel_handle_message(&msg, TTMH_READ);
+ time_travel_handle_message(&msg,
+ op == UM_TIMETRAVEL_START ?
+ TTMH_READ_START_ACK :
+ TTMH_READ);
if (msg.seq != mseq)
panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n",
@@ -144,6 +238,7 @@ static u64 time_travel_ext_req(u32 op, u64 time)
if (op == UM_TIMETRAVEL_GET)
time_travel_set_time(msg.time);
+done:
unblock_signals_hard();
return msg.time;
@@ -179,13 +274,33 @@ static void time_travel_ext_update_request(unsigned long long time)
/*
* if we're running and are allowed to run past the request
* then we don't need to update it either
+ *
+ * Note that for shm we ignore FREE_UNTIL messages and leave the
+ * pointer pointing into shared memory; for non-shm the offset is 0.
*/
- if (!time_travel_ext_waiting && time_travel_ext_free_until_valid &&
- time < time_travel_ext_free_until)
+ if (!time_travel_ext_waiting && time_travel_ext_free_until &&
+ time < (*time_travel_ext_free_until - time_travel_shm_offset))
return;
time_travel_ext_prev_request = time;
time_travel_ext_prev_request_valid = true;
+
+ if (time_travel_shm) {
+ union um_timetravel_schedshm_client *running;
+
+ running = &time_travel_shm->clients[time_travel_shm->running_id];
+
+ if (running->capa & UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE) {
+ time_travel_shm_client->flags |=
+ UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
+ time += time_travel_shm_offset;
+ time_travel_shm_client->req_time = time;
+ if (time < time_travel_shm->free_until)
+ time_travel_shm->free_until = time;
+ return;
+ }
+ }
+
time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time);
}
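(The shm request path above works in the global time domain; purely for
illustration -- these helpers do not exist in the patch -- the conversion
between the two domains is:)

	/* illustration: shared (global) time vs. UML-local time */
	static inline u64 demo_shm_to_local(u64 shm_time)
	{
		return shm_time - time_travel_shm_offset;
	}

	static inline u64 demo_local_to_shm(u64 local_time)
	{
		return local_time + time_travel_shm_offset;
	}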
@@ -193,6 +308,14 @@ void __time_travel_propagate_time(void)
{
static unsigned long long last_propagated;
+ if (time_travel_shm) {
+ if (time_travel_shm->running_id != time_travel_shm_id)
+ panic("time-travel: setting time while not running\n");
+ time_travel_shm->current_time = time_travel_time +
+ time_travel_shm_offset;
+ return;
+ }
+
if (last_propagated == time_travel_time)
return;
@@ -208,9 +331,12 @@ static bool time_travel_ext_request(unsigned long long time)
* If we received an external sync point ("free until") then we
* don't have to request/wait for anything until then, unless
* we're already waiting.
+ *
+ * Note that for shm we ignore FREE_UNTIL messages and leave the
+ * pointer pointing into shared memory; for non-shm the offset is 0.
*/
- if (!time_travel_ext_waiting && time_travel_ext_free_until_valid &&
- time < time_travel_ext_free_until)
+ if (!time_travel_ext_waiting && time_travel_ext_free_until &&
+ time < (*time_travel_ext_free_until - time_travel_shm_offset))
return false;
time_travel_ext_update_request(time);
@@ -224,7 +350,8 @@ static void time_travel_ext_wait(bool idle)
};
time_travel_ext_prev_request_valid = false;
- time_travel_ext_free_until_valid = false;
+ if (!time_travel_shm)
+ time_travel_ext_free_until = NULL;
time_travel_ext_waiting++;
time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1);
@@ -247,7 +374,11 @@ static void time_travel_ext_wait(bool idle)
static void time_travel_ext_get_time(void)
{
- time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
+ if (time_travel_shm)
+ time_travel_set_time(time_travel_shm->current_time -
+ time_travel_shm_offset);
+ else
+ time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
}
static void __time_travel_update_time(unsigned long long ns, bool idle)
@@ -319,10 +450,15 @@ void time_travel_add_event_rel(struct time_travel_event *e,
time_travel_add_event(e, time_travel_time + delay_ns);
}
-void time_travel_periodic_timer(struct time_travel_event *e)
+static void time_travel_periodic_timer(struct time_travel_event *e)
{
time_travel_add_event(&time_travel_timer_event,
time_travel_time + time_travel_timer_interval);
+
+ /* clock tick: consume one extra jiffy so sched_clock() stays constant */
+ if (tt_extra_sched_jiffies > 0)
+ tt_extra_sched_jiffies -= 1;
+
deliver_alarm();
}
@@ -474,6 +610,10 @@ EXPORT_SYMBOL_GPL(time_travel_add_irq_event);
static void time_travel_oneshot_timer(struct time_travel_event *e)
{
+ /* clock tick: consume one extra jiffy so sched_clock() stays constant */
+ if (tt_extra_sched_jiffies > 0)
+ tt_extra_sched_jiffies -= 1;
+
deliver_alarm();
}
@@ -719,7 +859,7 @@ static irqreturn_t um_timer(int irq, void *dev)
if (get_current()->mm != NULL)
{
/* userspace - relay signal, results in correct userspace timers */
- os_alarm_process(get_current()->mm->context.id.u.pid);
+ os_alarm_process(get_current()->mm->context.id.pid);
}
(*timer_clockevent.event_handler)(&timer_clockevent);
@@ -812,7 +952,7 @@ unsigned long calibrate_delay_is_known(void)
return 0;
}
-int setup_time_travel(char *str)
+static int setup_time_travel(char *str)
{
if (strcmp(str, "=inf-cpu") == 0) {
time_travel_mode = TT_MODE_INFCPU;
@@ -862,7 +1002,7 @@ __uml_help(setup_time_travel,
"devices using it, assuming the device has the right capabilities.\n"
"The optional ID is a 64-bit integer that's sent to the central scheduler.\n");
-int setup_time_travel_start(char *str)
+static int setup_time_travel_start(char *str)
{
int err;
@@ -874,9 +1014,49 @@ int setup_time_travel_start(char *str)
return 1;
}
-__setup("time-travel-start", setup_time_travel_start);
+__setup("time-travel-start=", setup_time_travel_start);
__uml_help(setup_time_travel_start,
-"time-travel-start=<seconds>\n"
+"time-travel-start=<nanoseconds>\n"
"Configure the UML instance's wall clock to start at this value rather than\n"
"the host's wall clock at the time of UML boot.\n");
+static struct kobject *bc_time_kobject;
+
+static ssize_t bc_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "0x%llx\n", bc_message);
+}
+
+static ssize_t bc_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ int ret;
+ u64 user_bc_message;
+
+ ret = kstrtou64(buf, 0, &user_bc_message);
+ if (ret)
+ return ret;
+
+ bc_message = user_bc_message;
+
+ time_travel_ext_req(UM_TIMETRAVEL_BROADCAST, bc_message);
+ pr_info("um: time: sent broadcast message: 0x%llx\n", bc_message);
+ return count;
+}
+
+static struct kobj_attribute bc_attribute = __ATTR(bc-message, 0660, bc_show, bc_store);
+
+static int __init um_bc_start(void)
+{
+ if (time_travel_mode != TT_MODE_EXTERNAL)
+ return 0;
+
+ bc_time_kobject = kobject_create_and_add("um-ext-time", kernel_kobj);
+ if (!bc_time_kobject)
+ return 0;
+
+ if (sysfs_create_file(bc_time_kobject, &bc_attribute.attr))
+ pr_debug("failed to create the bc file in /sys/kernel/um_time");
+
+ return 0;
+}
+late_initcall(um_bc_start);
#endif
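(Usage of the broadcast attribute registered above, assuming the kobject name
"um-ext-time" created in um_bc_start(); a hypothetical session:)

	# echo 0x1234 > /sys/kernel/um-ext-time/bc-message
	# cat /sys/kernel/um-ext-time/bc-message
	0x1234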
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index 7d050ab0f78a..cf7e0d4407f2 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -8,241 +8,82 @@
#include <linux/sched/signal.h>
#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
#include <as-layout.h>
#include <mem_user.h>
#include <os.h>
#include <skas.h>
#include <kern_util.h>
-struct host_vm_change {
- struct host_vm_op {
- enum { NONE, MMAP, MUNMAP, MPROTECT } type;
- union {
- struct {
- unsigned long addr;
- unsigned long len;
- unsigned int prot;
- int fd;
- __u64 offset;
- } mmap;
- struct {
- unsigned long addr;
- unsigned long len;
- } munmap;
- struct {
- unsigned long addr;
- unsigned long len;
- unsigned int prot;
- } mprotect;
- } u;
- } ops[1];
- int userspace;
- int index;
- struct mm_struct *mm;
- void *data;
- int force;
-};
-
-#define INIT_HVC(mm, force, userspace) \
- ((struct host_vm_change) \
- { .ops = { { .type = NONE } }, \
- .mm = mm, \
- .data = NULL, \
- .userspace = userspace, \
- .index = 0, \
- .force = force })
-
-static void report_enomem(void)
-{
- printk(KERN_ERR "UML ran out of memory on the host side! "
- "This can happen due to a memory limitation or "
- "vm.max_map_count has been reached.\n");
-}
-
-static int do_ops(struct host_vm_change *hvc, int end,
- int finished)
-{
- struct host_vm_op *op;
- int i, ret = 0;
+struct vm_ops {
+ struct mm_id *mm_idp;
- for (i = 0; i < end && !ret; i++) {
- op = &hvc->ops[i];
- switch (op->type) {
- case MMAP:
- if (hvc->userspace)
- ret = map(&hvc->mm->context.id, op->u.mmap.addr,
- op->u.mmap.len, op->u.mmap.prot,
- op->u.mmap.fd,
- op->u.mmap.offset, finished,
- &hvc->data);
- else
- map_memory(op->u.mmap.addr, op->u.mmap.offset,
- op->u.mmap.len, 1, 1, 1);
- break;
- case MUNMAP:
- if (hvc->userspace)
- ret = unmap(&hvc->mm->context.id,
- op->u.munmap.addr,
- op->u.munmap.len, finished,
- &hvc->data);
- else
- ret = os_unmap_memory(
- (void *) op->u.munmap.addr,
- op->u.munmap.len);
-
- break;
- case MPROTECT:
- if (hvc->userspace)
- ret = protect(&hvc->mm->context.id,
- op->u.mprotect.addr,
- op->u.mprotect.len,
- op->u.mprotect.prot,
- finished, &hvc->data);
- else
- ret = os_protect_memory(
- (void *) op->u.mprotect.addr,
- op->u.mprotect.len,
- 1, 1, 1);
- break;
- default:
- printk(KERN_ERR "Unknown op type %d in do_ops\n",
- op->type);
- BUG();
- break;
- }
- }
-
- if (ret == -ENOMEM)
- report_enomem();
-
- return ret;
-}
+ int (*mmap)(struct mm_id *mm_idp,
+ unsigned long virt, unsigned long len, int prot,
+ int phys_fd, unsigned long long offset);
+ int (*unmap)(struct mm_id *mm_idp,
+ unsigned long virt, unsigned long len);
+};
-static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
- unsigned int prot, struct host_vm_change *hvc)
+static int kern_map(struct mm_id *mm_idp,
+ unsigned long virt, unsigned long len, int prot,
+ int phys_fd, unsigned long long offset)
{
- __u64 offset;
- struct host_vm_op *last;
- int fd = -1, ret = 0;
-
- if (hvc->userspace)
- fd = phys_mapping(phys, &offset);
- else
- offset = phys;
- if (hvc->index != 0) {
- last = &hvc->ops[hvc->index - 1];
- if ((last->type == MMAP) &&
- (last->u.mmap.addr + last->u.mmap.len == virt) &&
- (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
- (last->u.mmap.offset + last->u.mmap.len == offset)) {
- last->u.mmap.len += len;
- return 0;
- }
- }
-
- if (hvc->index == ARRAY_SIZE(hvc->ops)) {
- ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
- hvc->index = 0;
- }
-
- hvc->ops[hvc->index++] = ((struct host_vm_op)
- { .type = MMAP,
- .u = { .mmap = { .addr = virt,
- .len = len,
- .prot = prot,
- .fd = fd,
- .offset = offset }
- } });
- return ret;
+ /* TODO: why does the kernel always need its mappings to be executable? */
+ return os_map_memory((void *)virt, phys_fd, offset, len,
+ prot & UM_PROT_READ, prot & UM_PROT_WRITE,
+ 1);
}
-static int add_munmap(unsigned long addr, unsigned long len,
- struct host_vm_change *hvc)
+static int kern_unmap(struct mm_id *mm_idp,
+ unsigned long virt, unsigned long len)
{
- struct host_vm_op *last;
- int ret = 0;
-
- if (hvc->index != 0) {
- last = &hvc->ops[hvc->index - 1];
- if ((last->type == MUNMAP) &&
- (last->u.munmap.addr + last->u.mmap.len == addr)) {
- last->u.munmap.len += len;
- return 0;
- }
- }
-
- if (hvc->index == ARRAY_SIZE(hvc->ops)) {
- ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
- hvc->index = 0;
- }
-
- hvc->ops[hvc->index++] = ((struct host_vm_op)
- { .type = MUNMAP,
- .u = { .munmap = { .addr = addr,
- .len = len } } });
- return ret;
+ return os_unmap_memory((void *)virt, len);
}
-static int add_mprotect(unsigned long addr, unsigned long len,
- unsigned int prot, struct host_vm_change *hvc)
+void report_enomem(void)
{
- struct host_vm_op *last;
- int ret = 0;
-
- if (hvc->index != 0) {
- last = &hvc->ops[hvc->index - 1];
- if ((last->type == MPROTECT) &&
- (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
- (last->u.mprotect.prot == prot)) {
- last->u.mprotect.len += len;
- return 0;
- }
- }
-
- if (hvc->index == ARRAY_SIZE(hvc->ops)) {
- ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
- hvc->index = 0;
- }
-
- hvc->ops[hvc->index++] = ((struct host_vm_op)
- { .type = MPROTECT,
- .u = { .mprotect = { .addr = addr,
- .len = len,
- .prot = prot } } });
- return ret;
+ printk(KERN_ERR "UML ran out of memory on the host side! "
+ "This can happen due to a memory limitation or "
+ "vm.max_map_count has been reached.\n");
}
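(When this error is caused by the map-count limit rather than real memory
pressure, raising the host sysctl usually helps; example host-side commands,
with an arbitrarily chosen value:)

	$ sysctl vm.max_map_count
	$ sudo sysctl -w vm.max_map_count=262144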
-#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))
-
static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
unsigned long end,
- struct host_vm_change *hvc)
+ struct vm_ops *ops)
{
pte_t *pte;
- int r, w, x, prot, ret = 0;
+ int ret = 0;
pte = pte_offset_kernel(pmd, addr);
do {
- r = pte_read(*pte);
- w = pte_write(*pte);
- x = pte_exec(*pte);
- if (!pte_young(*pte)) {
- r = 0;
- w = 0;
- } else if (!pte_dirty(*pte))
- w = 0;
+ if (!pte_needsync(*pte))
+ continue;
+
+ if (pte_present(*pte)) {
+ __u64 offset;
+ unsigned long phys = pte_val(*pte) & PAGE_MASK;
+ int fd = phys_mapping(phys, &offset);
+ int r, w, x, prot;
+
+ r = pte_read(*pte);
+ w = pte_write(*pte);
+ x = pte_exec(*pte);
+ if (!pte_young(*pte)) {
+ r = 0;
+ w = 0;
+ } else if (!pte_dirty(*pte))
+ w = 0;
+
+ prot = (r ? UM_PROT_READ : 0) |
+ (w ? UM_PROT_WRITE : 0) |
+ (x ? UM_PROT_EXEC : 0);
+
+ ret = ops->mmap(ops->mm_idp, addr, PAGE_SIZE,
+ prot, fd, offset);
+ } else
+ ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE);
- prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
- (x ? UM_PROT_EXEC : 0));
- if (hvc->force || pte_newpage(*pte)) {
- if (pte_present(*pte)) {
- if (pte_newpage(*pte))
- ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
- PAGE_SIZE, prot, hvc);
- } else
- ret = add_munmap(addr, PAGE_SIZE, hvc);
- } else if (pte_newprot(*pte))
- ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
*pte = pte_mkuptodate(*pte);
} while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
return ret;
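(pte_needsync()/pte_mkuptodate() live in the UML pgtable headers; a sketch of
the accessor pattern, assuming a software "needs sync" PTE bit -- the bit
name here is an assumption:)

	/* sketch: software PTE bit tracking "host mapping out of date" */
	static inline int demo_pte_needsync(pte_t pte)
	{
		return pte_get_bits(pte, _PAGE_NEEDSYNC);
	}

	static inline pte_t demo_pte_mkuptodate(pte_t pte)
	{
		pte_clear_bits(pte, _PAGE_NEEDSYNC);
		return pte;
	}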
@@ -250,7 +91,7 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
static inline int update_pmd_range(pud_t *pud, unsigned long addr,
unsigned long end,
- struct host_vm_change *hvc)
+ struct vm_ops *ops)
{
pmd_t *pmd;
unsigned long next;
@@ -260,19 +101,20 @@ static inline int update_pmd_range(pud_t *pud, unsigned long addr,
do {
next = pmd_addr_end(addr, end);
if (!pmd_present(*pmd)) {
- if (hvc->force || pmd_newpage(*pmd)) {
- ret = add_munmap(addr, next - addr, hvc);
+ if (pmd_needsync(*pmd)) {
+ ret = ops->unmap(ops->mm_idp, addr,
+ next - addr);
pmd_mkuptodate(*pmd);
}
}
- else ret = update_pte_range(pmd, addr, next, hvc);
+ else ret = update_pte_range(pmd, addr, next, ops);
} while (pmd++, addr = next, ((addr < end) && !ret));
return ret;
}
static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
unsigned long end,
- struct host_vm_change *hvc)
+ struct vm_ops *ops)
{
pud_t *pud;
unsigned long next;
@@ -282,19 +124,20 @@ static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
do {
next = pud_addr_end(addr, end);
if (!pud_present(*pud)) {
- if (hvc->force || pud_newpage(*pud)) {
- ret = add_munmap(addr, next - addr, hvc);
+ if (pud_needsync(*pud)) {
+ ret = ops->unmap(ops->mm_idp, addr,
+ next - addr);
pud_mkuptodate(*pud);
}
}
- else ret = update_pmd_range(pud, addr, next, hvc);
+ else ret = update_pmd_range(pud, addr, next, ops);
} while (pud++, addr = next, ((addr < end) && !ret));
return ret;
}
static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
unsigned long end,
- struct host_vm_change *hvc)
+ struct vm_ops *ops)
{
p4d_t *p4d;
unsigned long next;
@@ -304,227 +147,57 @@ static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
do {
next = p4d_addr_end(addr, end);
if (!p4d_present(*p4d)) {
- if (hvc->force || p4d_newpage(*p4d)) {
- ret = add_munmap(addr, next - addr, hvc);
+ if (p4d_needsync(*p4d)) {
+ ret = ops->unmap(ops->mm_idp, addr,
+ next - addr);
p4d_mkuptodate(*p4d);
}
} else
- ret = update_pud_range(p4d, addr, next, hvc);
+ ret = update_pud_range(p4d, addr, next, ops);
} while (p4d++, addr = next, ((addr < end) && !ret));
return ret;
}
-static void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
- unsigned long end_addr, int force)
+int um_tlb_sync(struct mm_struct *mm)
{
pgd_t *pgd;
- struct host_vm_change hvc;
- unsigned long addr = start_addr, next;
- int ret = 0, userspace = 1;
+ struct vm_ops ops;
+ unsigned long addr = mm->context.sync_tlb_range_from, next;
+ int ret = 0;
+
+ if (mm->context.sync_tlb_range_to == 0)
+ return 0;
+
+ ops.mm_idp = &mm->context.id;
+ if (mm == &init_mm) {
+ ops.mmap = kern_map;
+ ops.unmap = kern_unmap;
+ } else {
+ ops.mmap = map;
+ ops.unmap = unmap;
+ }
- hvc = INIT_HVC(mm, force, userspace);
pgd = pgd_offset(mm, addr);
do {
- next = pgd_addr_end(addr, end_addr);
+ next = pgd_addr_end(addr, mm->context.sync_tlb_range_to);
if (!pgd_present(*pgd)) {
- if (force || pgd_newpage(*pgd)) {
- ret = add_munmap(addr, next - addr, &hvc);
+ if (pgd_needsync(*pgd)) {
+ ret = ops.unmap(ops.mm_idp, addr,
+ next - addr);
pgd_mkuptodate(*pgd);
}
} else
- ret = update_p4d_range(pgd, addr, next, &hvc);
- } while (pgd++, addr = next, ((addr < end_addr) && !ret));
-
- if (!ret)
- ret = do_ops(&hvc, hvc.index, 1);
+ ret = update_p4d_range(pgd, addr, next, &ops);
+ } while (pgd++, addr = next,
+ ((addr < mm->context.sync_tlb_range_to) && !ret));
- /* This is not an else because ret is modified above */
- if (ret) {
- struct mm_id *mm_idp = &current->mm->context.id;
-
- printk(KERN_ERR "fix_range_common: failed, killing current "
- "process: %d\n", task_tgid_vnr(current));
- mm_idp->kill = 1;
- }
-}
-
-static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
-{
- struct mm_struct *mm;
- pgd_t *pgd;
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- unsigned long addr, last;
- int updated = 0, err = 0, force = 0, userspace = 0;
- struct host_vm_change hvc;
-
- mm = &init_mm;
- hvc = INIT_HVC(mm, force, userspace);
- for (addr = start; addr < end;) {
- pgd = pgd_offset(mm, addr);
- if (!pgd_present(*pgd)) {
- last = ADD_ROUND(addr, PGDIR_SIZE);
- if (last > end)
- last = end;
- if (pgd_newpage(*pgd)) {
- updated = 1;
- err = add_munmap(addr, last - addr, &hvc);
- if (err < 0)
- panic("munmap failed, errno = %d\n",
- -err);
- }
- addr = last;
- continue;
- }
-
- p4d = p4d_offset(pgd, addr);
- if (!p4d_present(*p4d)) {
- last = ADD_ROUND(addr, P4D_SIZE);
- if (last > end)
- last = end;
- if (p4d_newpage(*p4d)) {
- updated = 1;
- err = add_munmap(addr, last - addr, &hvc);
- if (err < 0)
- panic("munmap failed, errno = %d\n",
- -err);
- }
- addr = last;
- continue;
- }
-
- pud = pud_offset(p4d, addr);
- if (!pud_present(*pud)) {
- last = ADD_ROUND(addr, PUD_SIZE);
- if (last > end)
- last = end;
- if (pud_newpage(*pud)) {
- updated = 1;
- err = add_munmap(addr, last - addr, &hvc);
- if (err < 0)
- panic("munmap failed, errno = %d\n",
- -err);
- }
- addr = last;
- continue;
- }
-
- pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd)) {
- last = ADD_ROUND(addr, PMD_SIZE);
- if (last > end)
- last = end;
- if (pmd_newpage(*pmd)) {
- updated = 1;
- err = add_munmap(addr, last - addr, &hvc);
- if (err < 0)
- panic("munmap failed, errno = %d\n",
- -err);
- }
- addr = last;
- continue;
- }
-
- pte = pte_offset_kernel(pmd, addr);
- if (!pte_present(*pte) || pte_newpage(*pte)) {
- updated = 1;
- err = add_munmap(addr, PAGE_SIZE, &hvc);
- if (err < 0)
- panic("munmap failed, errno = %d\n",
- -err);
- if (pte_present(*pte))
- err = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
- PAGE_SIZE, 0, &hvc);
- }
- else if (pte_newprot(*pte)) {
- updated = 1;
- err = add_mprotect(addr, PAGE_SIZE, 0, &hvc);
- }
- addr += PAGE_SIZE;
- }
- if (!err)
- err = do_ops(&hvc, hvc.index, 1);
-
- if (err < 0)
- panic("flush_tlb_kernel failed, errno = %d\n", err);
- return updated;
-}
-
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
-{
- pgd_t *pgd;
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- struct mm_struct *mm = vma->vm_mm;
- void *flush = NULL;
- int r, w, x, prot, err = 0;
- struct mm_id *mm_id;
-
- address &= PAGE_MASK;
-
- pgd = pgd_offset(mm, address);
- if (!pgd_present(*pgd))
- goto kill;
-
- p4d = p4d_offset(pgd, address);
- if (!p4d_present(*p4d))
- goto kill;
-
- pud = pud_offset(p4d, address);
- if (!pud_present(*pud))
- goto kill;
-
- pmd = pmd_offset(pud, address);
- if (!pmd_present(*pmd))
- goto kill;
-
- pte = pte_offset_kernel(pmd, address);
-
- r = pte_read(*pte);
- w = pte_write(*pte);
- x = pte_exec(*pte);
- if (!pte_young(*pte)) {
- r = 0;
- w = 0;
- } else if (!pte_dirty(*pte)) {
- w = 0;
- }
-
- mm_id = &mm->context.id;
- prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
- (x ? UM_PROT_EXEC : 0));
- if (pte_newpage(*pte)) {
- if (pte_present(*pte)) {
- unsigned long long offset;
- int fd;
-
- fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
- err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
- 1, &flush);
- }
- else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
- }
- else if (pte_newprot(*pte))
- err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);
-
- if (err) {
- if (err == -ENOMEM)
- report_enomem();
-
- goto kill;
- }
-
- *pte = pte_mkuptodate(*pte);
+ if (ret == -ENOMEM)
+ report_enomem();
- return;
+ mm->context.sync_tlb_range_from = 0;
+ mm->context.sync_tlb_range_to = 0;
-kill:
- printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
- force_sig(SIGKILL);
+ return ret;
}
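(um_tlb_sync() consumes a pending range that callers accumulate first; a
sketch of the marking counterpart it pairs with, presumably an inline in the
tlbflush header -- shown under that assumption:)

	/* sketch: grow the pending range; um_tlb_sync() resets it to 0/0 */
	static inline void demo_tlb_mark_sync(struct mm_struct *mm,
					      unsigned long start,
					      unsigned long end)
	{
		if (!mm->context.sync_tlb_range_to) {
			mm->context.sync_tlb_range_from = start;
			mm->context.sync_tlb_range_to = end;
		} else {
			if (start < mm->context.sync_tlb_range_from)
				mm->context.sync_tlb_range_from = start;
			if (end > mm->context.sync_tlb_range_to)
				mm->context.sync_tlb_range_to = end;
		}
	}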
void flush_tlb_all(void)
@@ -539,66 +212,11 @@ void flush_tlb_all(void)
flush_tlb_mm(current->mm);
}
-void flush_tlb_kernel_range(unsigned long start, unsigned long end)
-{
- flush_tlb_kernel_range_common(start, end);
-}
-
-void flush_tlb_kernel_vm(void)
-{
- flush_tlb_kernel_range_common(start_vm, end_vm);
-}
-
-void __flush_tlb_one(unsigned long addr)
-{
- flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
-}
-
-static void fix_range(struct mm_struct *mm, unsigned long start_addr,
- unsigned long end_addr, int force)
-{
- /*
- * Don't bother flushing if this address space is about to be
- * destroyed.
- */
- if (atomic_read(&mm->mm_users) == 0)
- return;
-
- fix_range_common(mm, start_addr, end_addr, force);
-}
-
-void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end)
-{
- if (vma->vm_mm == NULL)
- flush_tlb_kernel_range_common(start, end);
- else fix_range(vma->vm_mm, start, end, 0);
-}
-EXPORT_SYMBOL(flush_tlb_range);
-
-void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
- unsigned long end)
-{
- fix_range(mm, start, end, 0);
-}
-
void flush_tlb_mm(struct mm_struct *mm)
{
struct vm_area_struct *vma;
VMA_ITERATOR(vmi, mm, 0);
for_each_vma(vmi, vma)
- fix_range(mm, vma->vm_start, vma->vm_end, 0);
-}
-
-void force_flush_all(void)
-{
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- VMA_ITERATOR(vmi, mm, 0);
-
- mmap_read_lock(mm);
- for_each_vma(vmi, vma)
- fix_range(mm, vma->vm_start, vma->vm_end, 1);
- mmap_read_unlock(mm);
+ um_tlb_mark_sync(mm, vma->vm_start, vma->vm_end);
}
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 6d8ae86ae978..ce073150dc20 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -16,6 +16,7 @@
#include <kern_util.h>
#include <os.h>
#include <skas.h>
+#include <arch.h>
/*
* Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by
@@ -113,7 +114,7 @@ good_area:
#if 0
WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
#endif
- flush_tlb_page(vma, address);
+
out:
mmap_read_unlock(mm);
out_nosemaphore:
@@ -175,12 +176,14 @@ void fatal_sigsegv(void)
* @sig: the signal number
* @unused_si: the signal info struct; unused in this handler
* @regs: the ptrace register information
+ * @mc: the mcontext of the signal
*
* The handler first extracts the faultinfo from the UML ptrace regs struct.
* If the userfault did not happen in a UML userspace process, bad_segv is called.
* Otherwise the signal happened in a cloned userspace process and is handled here.
*/
-void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
+void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs,
+ void *mc)
{
struct faultinfo * fi = UPT_FAULTINFO(regs);
@@ -189,7 +192,7 @@ void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
bad_segv(*fi, UPT_IP(regs));
return;
}
- segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs);
+ segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs, mc);
}
/*
@@ -199,9 +202,8 @@ void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
* give us bad data!
*/
unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
- struct uml_pt_regs *regs)
+ struct uml_pt_regs *regs, void *mc)
{
- jmp_buf *catcher;
int si_code;
int err;
int is_write = FAULT_WRITE(fi);
@@ -210,11 +212,33 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
if (!is_user && regs)
current->thread.segv_regs = container_of(regs, struct pt_regs, regs);
- if (!is_user && (address >= start_vm) && (address < end_vm)) {
- flush_tlb_kernel_vm();
+ if (!is_user && init_mm.context.sync_tlb_range_to) {
+ /*
+ * The kernel has pending updates from set_ptes that were not
+ * flushed yet. Syncing them should fix the page fault (if not,
+ * we'll get here again and panic).
+ */
+ err = um_tlb_sync(&init_mm);
+ if (err == -ENOMEM)
+ report_enomem();
+ if (err)
+ panic("Failed to sync kernel TLBs: %d", err);
goto out;
}
else if (current->mm == NULL) {
+ if (current->pagefault_disabled) {
+ if (!mc) {
+ show_regs(container_of(regs, struct pt_regs, regs));
+ panic("Segfault with pagefaults disabled but no mcontext");
+ }
+ if (!current->thread.segv_continue) {
+ show_regs(container_of(regs, struct pt_regs, regs));
+ panic("Segfault without recovery target");
+ }
+ mc_set_rip(mc, current->thread.segv_continue);
+ current->thread.segv_continue = NULL;
+ goto out;
+ }
show_regs(container_of(regs, struct pt_regs, regs));
panic("Segfault with no mm");
}
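(mc_set_rip() redirects the interrupted context to the recovery target; a
sketch for an x86-64 glibc host, where the register index is an assumption
for illustration:)

	#define _GNU_SOURCE
	#include <ucontext.h>

	void demo_mc_set_rip(void *_mc, void *target)
	{
		mcontext_t *mc = _mc;

		mc->gregs[REG_RIP] = (unsigned long)target;
	}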
@@ -237,15 +261,8 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
address = 0;
}
- catcher = current->thread.fault_catcher;
if (!err)
goto out;
- else if (catcher != NULL) {
- current->thread.fault_addr = (void *) address;
- UML_LONGJMP(catcher, 1);
- }
- else if (current->thread.fault_addr != NULL)
- panic("fault_addr set but no fault catcher");
else if (!is_user && arch_fixup(ip, regs))
goto out;
@@ -273,7 +290,8 @@ out:
return 0;
}
-void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs)
+void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs,
+ void *mc)
{
int code, err;
if (!UPT_IS_USER(regs)) {
@@ -301,15 +319,8 @@ void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs)
}
}
-void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs)
-{
- if (current->thread.fault_catcher != NULL)
- UML_LONGJMP(current->thread.fault_catcher, 1);
- else
- relay_signal(sig, si, regs);
-}
-
-void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
+void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs,
+ void *mc)
{
do_IRQ(WINCH_IRQ, regs);
}
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 7a9820797eae..d4b3b6742ec8 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -12,6 +12,7 @@
#include <linux/panic_notifier.h>
#include <linux/seq_file.h>
#include <linux/string.h>
+#include <linux/string_choices.h>
#include <linux/utsname.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
@@ -23,6 +24,7 @@
#include <asm/cpufeature.h>
#include <asm/sections.h>
#include <asm/setup.h>
+#include <asm/text-patching.h>
#include <as-layout.h>
#include <arch.h>
#include <init.h>
@@ -64,9 +66,6 @@ struct cpuinfo_um boot_cpu_data = {
EXPORT_SYMBOL(boot_cpu_data);
-union thread_union cpu0_irqstack
- __section(".data..init_irqstack") =
- { .thread_info = INIT_THREAD_INFO(init_task) };
/* Changed in setup_arch, which is called in early boot */
static char host_info[(__NEW_UTS_LEN + 1) * 5];
@@ -80,7 +79,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "model name\t: UML\n");
seq_printf(m, "mode\t\t: skas\n");
seq_printf(m, "host\t\t: %s\n", host_info);
- seq_printf(m, "fpu\t\t: %s\n", cpu_has(&boot_cpu_data, X86_FEATURE_FPU) ? "yes" : "no");
+ seq_printf(m, "fpu\t\t: %s\n", str_yes_no(cpu_has(&boot_cpu_data, X86_FEATURE_FPU)));
seq_printf(m, "flags\t\t:");
for (i = 0; i < 32*NCAPINTS; i++)
if (cpu_has(&boot_cpu_data, i) && (x86_cap_flags[i] != NULL))
@@ -125,15 +124,12 @@ unsigned long uml_reserved; /* Also modified in mem_init */
unsigned long start_vm;
unsigned long end_vm;
-/* Set in uml_ncpus_setup */
-int ncpus = 1;
-
/* Set in early boot */
static int have_root __initdata;
static int have_console __initdata;
/* Set in uml_mem_setup and modified in linux_main */
-long long physmem_size = 64 * 1024 * 1024;
+unsigned long long physmem_size = 64 * 1024 * 1024;
EXPORT_SYMBOL(physmem_size);
static const char *usage_string =
@@ -169,19 +165,6 @@ __uml_setup("root=", uml_root_setup,
" root=/dev/ubd5\n\n"
);
-static int __init no_skas_debug_setup(char *line, int *add)
-{
- os_warn("'debug' is not necessary to gdb UML in skas mode - run\n");
- os_warn("'gdb linux'\n");
-
- return 0;
-}
-
-__uml_setup("debug", no_skas_debug_setup,
-"debug\n"
-" this flag is not needed to run gdb on UML in skas mode\n\n"
-);
-
static int __init uml_console_setup(char *line, int *add)
{
have_console = 1;
@@ -259,6 +242,8 @@ static struct notifier_block panic_exit_notifier = {
void uml_finishsetup(void)
{
+ cpu_tasks[0] = &init_task;
+
atomic_notifier_chain_register(&panic_notifier_list,
&panic_exit_notifier);
@@ -280,7 +265,7 @@ EXPORT_SYMBOL(end_iomem);
#define MIN_VMALLOC (32 * 1024 * 1024)
-static void parse_host_cpu_flags(char *line)
+static void __init parse_host_cpu_flags(char *line)
{
int i;
for (i = 0; i < 32*NCAPINTS; i++) {
@@ -288,7 +273,8 @@ static void parse_host_cpu_flags(char *line)
set_cpu_cap(&boot_cpu_data, i);
}
}
-static void parse_cache_line(char *line)
+
+static void __init parse_cache_line(char *line)
{
long res;
char *to_parse = strstr(line, ":");
@@ -304,7 +290,24 @@ static void parse_cache_line(char *line)
}
}
-int __init linux_main(int argc, char **argv)
+static unsigned long __init get_top_address(char **envp)
+{
+ unsigned long top_addr = (unsigned long) &top_addr;
+ int i;
+
+ /* In the initial ELF stack layout, the environment strings sit near the top, just after the program name */
+ for (i = 0; envp[i]; i++) {
+ if ((unsigned long) envp[i] > top_addr)
+ top_addr = (unsigned long) envp[i];
+ }
+
+ top_addr &= ~(UM_KERN_PAGE_SIZE - 1);
+ top_addr += UM_KERN_PAGE_SIZE;
+
+ return top_addr;
+}
+
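(The scan works because the initial environment strings are placed at the
very top of the process stack; a standalone demo of the same estimate,
hypothetical and not part of the patch:)

	#include <stdio.h>

	int main(int argc, char **argv, char **envp)
	{
		unsigned long top = (unsigned long)&top;
		int i;

		for (i = 0; envp[i]; i++)
			if ((unsigned long)envp[i] > top)
				top = (unsigned long)envp[i];

		printf("approximate stack top: 0x%lx\n", top);
		return 0;
	}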
+int __init linux_main(int argc, char **argv, char **envp)
{
unsigned long avail, diff;
unsigned long virtmem_size, max_physmem;
@@ -326,20 +329,23 @@ int __init linux_main(int argc, char **argv)
if (have_console == 0)
add_arg(DEFAULT_COMMAND_LINE_CONSOLE);
- host_task_size = os_get_top_address();
- /* reserve a few pages for the stubs (taking care of data alignment) */
- /* align the data portion */
- BUILD_BUG_ON(!is_power_of_2(STUB_DATA_PAGES));
- stub_start = (host_task_size - 1) & ~(STUB_DATA_PAGES * PAGE_SIZE - 1);
+ host_task_size = get_top_address(envp);
+ /* reserve a few pages for the stubs */
+ stub_start = host_task_size - STUB_DATA_PAGES * PAGE_SIZE;
/* another page for the code portion */
stub_start -= PAGE_SIZE;
host_task_size = stub_start;
+ /* Limit TASK_SIZE to what is addressable by the page table */
+ task_size = host_task_size;
+ if (task_size > (unsigned long long) PTRS_PER_PGD * PGDIR_SIZE)
+ task_size = PTRS_PER_PGD * PGDIR_SIZE;
+
/*
* TASK_SIZE needs to be PGDIR_SIZE aligned or else exit_mmap craps
* out
*/
- task_size = host_task_size & PGDIR_MASK;
+ task_size = task_size & PGDIR_MASK;
/* OS sanity checks that need to happen before the kernel runs */
os_early_checks();
@@ -368,23 +374,18 @@ int __init linux_main(int argc, char **argv)
setup_machinename(init_utsname()->machine);
- highmem = 0;
+ physmem_size = (physmem_size + PAGE_SIZE - 1) & PAGE_MASK;
iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK;
- max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC;
- /*
- * Zones have to begin on a 1 << MAX_PAGE_ORDER page boundary,
- * so this makes sure that's true for highmem
- */
- max_physmem &= ~((1 << (PAGE_SHIFT + MAX_PAGE_ORDER)) - 1);
- if (physmem_size + iomem_size > max_physmem) {
- highmem = physmem_size + iomem_size - max_physmem;
- physmem_size -= highmem;
+ max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC;
+ if (physmem_size > max_physmem) {
+ physmem_size = max_physmem;
+ os_info("Physical memory size shrunk to %llu bytes\n",
+ physmem_size);
}
high_physmem = uml_physmem + physmem_size;
end_iomem = high_physmem + iomem_size;
- high_memory = (void *) end_iomem;
start_vm = VMALLOC_START;
@@ -400,6 +401,8 @@ int __init linux_main(int argc, char **argv)
os_info("Kernel virtual memory size shrunk to %lu bytes\n",
virtmem_size);
+ arch_task_struct_size = sizeof(struct task_struct) + host_fp_size;
+
os_flush_stdout();
return start_uml();
@@ -414,9 +417,8 @@ void __init setup_arch(char **cmdline_p)
{
u8 rng_seed[32];
- stack_protections((unsigned long) &init_thread_info);
- setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem);
- mem_total_pages(physmem_size, iomem_size, highmem);
+ stack_protections((unsigned long) init_task.stack);
+ setup_physmem(uml_physmem, uml_reserved, physmem_size);
uml_dtb_init();
read_initrd();
@@ -470,6 +472,11 @@ void *text_poke(void *addr, const void *opcode, size_t len)
return memcpy(addr, opcode, len);
}
+void *text_poke_copy(void *addr, const void *opcode, size_t len)
+{
+ return text_poke(addr, opcode, len);
+}
+
void text_poke_sync(void)
{
}
diff --git a/arch/um/kernel/um_arch.h b/arch/um/kernel/um_arch.h
index 1e07fb7ee35e..46e731ab9dfc 100644
--- a/arch/um/kernel/um_arch.h
+++ b/arch/um/kernel/um_arch.h
@@ -11,4 +11,6 @@ extern void __init uml_dtb_init(void);
static inline void uml_dtb_init(void) { }
#endif
+extern int __init read_initrd(void);
+
#endif
diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S
index 5c92d58a78e8..a409d4b66114 100644
--- a/arch/um/kernel/uml.lds.S
+++ b/arch/um/kernel/uml.lds.S
@@ -77,8 +77,6 @@ SECTIONS
.data :
{
INIT_TASK_DATA(KERNEL_STACK_SIZE)
- . = ALIGN(KERNEL_STACK_SIZE);
- *(.data..init_irqstack)
DATA_DATA
*(.gnu.linkonce.d*)
CONSTRUCTORS