Diffstat (limited to 'drivers/hv')
-rw-r--r--   drivers/hv/channel.c        |  54
-rw-r--r--   drivers/hv/channel_mgmt.c   |  54
-rw-r--r--   drivers/hv/connection.c     |   6
-rw-r--r--   drivers/hv/hv.c             |  78
-rw-r--r--   drivers/hv/hv_balloon.c     |  88
-rw-r--r--   drivers/hv/hv_fcopy.c       |  27
-rw-r--r--   drivers/hv/hyperv_vmbus.h   |  21
-rw-r--r--   drivers/hv/vmbus_drv.c      |  37
8 files changed, 323 insertions, 42 deletions
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 433f72a1c006..2978f5ee8d2a 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -73,14 +73,14 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 	unsigned long flags;
 	int ret, t, err = 0;
 
-	spin_lock_irqsave(&newchannel->sc_lock, flags);
+	spin_lock_irqsave(&newchannel->lock, flags);
 	if (newchannel->state == CHANNEL_OPEN_STATE) {
 		newchannel->state = CHANNEL_OPENING_STATE;
 	} else {
-		spin_unlock_irqrestore(&newchannel->sc_lock, flags);
+		spin_unlock_irqrestore(&newchannel->lock, flags);
 		return -EINVAL;
 	}
-	spin_unlock_irqrestore(&newchannel->sc_lock, flags);
+	spin_unlock_irqrestore(&newchannel->lock, flags);
 
 	newchannel->onchannel_callback = onchannelcallback;
 	newchannel->channel_callback_context = context;
@@ -366,8 +366,8 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
 	unsigned long flags;
 	int ret = 0;
 
-	next_gpadl_handle = atomic_read(&vmbus_connection.next_gpadl_handle);
-	atomic_inc(&vmbus_connection.next_gpadl_handle);
+	next_gpadl_handle =
+		(atomic_inc_return(&vmbus_connection.next_gpadl_handle) - 1);
 
 	ret = create_gpadl_header(kbuffer, size, &msginfo, &msgcount);
 	if (ret)
@@ -686,6 +686,50 @@ EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer);
 
 /*
  * vmbus_sendpacket_multipagebuffer - Send a multi-page buffer packet
  * using a GPADL Direct packet type.
+ * The buffer includes the vmbus descriptor.
+ */
+int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
+			      struct vmbus_packet_mpb_array *desc,
+			      u32 desc_size,
+			      void *buffer, u32 bufferlen, u64 requestid)
+{
+	int ret;
+	u32 packetlen;
+	u32 packetlen_aligned;
+	struct kvec bufferlist[3];
+	u64 aligned_data = 0;
+	bool signal = false;
+
+	packetlen = desc_size + bufferlen;
+	packetlen_aligned = ALIGN(packetlen, sizeof(u64));
+
+	/* Setup the descriptor */
+	desc->type = VM_PKT_DATA_USING_GPA_DIRECT;
+	desc->flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
+	desc->dataoffset8 = desc_size >> 3; /* in 8-byte granularity */
+	desc->length8 = (u16)(packetlen_aligned >> 3);
+	desc->transactionid = requestid;
+	desc->rangecount = 1;
+
+	bufferlist[0].iov_base = desc;
+	bufferlist[0].iov_len = desc_size;
+	bufferlist[1].iov_base = buffer;
+	bufferlist[1].iov_len = bufferlen;
+	bufferlist[2].iov_base = &aligned_data;
+	bufferlist[2].iov_len = (packetlen_aligned - packetlen);
+
+	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal);
+
+	if (ret == 0 && signal)
+		vmbus_setevent(channel);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(vmbus_sendpacket_mpb_desc);
+
+/*
+ * vmbus_sendpacket_multipagebuffer - Send a multi-page buffer packet
+ * using a GPADL Direct packet type.
  */
 int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel,
 				struct hv_multipage_buffer *multi_pagebuffer,
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 2c59f030546b..3736f71bdec5 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -146,7 +146,7 @@ static struct vmbus_channel *alloc_channel(void)
 		return NULL;
 
 	spin_lock_init(&channel->inbound_lock);
-	spin_lock_init(&channel->sc_lock);
+	spin_lock_init(&channel->lock);
 
 	INIT_LIST_HEAD(&channel->sc_list);
 	INIT_LIST_HEAD(&channel->percpu_list);
@@ -246,9 +246,9 @@ static void vmbus_process_rescind_offer(struct work_struct *work)
 		spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
 	} else {
 		primary_channel = channel->primary_channel;
-		spin_lock_irqsave(&primary_channel->sc_lock, flags);
+		spin_lock_irqsave(&primary_channel->lock, flags);
 		list_del(&channel->sc_list);
-		spin_unlock_irqrestore(&primary_channel->sc_lock, flags);
+		spin_unlock_irqrestore(&primary_channel->lock, flags);
 	}
 	free_channel(channel);
 }
@@ -279,9 +279,6 @@ static void vmbus_process_offer(struct work_struct *work)
 	int ret;
 	unsigned long flags;
 
-	/* The next possible work is rescind handling */
-	INIT_WORK(&newchannel->work, vmbus_process_rescind_offer);
-
 	/* Make sure this is a new offer */
 	spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
 
@@ -323,9 +320,9 @@ static void vmbus_process_offer(struct work_struct *work)
 		 * Process the sub-channel.
 		 */
 		newchannel->primary_channel = channel;
-		spin_lock_irqsave(&channel->sc_lock, flags);
+		spin_lock_irqsave(&channel->lock, flags);
 		list_add_tail(&newchannel->sc_list, &channel->sc_list);
-		spin_unlock_irqrestore(&channel->sc_lock, flags);
+		spin_unlock_irqrestore(&channel->lock, flags);
 
 		if (newchannel->target_cpu != get_cpu()) {
 			put_cpu();
@@ -341,11 +338,10 @@ static void vmbus_process_offer(struct work_struct *work)
 		if (channel->sc_creation_callback != NULL)
 			channel->sc_creation_callback(newchannel);
 
-		return;
+		goto done_init_rescind;
 	}
 
-	free_channel(newchannel);
-	return;
+	goto err_free_chan;
 }
 
 /*
@@ -364,6 +360,8 @@ static void vmbus_process_offer(struct work_struct *work)
 			&newchannel->offermsg.offer.if_type,
 			&newchannel->offermsg.offer.if_instance,
 			newchannel);
+	if (!newchannel->device_obj)
+		goto err_free_chan;
 
 	/*
 	 * Add the new device to the bus. This will kick off device-driver
@@ -379,9 +377,19 @@ static void vmbus_process_offer(struct work_struct *work)
 		list_del(&newchannel->listentry);
 		spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
 		kfree(newchannel->device_obj);
-
-		free_channel(newchannel);
+		goto err_free_chan;
 	}
+done_init_rescind:
+	spin_lock_irqsave(&newchannel->lock, flags);
+	/* The next possible work is rescind handling */
+	INIT_WORK(&newchannel->work, vmbus_process_rescind_offer);
+	/* Check if rescind offer was already received */
+	if (newchannel->rescind)
+		queue_work(newchannel->controlwq, &newchannel->work);
+	spin_unlock_irqrestore(&newchannel->lock, flags);
+	return;
+err_free_chan:
+	free_channel(newchannel);
 }
 
 enum {
@@ -516,6 +524,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 {
 	struct vmbus_channel_rescind_offer *rescind;
 	struct vmbus_channel *channel;
+	unsigned long flags;
 
 	rescind = (struct vmbus_channel_rescind_offer *)hdr;
 	channel = relid2channel(rescind->child_relid);
@@ -524,11 +533,20 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 		/* Just return here, no channel found */
 		return;
 
+	spin_lock_irqsave(&channel->lock, flags);
 	channel->rescind = true;
+	/*
+	 * channel->work.func != vmbus_process_rescind_offer means we are still
+	 * processing offer request and the rescind offer processing should be
+	 * postponed. It will be done at the very end of vmbus_process_offer()
+	 * as rescind flag is being checked there.
+	 */
+	if (channel->work.func == vmbus_process_rescind_offer)
+		/* work is initialized for vmbus_process_rescind_offer() from
+		 * vmbus_process_offer() where the channel got created */
+		queue_work(channel->controlwq, &channel->work);
 
-	/* work is initialized for vmbus_process_rescind_offer() from
-	 * vmbus_process_offer() where the channel got created */
-	queue_work(channel->controlwq, &channel->work);
+	spin_unlock_irqrestore(&channel->lock, flags);
 }
 
 /*
@@ -815,7 +833,7 @@ cleanup:
 struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
 {
 	struct list_head *cur, *tmp;
-	int cur_cpu = hv_context.vp_index[smp_processor_id()];
+	int cur_cpu;
 	struct vmbus_channel *cur_channel;
 	struct vmbus_channel *outgoing_channel = primary;
 	int cpu_distance, new_cpu_distance;
@@ -823,6 +841,8 @@ struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
 	if (list_empty(&primary->sc_list))
 		return outgoing_channel;
 
+	cur_cpu = hv_context.vp_index[get_cpu()];
+	put_cpu();
 	list_for_each_safe(cur, tmp, &primary->sc_list) {
 		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
 		if (cur_channel->state != CHANNEL_OPENED_STATE)
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index e206619b946e..a63a795300b9 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -80,8 +80,10 @@ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo,
 	msg->interrupt_page = virt_to_phys(vmbus_connection.int_page);
 	msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]);
 	msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]);
-	if (version == VERSION_WIN8_1)
-		msg->target_vcpu = hv_context.vp_index[smp_processor_id()];
+	if (version == VERSION_WIN8_1) {
+		msg->target_vcpu = hv_context.vp_index[get_cpu()];
+		put_cpu();
+	}
 
 	/*
 	 * Add to list before we send the request since we may
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 3e4235c7a47f..50e51a51ff8b 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -28,7 +28,9 @@
 #include <linux/hyperv.h>
 #include <linux/version.h>
 #include <linux/interrupt.h>
+#include <linux/clockchips.h>
 #include <asm/hyperv.h>
+#include <asm/mshyperv.h>
 #include "hyperv_vmbus.h"
 
 /* The one and only */
@@ -37,6 +39,10 @@ struct hv_context hv_context = {
 	.hypercall_page = NULL,
 };
 
+#define HV_TIMER_FREQUENCY (10 * 1000 * 1000) /* 100ns period */
+#define HV_MAX_MAX_DELTA_TICKS 0xffffffff
+#define HV_MIN_DELTA_TICKS 1
+
 /*
  * query_hypervisor_info - Get version info of the windows hypervisor
  */
@@ -144,6 +150,8 @@ int hv_init(void)
 	       sizeof(int) * NR_CPUS);
 	memset(hv_context.event_dpc, 0,
 	       sizeof(void *) * NR_CPUS);
+	memset(hv_context.clk_evt, 0,
+	       sizeof(void *) * NR_CPUS);
 
 	max_leaf = query_hypervisor_info();
@@ -258,10 +266,63 @@ u16 hv_signal_event(void *con_id)
 	return status;
 }
 
+static int hv_ce_set_next_event(unsigned long delta,
+				struct clock_event_device *evt)
+{
+	cycle_t current_tick;
+
+	WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);
+
+	rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
+	current_tick += delta;
+	wrmsrl(HV_X64_MSR_STIMER0_COUNT, current_tick);
+	return 0;
+}
+
+static void hv_ce_setmode(enum clock_event_mode mode,
+			  struct clock_event_device *evt)
+{
+	union hv_timer_config timer_cfg;
+
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+		/* unsupported */
+		break;
+
+	case CLOCK_EVT_MODE_ONESHOT:
+		timer_cfg.enable = 1;
+		timer_cfg.auto_enable = 1;
+		timer_cfg.sintx = VMBUS_MESSAGE_SINT;
+		wrmsrl(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64);
+		break;
+
+	case CLOCK_EVT_MODE_UNUSED:
+	case CLOCK_EVT_MODE_SHUTDOWN:
+		wrmsrl(HV_X64_MSR_STIMER0_COUNT, 0);
+		wrmsrl(HV_X64_MSR_STIMER0_CONFIG, 0);
+		break;
+	case CLOCK_EVT_MODE_RESUME:
+		break;
+	}
+}
+
+static void hv_init_clockevent_device(struct clock_event_device *dev, int cpu)
+{
+	dev->name = "Hyper-V clockevent";
+	dev->features = CLOCK_EVT_FEAT_ONESHOT;
+	dev->cpumask = cpumask_of(cpu);
+	dev->rating = 1000;
+	dev->owner = THIS_MODULE;
+
+	dev->set_mode = hv_ce_setmode;
+	dev->set_next_event = hv_ce_set_next_event;
+}
+
 int hv_synic_alloc(void)
 {
 	size_t size = sizeof(struct tasklet_struct);
+	size_t ced_size = sizeof(struct clock_event_device);
 	int cpu;
 
 	for_each_online_cpu(cpu) {
@@ -272,6 +333,13 @@ int hv_synic_alloc(void)
 		}
 		tasklet_init(hv_context.event_dpc[cpu], vmbus_on_event, cpu);
 
+		hv_context.clk_evt[cpu] = kzalloc(ced_size, GFP_ATOMIC);
+		if (hv_context.clk_evt[cpu] == NULL) {
+			pr_err("Unable to allocate clock event device\n");
+			goto err;
+		}
+		hv_init_clockevent_device(hv_context.clk_evt[cpu], cpu);
+
 		hv_context.synic_message_page[cpu] =
 			(void *)get_zeroed_page(GFP_ATOMIC);
@@ -305,6 +373,7 @@ err:
 static void hv_synic_free_cpu(int cpu)
 {
 	kfree(hv_context.event_dpc[cpu]);
+	kfree(hv_context.clk_evt[cpu]);
 	if (hv_context.synic_event_page[cpu])
 		free_page((unsigned long)hv_context.synic_event_page[cpu]);
 	if (hv_context.synic_message_page[cpu])
@@ -388,6 +457,15 @@ void hv_synic_init(void *arg)
 	hv_context.vp_index[cpu] = (u32)vp_index;
 
 	INIT_LIST_HEAD(&hv_context.percpu_list[cpu]);
+
+	/*
+	 * Register the per-cpu clockevent source.
+	 */
+	if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE)
+		clockevents_config_and_register(hv_context.clk_evt[cpu],
+						HV_TIMER_FREQUENCY,
+						HV_MIN_DELTA_TICKS,
+						HV_MAX_MAX_DELTA_TICKS);
 	return;
 }
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index b958ded8ac7e..ff169386b2c7 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -533,6 +533,9 @@ struct hv_dynmem_device {
 	 */
 	struct task_struct *thread;
 
+	struct mutex ha_region_mutex;
+	struct completion waiter_event;
+
 	/*
 	 * A list of hot-add regions.
 	 */
@@ -549,7 +552,59 @@ struct hv_dynmem_device {
 static struct hv_dynmem_device dm_device;
 
 static void post_status(struct hv_dynmem_device *dm);
+
 #ifdef CONFIG_MEMORY_HOTPLUG
+static void acquire_region_mutex(bool trylock)
+{
+	if (trylock) {
+		reinit_completion(&dm_device.waiter_event);
+		while (!mutex_trylock(&dm_device.ha_region_mutex))
+			wait_for_completion(&dm_device.waiter_event);
+	} else {
+		mutex_lock(&dm_device.ha_region_mutex);
+	}
+}
+
+static void release_region_mutex(bool trylock)
+{
+	if (trylock) {
+		mutex_unlock(&dm_device.ha_region_mutex);
+	} else {
+		mutex_unlock(&dm_device.ha_region_mutex);
+		complete(&dm_device.waiter_event);
+	}
+}
+
+static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
+			      void *v)
+{
+	switch (val) {
+	case MEM_GOING_ONLINE:
+		acquire_region_mutex(true);
+		break;
+
+	case MEM_ONLINE:
+	case MEM_CANCEL_ONLINE:
+		release_region_mutex(true);
+		if (dm_device.ha_waiting) {
+			dm_device.ha_waiting = false;
+			complete(&dm_device.ol_waitevent);
+		}
+		break;
+
+	case MEM_GOING_OFFLINE:
+	case MEM_OFFLINE:
+	case MEM_CANCEL_OFFLINE:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block hv_memory_nb = {
+	.notifier_call = hv_memory_notifier,
+	.priority = 0
+};
+
 
 static void hv_bring_pgs_online(unsigned long start_pfn, unsigned long size)
 {
@@ -591,6 +646,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 		init_completion(&dm_device.ol_waitevent);
 		dm_device.ha_waiting = true;
 
+		release_region_mutex(false);
 		nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
 		ret = add_memory(nid, PFN_PHYS((start_pfn)),
 				(HA_CHUNK << PAGE_SHIFT));
@@ -619,6 +675,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 		 * have not been "onlined" within the allowed time.
 		 */
 		wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
+		acquire_region_mutex(false);
 		post_status(&dm_device);
 	}
@@ -632,11 +689,6 @@ static void hv_online_page(struct page *pg)
 	unsigned long cur_start_pgp;
 	unsigned long cur_end_pgp;
 
-	if (dm_device.ha_waiting) {
-		dm_device.ha_waiting = false;
-		complete(&dm_device.ol_waitevent);
-	}
-
 	list_for_each(cur, &dm_device.ha_region_list) {
 		has = list_entry(cur, struct hv_hotadd_state, list);
 		cur_start_pgp = (unsigned long)
@@ -834,6 +886,7 @@ static void hot_add_req(struct work_struct *dummy)
 	resp.hdr.size = sizeof(struct dm_hot_add_response);
 
 #ifdef CONFIG_MEMORY_HOTPLUG
+	acquire_region_mutex(false);
 	pg_start = dm->ha_wrk.ha_page_range.finfo.start_page;
 	pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt;
@@ -865,6 +918,7 @@ static void hot_add_req(struct work_struct *dummy)
 	if (do_hot_add)
 		resp.page_count = process_hot_add(pg_start, pfn_cnt,
 						rg_start, rg_sz);
+	release_region_mutex(false);
 #endif
 	/*
 	 * The result field of the response structure has the
@@ -928,9 +982,8 @@ static unsigned long compute_balloon_floor(void)
 	 *     128        72    (1/2)
	 *     512       168    (1/4)
	 *    2048       360    (1/8)
-	 *    8192       552    (1/32)
-	 *   32768      1320
-	 *  131072      4392
+	 *    8192       768    (1/16)
+	 *   32768      1536    (1/32)
	 */
 	if (totalram_pages < MB2PAGES(128))
 		min_pages = MB2PAGES(8) + (totalram_pages >> 1);
@@ -938,8 +991,10 @@ static unsigned long compute_balloon_floor(void)
 		min_pages = MB2PAGES(40) + (totalram_pages >> 2);
 	else if (totalram_pages < MB2PAGES(2048))
 		min_pages = MB2PAGES(104) + (totalram_pages >> 3);
+	else if (totalram_pages < MB2PAGES(8192))
+		min_pages = MB2PAGES(256) + (totalram_pages >> 4);
 	else
-		min_pages = MB2PAGES(296) + (totalram_pages >> 5);
+		min_pages = MB2PAGES(512) + (totalram_pages >> 5);
 #undef MB2PAGES
 	return min_pages;
 }
@@ -1171,7 +1226,7 @@ static void balloon_down(struct hv_dynmem_device *dm,
 
 	for (i = 0; i < range_count; i++) {
 		free_balloon_pages(dm, &range_array[i]);
-		post_status(&dm_device);
+		complete(&dm_device.config_event);
 	}
 
 	if (req->more_pages == 1)
@@ -1195,19 +1250,16 @@ static void balloon_onchannelcallback(void *context);
 static int dm_thread_func(void *dm_dev)
 {
 	struct hv_dynmem_device *dm = dm_dev;
-	int t;
 
 	while (!kthread_should_stop()) {
-		t = wait_for_completion_interruptible_timeout(
+		wait_for_completion_interruptible_timeout(
 						&dm_device.config_event, 1*HZ);
 		/*
 		 * The host expects us to post information on the memory
 		 * pressure every second.
 		 */
-
-		if (t == 0)
-			post_status(dm);
-
+		reinit_completion(&dm_device.config_event);
+		post_status(dm);
 	}
 
 	return 0;
@@ -1387,7 +1439,9 @@ static int balloon_probe(struct hv_device *dev,
 	dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN7;
 	init_completion(&dm_device.host_event);
 	init_completion(&dm_device.config_event);
+	init_completion(&dm_device.waiter_event);
 	INIT_LIST_HEAD(&dm_device.ha_region_list);
+	mutex_init(&dm_device.ha_region_mutex);
 	INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
 	INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
 	dm_device.host_specified_ha_region = false;
@@ -1401,6 +1455,7 @@ static int balloon_probe(struct hv_device *dev,
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 	set_online_page_callback(&hv_online_page);
+	register_memory_notifier(&hv_memory_nb);
 #endif
 
 	hv_set_drvdata(dev, &dm_device);
@@ -1519,6 +1574,7 @@ static int balloon_remove(struct hv_device *dev)
 	kfree(send_buffer);
 #ifdef CONFIG_MEMORY_HOTPLUG
 	restore_online_page_callback(&hv_online_page);
+	unregister_memory_notifier(&hv_memory_nb);
 #endif
 	list_for_each_safe(cur, tmp, &dm->ha_region_list) {
 		has = list_entry(cur, struct hv_hotadd_state, list);
diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
index 23b2ce294c4c..cd453e4b2a07 100644
--- a/drivers/hv/hv_fcopy.c
+++ b/drivers/hv/hv_fcopy.c
@@ -86,6 +86,18 @@ static void fcopy_work_func(struct work_struct *dummy)
 	 * process the pending transaction.
 	 */
 	fcopy_respond_to_host(HV_E_FAIL);
+
+	/* In the case the user-space daemon crashes, hangs or is killed, we
+	 * need to down the semaphore, otherwise, after the daemon starts next
+	 * time, the obsolete data in fcopy_transaction.message or
+	 * fcopy_transaction.fcopy_msg will be used immediately.
+	 *
+	 * NOTE: fcopy_read() happens to get the semaphore (very rare)? We're
+	 * still OK, because we've reported the failure to the host.
+	 */
+	if (down_trylock(&fcopy_transaction.read_sema))
+		;
+
 }
 
 static int fcopy_handle_handshake(u32 version)
@@ -344,6 +356,14 @@ static int fcopy_open(struct inode *inode, struct file *f)
 	return 0;
 }
 
+/* XXX: there are still some tricky corner cases, e.g.,
+ * 1) In an SMP guest, when fcopy_release() runs between
+ * schedule_delayed_work() and fcopy_send_data(), there is
+ * still a chance an obsolete message will be queued.
+ *
+ * 2) When the fcopy daemon is running, if we unload the driver,
+ * we'll notice a kernel oops when we kill the daemon later.
+ */
 static int fcopy_release(struct inode *inode, struct file *f)
 {
 	/*
@@ -351,6 +371,13 @@ static int fcopy_release(struct inode *inode, struct file *f)
 	 */
 	in_hand_shake = true;
 	opened = false;
+
+	if (cancel_delayed_work_sync(&fcopy_work)) {
+		/* We haven't up()-ed the semaphore (very rare)? */
+		if (down_trylock(&fcopy_transaction.read_sema))
+			;
+		fcopy_respond_to_host(HV_E_FAIL);
+	}
 	return 0;
 }
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index c386d8dc7223..44b1c9424712 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -178,6 +178,23 @@ struct hv_message_header {
 	};
 };
 
+/*
+ * Timer configuration register.
+ */
+union hv_timer_config {
+	u64 as_uint64;
+	struct {
+		u64 enable:1;
+		u64 periodic:1;
+		u64 lazy:1;
+		u64 auto_enable:1;
+		u64 reserved_z0:12;
+		u64 sintx:4;
+		u64 reserved_z1:44;
+	};
+};
+
+
 /* Define timer message payload structure. */
 struct hv_timer_message_payload {
 	u32 timer_index;
@@ -519,6 +536,10 @@ struct hv_context {
 	 * buffer to post messages to the host.
 	 */
 	void *post_msg_page[NR_CPUS];
+	/*
+	 * Support PV clockevent device.
+	 */
+	struct clock_event_device *clk_evt[NR_CPUS];
 };
 
 extern struct hv_context hv_context;
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index bb3725b672cf..f518b8d7a5b5 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -32,6 +32,7 @@
 #include <linux/completion.h>
 #include <linux/hyperv.h>
 #include <linux/kernel_stat.h>
+#include <linux/clockchips.h>
 #include <asm/hyperv.h>
 #include <asm/hypervisor.h>
 #include <asm/mshyperv.h>
@@ -578,6 +579,34 @@ static void vmbus_onmessage_work(struct work_struct *work)
 	kfree(ctx);
 }
 
+static void hv_process_timer_expiration(struct hv_message *msg, int cpu)
+{
+	struct clock_event_device *dev = hv_context.clk_evt[cpu];
+
+	if (dev->event_handler)
+		dev->event_handler(dev);
+
+	msg->header.message_type = HVMSG_NONE;
+
+	/*
+	 * Make sure the write to MessageType (i.e. set to
+	 * HVMSG_NONE) happens before we read the
+	 * MessagePending and EOMing. Otherwise, the EOMing
+	 * will not deliver any more messages since there is
+	 * no empty slot
+	 */
+	mb();
+
+	if (msg->header.message_flags.msg_pending) {
+		/*
+		 * This will cause message queue rescan to
+		 * possibly deliver another msg from the
+		 * hypervisor
+		 */
+		wrmsrl(HV_X64_MSR_EOM, 0);
+	}
+}
+
 static void vmbus_on_msg_dpc(unsigned long data)
 {
 	int cpu = smp_processor_id();
@@ -667,8 +696,12 @@ static void vmbus_isr(void)
 	msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
 
 	/* Check if there are actual msgs to be processed */
-	if (msg->header.message_type != HVMSG_NONE)
-		tasklet_schedule(&msg_dpc);
+	if (msg->header.message_type != HVMSG_NONE) {
+		if (msg->header.message_type == HVMSG_TIMER_EXPIRED)
+			hv_process_timer_expiration(msg, cpu);
+		else
+			tasklet_schedule(&msg_dpc);
+	}
 }
 
 /*
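
A note for driver authors on the new export: vmbus_sendpacket_mpb_desc() fills in the packet header fields (type, flags, dataoffset8, length8, transactionid, rangecount) itself, so a caller only allocates a descriptor large enough for its PFN list and populates the range. The sketch below is illustrative, not part of the patch; it assumes the struct vmbus_packet_mpb_array / struct hv_mpb_array layout (a fixed header followed by a flexible u64 pfn_array[]) that this patch series adds to include/linux/hyperv.h, and example_send_mpb() is a hypothetical helper.

	#include <linux/hyperv.h>
	#include <linux/slab.h>
	#include <linux/string.h>

	/* Hypothetical caller: describe a buffer spanning pfn_count pages
	 * and hand it to the host with a small driver-defined header. */
	static int example_send_mpb(struct vmbus_channel *chan,
				    const u64 *pfn_array, u32 pfn_count,
				    u32 offset, u32 len,
				    void *hdr, u32 hdrlen, u64 requestid)
	{
		struct vmbus_packet_mpb_array *desc;
		u32 desc_size;
		int ret;

		/* Descriptor header plus the trailing PFN list. */
		desc_size = sizeof(*desc) + pfn_count * sizeof(u64);
		desc = kzalloc(desc_size, GFP_ATOMIC);
		if (!desc)
			return -ENOMEM;

		desc->range.offset = offset;	/* byte offset into first page */
		desc->range.len = len;		/* total buffer length in bytes */
		memcpy(desc->range.pfn_array, pfn_array,
		       pfn_count * sizeof(u64));

		/* Header fields are filled in by the callee itself. */
		ret = vmbus_sendpacket_mpb_desc(chan, desc, desc_size,
						hdr, hdrlen, requestid);
		kfree(desc);
		return ret;
	}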
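Several hunks above (vmbus_get_outgoing_channel() and vmbus_negotiate_version()) replace smp_processor_id() with a get_cpu()/put_cpu() pair. These functions run in preemptible context, where smp_processor_id() is invalid (and warns under CONFIG_DEBUG_PREEMPT) because the task may migrate between reading the CPU number and using it. A minimal sketch of the adopted pattern, assuming the usual drivers/hv context; example_read_vp_index() is illustrative, not in the patch:

	#include <linux/smp.h>
	#include "hyperv_vmbus.h"

	static u32 example_read_vp_index(void)
	{
		/* get_cpu() disables preemption, so the CPU number stays
		 * valid while the per-cpu table entry is read. */
		int cpu = get_cpu();
		u32 vp = hv_context.vp_index[cpu];

		put_cpu();	/* re-enable preemption */
		return vp;
	}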
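The oneshot setup in hv_ce_setmode() writes a value composed through the new union hv_timer_config into HV_X64_MSR_STIMER0_CONFIG. The bitfield packing can be checked in user space: the program below mirrors the union from the patch verbatim, assumes VMBUS_MESSAGE_SINT == 2 (its value in hyperv_vmbus.h), and zero-initializes the union so the reserved bits are deterministic in this sketch.

	#include <stdio.h>
	#include <stdint.h>

	/* User-space mirror of union hv_timer_config from hyperv_vmbus.h. */
	union hv_timer_config {
		uint64_t as_uint64;
		struct {
			uint64_t enable:1;
			uint64_t periodic:1;
			uint64_t lazy:1;
			uint64_t auto_enable:1;
			uint64_t reserved_z0:12;
			uint64_t sintx:4;
			uint64_t reserved_z1:44;
		};
	};

	int main(void)
	{
		union hv_timer_config cfg = { .as_uint64 = 0 };

		/* What hv_ce_setmode() programs for CLOCK_EVT_MODE_ONESHOT:
		 * timer enabled, auto-enabled by a write to the count MSR,
		 * interrupt routed to the VMBus message SINT. */
		cfg.enable = 1;
		cfg.auto_enable = 1;
		cfg.sintx = 2;		/* assumed VMBUS_MESSAGE_SINT */

		/* Prints 0x0000000000020009: bit 0, bit 3, bits 16-19. */
		printf("HV_X64_MSR_STIMER0_CONFIG <- 0x%016llx\n",
		       (unsigned long long)cfg.as_uint64);
		return 0;
	}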
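The compute_balloon_floor() retuning adds a 1/16 tier and raises the large-memory constant, so big guests keep a higher floor. The arithmetic is easy to re-derive in user space; the helper below restates the new formula in megabytes (an illustrative translation, not the kernel code, which works in pages via MB2PAGES()):

	#include <stdio.h>

	/* Revised floor: 8 MB + 1/2 below 128 MB, 40 MB + 1/4 below 512 MB,
	 * 104 MB + 1/8 below 2048 MB, 256 MB + 1/16 below 8192 MB (the new
	 * tier), and 512 MB + 1/32 beyond that. */
	static unsigned long floor_mb(unsigned long ram_mb)
	{
		if (ram_mb < 128)
			return 8 + (ram_mb >> 1);
		else if (ram_mb < 512)
			return 40 + (ram_mb >> 2);
		else if (ram_mb < 2048)
			return 104 + (ram_mb >> 3);
		else if (ram_mb < 8192)
			return 256 + (ram_mb >> 4);	/* new 1/16 tier */
		else
			return 512 + (ram_mb >> 5);
	}

	int main(void)
	{
		/* Mid-tier sample points; e.g. a 16 GB guest now keeps a
		 * 1024 MB floor instead of the old 296 + 16384/32 = 808 MB. */
		unsigned long sizes[] = { 100, 256, 1024, 4096, 16384 };
		unsigned int i;

		for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
			printf("%6lu MB -> floor %4lu MB\n",
			       sizes[i], floor_mb(sizes[i]));
		return 0;
	}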