diff options
author | Juergen Gross <jgross@suse.com> | 2017-02-09 14:39:58 +0100 |
---|---|---|
committer | Boris Ostrovsky <boris.ostrovsky@oracle.com> | 2017-02-09 11:26:49 -0500 |
commit | fd8aa9095a95c02dcc35540a263267c29b8fda9d (patch) | |
tree | a4e924ea2a50993b2c055c263fc1acd48ac99cb1 /drivers/xen/xenbus/xenbus_dev_frontend.c | |
parent | 5584ea250ae44f929feb4c7bd3877d1c5edbf813 (diff) | |
download | linux-fd8aa9095a95c02dcc35540a263267c29b8fda9d.tar.gz linux-fd8aa9095a95c02dcc35540a263267c29b8fda9d.tar.bz2 linux-fd8aa9095a95c02dcc35540a263267c29b8fda9d.zip |
xen: optimize xenbus driver for multiple concurrent xenstore accesses
Handling of multiple concurrent Xenstore accesses through the xenbus driver,
whether from the kernel or from user land, is rather lame today: xenbus is
capable of having only one access active at any point in time.
Rewrite xenbus to handle multiple requests concurrently by making use
of the request id of the Xenstore protocol. This requires the following changes:
- Instead of blocking inside xb_read() when trying to read data from
the xenstore ring buffer do so only in the main loop of
xenbus_thread().
- Instead of doing writes to the xenstore ring buffer in the context of
the caller just queue the request and do the write in the dedicated
xenbus thread.
- Instead of just forwarding the request id specified by the caller of
xenbus to xenstore, use a xenbus-internal unique request id. This will
allow multiple outstanding requests.
- Modify the locking scheme in order to allow multiple requests being
active in parallel.
- Instead of waiting for the reply of a user's xenstore request after
writing the request to the xenstore ring buffer return directly to
the caller and do the waiting in the read path.
Additionally signal handling was optimized by avoiding waking up the
xenbus thread or sending an event to Xenstore in case the addressed
entity is known to be running already.
As a result, communication with Xenstore is sped up by a factor of up
to 5: depending on the request type (read or write) and the amount of
data transferred, the gain was at least 20% (small reads) and went up to
a factor of 5 for large writes.
In the end some more rough edges of xenbus have been smoothed:
- Handling of memory shortage when reading from xenstore ring buffer in
the xenbus driver was not optimal: it was busy looping and issuing a
warning in each loop.
- In case of xenstore not running in dom0 but in a stubdom, we used to end
up with two xenbus threads running, because the initialization of xenbus
in dom0 (which expects a local xenstored) is redone later when connecting
to the xenstore domain. Up to now this was not a problem, as locking
prevented the two xenbus threads from interfering with each other, but
it was a waste of kernel resources.
- An out of memory situation while writing to or reading from the
xenstore ring buffer no longer will lead to a possible loss of
synchronization with xenstore.
- The user read and write parts are now interruptible by signals.
Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Diffstat (limited to 'drivers/xen/xenbus/xenbus_dev_frontend.c')
-rw-r--r-- | drivers/xen/xenbus/xenbus_dev_frontend.c | 188 |
1 files changed, 120 insertions, 68 deletions
diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index e4b984777507..4d343eed08f5 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -113,6 +113,7 @@ struct xenbus_file_priv { struct list_head read_buffers; wait_queue_head_t read_waitq; + struct kref kref; }; /* Read out any raw xenbus messages queued up. */ @@ -297,6 +298,107 @@ static void watch_fired(struct xenbus_watch *watch, mutex_unlock(&adap->dev_data->reply_mutex); } +static void xenbus_file_free(struct kref *kref) +{ + struct xenbus_file_priv *u; + struct xenbus_transaction_holder *trans, *tmp; + struct watch_adapter *watch, *tmp_watch; + struct read_buffer *rb, *tmp_rb; + + u = container_of(kref, struct xenbus_file_priv, kref); + + /* + * No need for locking here because there are no other users, + * by definition. + */ + + list_for_each_entry_safe(trans, tmp, &u->transactions, list) { + xenbus_transaction_end(trans->handle, 1); + list_del(&trans->list); + kfree(trans); + } + + list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { + unregister_xenbus_watch(&watch->watch); + list_del(&watch->list); + free_watch_adapter(watch); + } + + list_for_each_entry_safe(rb, tmp_rb, &u->read_buffers, list) { + list_del(&rb->list); + kfree(rb); + } + kfree(u); +} + +static struct xenbus_transaction_holder *xenbus_get_transaction( + struct xenbus_file_priv *u, uint32_t tx_id) +{ + struct xenbus_transaction_holder *trans; + + list_for_each_entry(trans, &u->transactions, list) + if (trans->handle.id == tx_id) + return trans; + + return NULL; +} + +void xenbus_dev_queue_reply(struct xb_req_data *req) +{ + struct xenbus_file_priv *u = req->par; + struct xenbus_transaction_holder *trans = NULL; + int rc; + LIST_HEAD(staging_q); + + xs_request_exit(req); + + mutex_lock(&u->msgbuffer_mutex); + + if (req->type == XS_TRANSACTION_START) { + trans = xenbus_get_transaction(u, 0); + if (WARN_ON(!trans)) + goto out; + if 
(req->msg.type == XS_ERROR) { + list_del(&trans->list); + kfree(trans); + } else { + rc = kstrtou32(req->body, 10, &trans->handle.id); + if (WARN_ON(rc)) + goto out; + } + } else if (req->msg.type == XS_TRANSACTION_END) { + trans = xenbus_get_transaction(u, req->msg.tx_id); + if (WARN_ON(!trans)) + goto out; + list_del(&trans->list); + kfree(trans); + } + + mutex_unlock(&u->msgbuffer_mutex); + + mutex_lock(&u->reply_mutex); + rc = queue_reply(&staging_q, &req->msg, sizeof(req->msg)); + if (!rc) + rc = queue_reply(&staging_q, req->body, req->msg.len); + if (!rc) { + list_splice_tail(&staging_q, &u->read_buffers); + wake_up(&u->read_waitq); + } else { + queue_cleanup(&staging_q); + } + mutex_unlock(&u->reply_mutex); + + kfree(req->body); + kfree(req); + + kref_put(&u->kref, xenbus_file_free); + + return; + + out: + mutex_unlock(&u->msgbuffer_mutex); +} + static int xenbus_command_reply(struct xenbus_file_priv *u, unsigned int msg_type, const char *reply) { @@ -317,6 +419,9 @@ static int xenbus_command_reply(struct xenbus_file_priv *u, wake_up(&u->read_waitq); mutex_unlock(&u->reply_mutex); + if (!rc) + kref_put(&u->kref, xenbus_file_free); + return rc; } @@ -324,57 +429,22 @@ static int xenbus_write_transaction(unsigned msg_type, struct xenbus_file_priv *u) { int rc; - void *reply; struct xenbus_transaction_holder *trans = NULL; - LIST_HEAD(staging_q); if (msg_type == XS_TRANSACTION_START) { - trans = kmalloc(sizeof(*trans), GFP_KERNEL); + trans = kzalloc(sizeof(*trans), GFP_KERNEL); if (!trans) { rc = -ENOMEM; goto out; } - } else if (u->u.msg.tx_id != 0) { - list_for_each_entry(trans, &u->transactions, list) - if (trans->handle.id == u->u.msg.tx_id) - break; - if (&trans->list == &u->transactions) - return xenbus_command_reply(u, XS_ERROR, "ENOENT"); - } - - reply = xenbus_dev_request_and_reply(&u->u.msg); - if (IS_ERR(reply)) { - if (msg_type == XS_TRANSACTION_START) - kfree(trans); - rc = PTR_ERR(reply); - goto out; - } + list_add(&trans->list, &u->transactions); 
+ } else if (u->u.msg.tx_id != 0 && + !xenbus_get_transaction(u, u->u.msg.tx_id)) + return xenbus_command_reply(u, XS_ERROR, "ENOENT"); - if (msg_type == XS_TRANSACTION_START) { - if (u->u.msg.type == XS_ERROR) - kfree(trans); - else { - trans->handle.id = simple_strtoul(reply, NULL, 0); - list_add(&trans->list, &u->transactions); - } - } else if (u->u.msg.type == XS_TRANSACTION_END) { - list_del(&trans->list); + rc = xenbus_dev_request_and_reply(&u->u.msg, u); + if (rc) kfree(trans); - } - - mutex_lock(&u->reply_mutex); - rc = queue_reply(&staging_q, &u->u.msg, sizeof(u->u.msg)); - if (!rc) - rc = queue_reply(&staging_q, reply, u->u.msg.len); - if (!rc) { - list_splice_tail(&staging_q, &u->read_buffers); - wake_up(&u->read_waitq); - } else { - queue_cleanup(&staging_q); - } - mutex_unlock(&u->reply_mutex); - - kfree(reply); out: return rc; @@ -506,6 +576,8 @@ static ssize_t xenbus_file_write(struct file *filp, * OK, now we have a complete message. Do something with it. */ + kref_get(&u->kref); + msg_type = u->u.msg.type; switch (msg_type) { @@ -520,8 +592,10 @@ static ssize_t xenbus_file_write(struct file *filp, ret = xenbus_write_transaction(msg_type, u); break; } - if (ret != 0) + if (ret != 0) { rc = ret; + kref_put(&u->kref, xenbus_file_free); + } /* Buffered message consumed */ u->len = 0; @@ -546,6 +620,8 @@ static int xenbus_file_open(struct inode *inode, struct file *filp) if (u == NULL) return -ENOMEM; + kref_init(&u->kref); + INIT_LIST_HEAD(&u->transactions); INIT_LIST_HEAD(&u->watches); INIT_LIST_HEAD(&u->read_buffers); @@ -562,32 +638,8 @@ static int xenbus_file_open(struct inode *inode, struct file *filp) static int xenbus_file_release(struct inode *inode, struct file *filp) { struct xenbus_file_priv *u = filp->private_data; - struct xenbus_transaction_holder *trans, *tmp; - struct watch_adapter *watch, *tmp_watch; - struct read_buffer *rb, *tmp_rb; - - /* - * No need for locking here because there are no other users, - * by definition. 
- */ - list_for_each_entry_safe(trans, tmp, &u->transactions, list) { - xenbus_transaction_end(trans->handle, 1); - list_del(&trans->list); - kfree(trans); - } - - list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { - unregister_xenbus_watch(&watch->watch); - list_del(&watch->list); - free_watch_adapter(watch); - } - - list_for_each_entry_safe(rb, tmp_rb, &u->read_buffers, list) { - list_del(&rb->list); - kfree(rb); - } - kfree(u); + kref_put(&u->kref, xenbus_file_free); return 0; } |