vhost_net: split out datacopy logic
author Jason Wang <jasowang@redhat.com>
Fri, 20 Jul 2018 00:15:18 +0000 (08:15 +0800)
committer David S. Miller <davem@davemloft.net>
Sun, 22 Jul 2018 16:43:31 +0000 (09:43 -0700)
Instead of mixing zerocopy and datacopy logic, this patch splits the
datacopy logic out. The result is more compact code, and ad-hoc
optimizations can be done on top more easily.
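
After the split, handle_tx() becomes a thin dispatcher. A condensed
sketch of the resulting structure (taken from the diff below, with
locking, iotlb prefetch and error handling elided):

  static void handle_tx(struct vhost_net *net)
  {
          struct vhost_virtqueue *vq = &net->vqs[VHOST_NET_VQ_TX].vq;
          struct socket *sock = vq->private_data;

          if (vhost_sock_zcopy(sock))
                  handle_tx_zerocopy(net, sock);
          else
                  handle_tx_copy(net, sock);
  }

With the two paths separated, handle_tx_copy() no longer has to test
for zerocopy support on every iteration, and future optimizations
(e.g. batching in the copy path) can be made without touching the
zerocopy path.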

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/vhost/net.c

index f59b615e29892e1fb763224f617c6ca179d299be..9cef0b2502b05f22681821986b87e0e709b8e810 100644
@@ -520,9 +520,7 @@ static bool tx_can_batch(struct vhost_virtqueue *vq, size_t total_len)
               !vhost_vq_avail_empty(vq->dev, vq);
 }
 
-/* Expects to be always run from workqueue - which acts as
- * read-size critical section for our kind of RCU. */
-static void handle_tx(struct vhost_net *net)
+static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
 {
        struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
        struct vhost_virtqueue *vq = &nvq->vq;
@@ -537,30 +535,76 @@ static void handle_tx(struct vhost_net *net)
        };
        size_t len, total_len = 0;
        int err;
-       struct socket *sock;
-       struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
-       bool zcopy, zcopy_used;
        int sent_pkts = 0;
 
-       mutex_lock(&vq->mutex);
-       sock = vq->private_data;
-       if (!sock)
-               goto out;
+       for (;;) {
+               bool busyloop_intr = false;
 
-       if (!vq_iotlb_prefetch(vq))
-               goto out;
+               head = get_tx_bufs(net, nvq, &msg, &out, &in, &len,
+                                  &busyloop_intr);
+               /* On error, stop handling until the next kick. */
+               if (unlikely(head < 0))
+                       break;
+               /* Nothing new?  Wait for eventfd to tell us they refilled. */
+               if (head == vq->num) {
+                       if (unlikely(busyloop_intr)) {
+                               vhost_poll_queue(&vq->poll);
+                       } else if (unlikely(vhost_enable_notify(&net->dev,
+                                                               vq))) {
+                               vhost_disable_notify(&net->dev, vq);
+                               continue;
+                       }
+                       break;
+               }
 
-       vhost_disable_notify(&net->dev, vq);
-       vhost_net_disable_vq(net, vq);
+               total_len += len;
+               if (tx_can_batch(vq, total_len))
+                       msg.msg_flags |= MSG_MORE;
+               else
+                       msg.msg_flags &= ~MSG_MORE;
+
+               /* TODO: Check specific error and bomb out unless ENOBUFS? */
+               err = sock->ops->sendmsg(sock, &msg, len);
+               if (unlikely(err < 0)) {
+                       vhost_discard_vq_desc(vq, 1);
+                       vhost_net_enable_vq(net, vq);
+                       break;
+               }
+               if (err != len)
+                       pr_debug("Truncated TX packet: len %d != %zd\n",
+                                err, len);
+               vhost_add_used_and_signal(&net->dev, vq, head, 0);
+               if (vhost_exceeds_weight(++sent_pkts, total_len)) {
+                       vhost_poll_queue(&vq->poll);
+                       break;
+               }
+       }
+}
 
-       zcopy = nvq->ubufs;
+static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
+{
+       struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
+       struct vhost_virtqueue *vq = &nvq->vq;
+       unsigned out, in;
+       int head;
+       struct msghdr msg = {
+               .msg_name = NULL,
+               .msg_namelen = 0,
+               .msg_control = NULL,
+               .msg_controllen = 0,
+               .msg_flags = MSG_DONTWAIT,
+       };
+       size_t len, total_len = 0;
+       int err;
+       struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
+       bool zcopy_used;
+       int sent_pkts = 0;
 
        for (;;) {
                bool busyloop_intr;
 
                /* Release DMAs done buffers first */
-               if (zcopy)
-                       vhost_zerocopy_signal_used(net, vq);
+               vhost_zerocopy_signal_used(net, vq);
 
                busyloop_intr = false;
                head = get_tx_bufs(net, nvq, &msg, &out, &in, &len,
@@ -579,9 +623,9 @@ static void handle_tx(struct vhost_net *net)
                        break;
                }
 
-               zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN
-                                  && !vhost_exceeds_maxpend(net)
-                                  && vhost_net_tx_select_zcopy(net);
+               zcopy_used = len >= VHOST_GOODCOPY_LEN
+                            && !vhost_exceeds_maxpend(net)
+                            && vhost_net_tx_select_zcopy(net);
 
                /* use msg_control to pass vhost zerocopy ubuf info to skb */
                if (zcopy_used) {
@@ -636,6 +680,32 @@ static void handle_tx(struct vhost_net *net)
                        break;
                }
        }
+}
+
+/* Expects to be always run from workqueue - which acts as
+ * read-size critical section for our kind of RCU. */
+static void handle_tx(struct vhost_net *net)
+{
+       struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
+       struct vhost_virtqueue *vq = &nvq->vq;
+       struct socket *sock;
+
+       mutex_lock(&vq->mutex);
+       sock = vq->private_data;
+       if (!sock)
+               goto out;
+
+       if (!vq_iotlb_prefetch(vq))
+               goto out;
+
+       vhost_disable_notify(&net->dev, vq);
+       vhost_net_disable_vq(net, vq);
+
+       if (vhost_sock_zcopy(sock))
+               handle_tx_zerocopy(net, sock);
+       else
+               handle_tx_copy(net, sock);
+
 out:
        mutex_unlock(&vq->mutex);
 }