nvmet: Optionally use PCI P2P memory

author Logan Gunthorpe <logang@deltatee.com>

Thu, 4 Oct 2018 21:27:47 +0000 (15:27 -0600)

committer Bjorn Helgaas <bhelgaas@google.com>

Wed, 17 Oct 2018 17:18:24 +0000 (12:18 -0500)
author Logan Gunthorpe <logang@deltatee.com>
Thu, 4 Oct 2018 21:27:47 +0000 (15:27 -0600)
committer Bjorn Helgaas <bhelgaas@google.com>
Wed, 17 Oct 2018 17:18:24 +0000 (12:18 -0500)
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c

index b37a8e3e3f80012ff28a490e28d3583fc6664700..d895579b6c5dc19477a4ccea2597c3092d15ab1e 100644 (file)
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -17,6 +17,8 @@
  #include <linux/slab.h>
  #include <linux/stat.h>
  #include <linux/ctype.h>
+#include <linux/pci.h>
+#include <linux/pci-p2pdma.h>
  
  #include "nvmet.h"
  
@@ -340,6 +342,48 @@ out_unlock:
  
  CONFIGFS_ATTR(nvmet_ns_, device_path);
  
+#ifdef CONFIG_PCI_P2PDMA
+/*
+ * configfs read handler for the namespace "p2pmem" attribute.
+ *
+ * Hands the namespace's stored provider device and its use_p2pmem flag to
+ * pci_p2pdma_enable_show() together with @page and returns that helper's
+ * result unchanged.
+ */
+static ssize_t nvmet_ns_p2pmem_show(struct config_item *item, char *page)
+{
+       struct nvmet_ns *target_ns = to_nvmet_ns(item);
+       struct pci_dev *provider = target_ns->p2p_dev;
+
+       return pci_p2pdma_enable_show(page, provider, target_ns->use_p2pmem);
+}
+
+/*
+ * configfs write handler for the namespace "p2pmem" attribute.
+ *
+ * Runs entirely under ns->subsys->lock. The setting cannot be changed while
+ * the namespace is enabled (-EBUSY). Otherwise @page is parsed by
+ * pci_p2pdma_enable_store(); a non-zero result from the parser is returned
+ * as is and the namespace is left untouched. On success the new flag and
+ * device replace the old ones, the previously stored device is released
+ * with pci_dev_put(), and @count is returned.
+ */
+static ssize_t nvmet_ns_p2pmem_store(struct config_item *item,
+               const char *page, size_t count)
+{
+       struct nvmet_ns *ns = to_nvmet_ns(item);
+       struct pci_dev *new_dev = NULL;
+       bool enable;
+       int rc;
+
+       mutex_lock(&ns->subsys->lock);
+
+       if (ns->enabled) {
+               rc = -EBUSY;
+       } else {
+               rc = pci_p2pdma_enable_store(page, &new_dev, &enable);
+               if (!rc) {
+                       ns->use_p2pmem = enable;
+                       /* Release the old device before storing the new one. */
+                       pci_dev_put(ns->p2p_dev);
+                       ns->p2p_dev = new_dev;
+                       rc = count;
+               }
+       }
+
+       mutex_unlock(&ns->subsys->lock);
+
+       return rc;
+}
+
+CONFIGFS_ATTR(nvmet_ns_, p2pmem);
+#endif /* CONFIG_PCI_P2PDMA */
+
  static ssize_t nvmet_ns_device_uuid_show(struct config_item *item, char *page)
  {
         return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->uuid);
@@ -509,6 +553,9 @@ static struct configfs_attribute *nvmet_ns_attrs[] = {
         &nvmet_ns_attr_ana_grpid,
         &nvmet_ns_attr_enable,
         &nvmet_ns_attr_buffered_io,
+#ifdef CONFIG_PCI_P2PDMA
+       &nvmet_ns_attr_p2pmem,
+#endif
         NULL,
  };
  
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c

index 310b9fb54f6a92123fc584ec0c9a55e277efff80..9b4d84cfc2245b60d8f9703070eec703aead7da7 100644 (file)
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -15,6 +15,7 @@
  #include <linux/module.h>
  #include <linux/random.h>
  #include <linux/rculist.h>
+#include <linux/pci-p2pdma.h>
  
  #include "nvmet.h"
  
@@ -365,9 +366,93 @@ static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
         nvmet_file_ns_disable(ns);
  }
  
+/*
+ * Check a namespace's P2P configuration when the namespace is enabled.
+ *
+ * Returns 0 when P2P memory was not requested for @ns or when every check
+ * passes, and -EINVAL when the namespace is not block-device backed, its
+ * queue does not advertise P2P DMA support, the configured provider is
+ * rejected by pci_p2pdma_distance(), or no provider can be found.
+ */
+static int nvmet_p2pmem_ns_enable(struct nvmet_ns *ns)
+{
+       struct pci_dev *candidate;
+       int distance;
+
+       if (!ns->use_p2pmem)
+               return 0;
+
+       if (!ns->bdev) {
+               pr_err("peer-to-peer DMA is not supported by non-block device namespaces\n");
+               return -EINVAL;
+       }
+
+       if (!blk_queue_pci_p2pdma(ns->bdev->bd_queue)) {
+               pr_err("peer-to-peer DMA is not supported by the driver of %s\n",
+                      ns->device_path);
+               return -EINVAL;
+       }
+
+       /* An explicitly configured provider only needs a distance check. */
+       if (ns->p2p_dev) {
+               distance = pci_p2pdma_distance(ns->p2p_dev, nvmet_ns_dev(ns),
+                                              true);
+               return distance < 0 ? -EINVAL : 0;
+       }
+
+       /*
+        * No provider was configured. For now only verify that some p2pmem
+        * is available so an error can be reported to the user right away.
+        * The device actually used is chosen later, when the controller is
+        * set up and the port's device is known.
+        */
+       candidate = pci_p2pmem_find(nvmet_ns_dev(ns));
+       if (!candidate) {
+               pr_err("no peer-to-peer memory is available for %s\n",
+                      ns->device_path);
+               return -EINVAL;
+       }
+
+       /* This was only an availability probe; release the device again. */
+       pci_dev_put(candidate);
+
+       return 0;
+}
+
+/*
+ * Choose the P2P memory provider a controller should use for one namespace
+ * and record it in ctrl->p2p_ns_map, keyed by the namespace id.
+ *
+ * Nothing is recorded when the controller has no P2P client device, when
+ * the namespace did not opt in to P2P memory, when the namespace's
+ * configured provider is rejected by pci_p2pdma_distance() for the client,
+ * when no provider usable by both the client and the namespace's device is
+ * found, or when inserting into the map fails. On success the device
+ * reference taken here stays with the map entry; on insertion failure it
+ * is dropped before returning.
+ *
+ * Note: ctrl->subsys->lock should be held when calling this function
+ */
+static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
+                                   struct nvmet_ns *ns)
+{
+       struct device *clients[2];
+       struct pci_dev *p2p_dev;
+       int ret;
+
+       /*
+        * Skip namespaces that did not request P2P memory. Without this
+        * check every namespace would be given a provider, and a namespace
+        * without a block device would pass a NULL client (nvmet_ns_dev()
+        * returns NULL for it) to pci_p2pmem_find_many().
+        */
+       if (!ctrl->p2p_client || !ns->use_p2pmem)
+               return;
+
+       if (ns->p2p_dev) {
+               ret = pci_p2pdma_distance(ns->p2p_dev, ctrl->p2p_client, true);
+               if (ret < 0)
+                       return;
+
+               p2p_dev = pci_dev_get(ns->p2p_dev);
+       } else {
+               clients[0] = ctrl->p2p_client;
+               clients[1] = nvmet_ns_dev(ns);
+
+               p2p_dev = pci_p2pmem_find_many(clients, ARRAY_SIZE(clients));
+               if (!p2p_dev) {
+                       pr_err("no peer-to-peer memory is available that's supported by %s and %s\n",
+                              dev_name(ctrl->p2p_client), ns->device_path);
+                       return;
+               }
+       }
+
+       ret = radix_tree_insert(&ctrl->p2p_ns_map, ns->nsid, p2p_dev);
+       if (ret < 0) {
+               /*
+                * The map does not hold the device, so drop our reference
+                * and do not claim that p2pmem is in use. Log before the
+                * put so the device is not touched after it is released.
+                */
+               pr_err("failed to record p2pmem on %s for nsid %d (%d)\n",
+                      pci_name(p2p_dev), ns->nsid, ret);
+               pci_dev_put(p2p_dev);
+               return;
+       }
+
+       pr_info("using p2pmem on %s for nsid %d\n", pci_name(p2p_dev),
+               ns->nsid);
+}
+
  int nvmet_ns_enable(struct nvmet_ns *ns)
  {
         struct nvmet_subsys *subsys = ns->subsys;
+       struct nvmet_ctrl *ctrl;
         int ret;
  
         mutex_lock(&subsys->lock);
@@ -384,6 +469,13 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
         if (ret)
                 goto out_unlock;
  
+       ret = nvmet_p2pmem_ns_enable(ns);
+       if (ret)
+               goto out_unlock;
+
+       list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
+               nvmet_p2pmem_ns_add_p2p(ctrl, ns);
+
         ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
                                 0, GFP_KERNEL);
         if (ret)
@@ -418,6 +510,9 @@ out_unlock:
         mutex_unlock(&subsys->lock);
         return ret;
  out_dev_put:
+       list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
+               pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
+
         nvmet_ns_dev_disable(ns);
         goto out_unlock;
  }
@@ -425,6 +520,7 @@ out_dev_put:
  void nvmet_ns_disable(struct nvmet_ns *ns)
  {
         struct nvmet_subsys *subsys = ns->subsys;
+       struct nvmet_ctrl *ctrl;
  
         mutex_lock(&subsys->lock);
         if (!ns->enabled)
@@ -434,6 +530,10 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
         list_del_rcu(&ns->dev_link);
         if (ns->nsid == subsys->max_nsid)
                 subsys->max_nsid = nvmet_max_nsid(subsys);
+
+       list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
+               pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
+
         mutex_unlock(&subsys->lock);
  
         /*
@@ -450,6 +550,7 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
         percpu_ref_exit(&ns->ref);
  
         mutex_lock(&subsys->lock);
+
         subsys->nr_namespaces--;
         nvmet_ns_changed(subsys, ns->nsid);
         nvmet_ns_dev_disable(ns);
@@ -727,6 +828,29 @@ EXPORT_SYMBOL_GPL(nvmet_req_execute);
  
  int nvmet_req_alloc_sgl(struct nvmet_req *req)
  {
+       struct pci_dev *p2p_dev = NULL;
+
+       if (IS_ENABLED(CONFIG_PCI_P2PDMA)) {
+               if (req->sq->ctrl && req->ns)
+                       p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map,
+                                                   req->ns->nsid);
+
+               req->p2p_dev = NULL;
+               if (req->sq->qid && p2p_dev) {
+                       req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
+                                                      req->transfer_len);
+                       if (req->sg) {
+                               req->p2p_dev = p2p_dev;
+                               return 0;
+                       }
+               }
+
+               /*
+                * If no P2P memory was available we fallback to using
+                * regular memory
+                */
+       }
+
         req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt);
         if (!req->sg)
                 return -ENOMEM;
@@ -737,7 +861,11 @@ EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgl);
  
  void nvmet_req_free_sgl(struct nvmet_req *req)
  {
-       sgl_free(req->sg);
+       if (req->p2p_dev)
+               pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
+       else
+               sgl_free(req->sg);
+
         req->sg = NULL;
         req->sg_cnt = 0;
  }
@@ -939,6 +1067,37 @@ bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
                 return __nvmet_host_allowed(subsys, hostnqn);
  }
  
+/*
+ * Attach the request's P2P client device to a controller and try to add a
+ * P2P provider for every namespace on the subsystem's namespace list.
+ * Does nothing when the request carries no P2P client.
+ *
+ * Note: ctrl->subsys->lock should be held when calling this function
+ */
+static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl,
+               struct nvmet_req *req)
+{
+       struct device *client = req->p2p_client;
+       struct nvmet_ns *cur;
+
+       if (!client)
+               return;
+
+       /* The controller keeps its own reference on the client device. */
+       ctrl->p2p_client = get_device(client);
+
+       list_for_each_entry_rcu(cur, &ctrl->subsys->namespaces, dev_link)
+               nvmet_p2pmem_ns_add_p2p(ctrl, cur);
+}
+
+/*
+ * Tear down a controller's per-namespace P2P provider map: release every
+ * stored device, remove its entry from the map, and finally drop the
+ * reference on the controller's P2P client device.
+ *
+ * Note: ctrl->subsys->lock should be held when calling this function
+ */
+static void nvmet_release_p2p_ns_map(struct nvmet_ctrl *ctrl)
+{
+       struct radix_tree_iter iter;
+       void __rcu **slot;
+
+       radix_tree_for_each_slot(slot, &ctrl->p2p_ns_map, &iter, 0) {
+               pci_dev_put(radix_tree_deref_slot(slot));
+               /*
+                * Remove the entry as well; only putting the device would
+                * leave the tree's internal nodes allocated after the
+                * controller is gone.
+                */
+               radix_tree_iter_delete(&ctrl->p2p_ns_map, &iter, slot);
+       }
+
+       put_device(ctrl->p2p_client);
+}
+
  u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
                 struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
  {
@@ -980,6 +1139,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
  
         INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
         INIT_LIST_HEAD(&ctrl->async_events);
+       INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
  
         memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
         memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
@@ -1044,6 +1204,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
  
         mutex_lock(&subsys->lock);
         list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
+       nvmet_setup_p2p_ns_map(ctrl, req);
         mutex_unlock(&subsys->lock);
  
         *ctrlp = ctrl;
@@ -1071,6 +1232,7 @@ static void nvmet_ctrl_free(struct kref *ref)
         struct nvmet_subsys *subsys = ctrl->subsys;
  
         mutex_lock(&subsys->lock);
+       nvmet_release_p2p_ns_map(ctrl);
         list_del(&ctrl->subsys_entry);
         mutex_unlock(&subsys->lock);
  
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c

index 7bc9f624043296c2bd71d625b6a7ec36d9319015..5660dd7ca75515f0ea257a9ae5d14d964e24576d 100644 (file)
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -78,6 +78,9 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
                 op = REQ_OP_READ;
         }
  
+       if (is_pci_p2pdma_page(sg_page(req->sg)))
+               op_flags |= REQ_NOMERGE;
+
         sector = le64_to_cpu(req->cmd->rw.slba);
         sector <<= (req->ns->blksize_shift - 9);
  
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h

index e7b7406c4e22b6ecee6f0e5347164ff5fd758ce7..d6be098f342b349bca0952cb5aa376b6db24d1a3 100644 (file)
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -26,6 +26,7 @@
  #include <linux/configfs.h>
  #include <linux/rcupdate.h>
  #include <linux/blkdev.h>
+#include <linux/radix-tree.h>
  
  #define NVMET_ASYNC_EVENTS             4
  #define NVMET_ERROR_LOG_SLOTS          128
@@ -77,6 +78,9 @@ struct nvmet_ns {
         struct completion       disable_done;
         mempool_t               *bvec_pool;
         struct kmem_cache       *bvec_cache;
+
+       int                     use_p2pmem;
+       struct pci_dev          *p2p_dev;
  };
  
  static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
@@ -84,6 +88,11 @@ static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
         return container_of(to_config_group(item), struct nvmet_ns, group);
  }
  
+/*
+ * Return the struct device of the disk behind @ns's block device, or NULL
+ * when the namespace has no block device.
+ */
+static inline struct device *nvmet_ns_dev(struct nvmet_ns *ns)
+{
+       if (!ns->bdev)
+               return NULL;
+
+       return disk_to_dev(ns->bdev->bd_disk);
+}
+
  struct nvmet_cq {
         u16                     qid;
         u16                     size;
@@ -184,6 +193,9 @@ struct nvmet_ctrl {
  
         char                    subsysnqn[NVMF_NQN_FIELD_LEN];
         char                    hostnqn[NVMF_NQN_FIELD_LEN];
+
+       struct device *p2p_client;
+       struct radix_tree_root p2p_ns_map;
  };
  
  struct nvmet_subsys {
@@ -294,6 +306,9 @@ struct nvmet_req {
  
         void (*execute)(struct nvmet_req *req);
         const struct nvmet_fabrics_ops *ops;
+
+       struct pci_dev *p2p_dev;
+       struct device *p2p_client;
  };
  
  extern struct workqueue_struct *buffered_io_wq;
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c

index 9e091e78a2f06221a13fdf48d84d980f19d704f2..3f7971d3706d90d5fbf382072ed8d2da2ac6e8b6 100644 (file)
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -749,6 +749,8 @@ static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue,
                 cmd->send_sge.addr, cmd->send_sge.length,
                 DMA_TO_DEVICE);
  
+       cmd->req.p2p_client = &queue->dev->device->dev;
+
         if (!nvmet_req_init(&cmd->req, &queue->nvme_cq,
                         &queue->nvme_sq, &nvmet_rdma_ops))
                 return;
author	Logan Gunthorpe <logang@deltatee.com>
	Thu, 4 Oct 2018 21:27:47 +0000 (15:27 -0600)
committer	Bjorn Helgaas <bhelgaas@google.com>
	Wed, 17 Oct 2018 17:18:24 +0000 (12:18 -0500)
drivers/nvme/target/configfs.c		patch \| blob \| history
drivers/nvme/target/core.c		patch \| blob \| history
drivers/nvme/target/io-cmd-bdev.c		patch \| blob \| history
drivers/nvme/target/nvmet.h		patch \| blob \| history
drivers/nvme/target/rdma.c		patch \| blob \| history