From 448e2ca0e32a5c437650d634b6032ab732662338 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 17 Feb 2009 00:21:56 +0000 Subject: [PATCH] powerpc/pseries: Implement a quota system for MSIs There are hardware limitations on the number of available MSIs, which firmware expresses using a property named "ibm,pe-total-#msi". This property tells us how many MSIs are available for devices below the point in the PCI tree where we find the property. For old firmwares which don't have the property, we assume there are 8 MSIs available per "partitionable endpoint" (PE). The PE can be found using existing EEH code, which uses the methods described in PAPR. For our purposes we want the parent of the node that's identified using this method. When a driver requests n MSIs for a device, we first establish where the "ibm,pe-total-#msi" property above that device is, or we find the PE if the property is not found. In both cases we call this node the "pe_dn". We then count all non-bridge devices below the pe_dn, to establish how many devices in total may need MSIs. The quota is then simply the total available divided by the number of devices, if the request is less than or equal to the quota, the request is fine and we're done. If the request is greater than the quota, we try to determine if there are any "spare" MSIs which we can give to this device. Spare MSIs are found by looking for other devices which can never use their full quota, because their "req#msi(-x)" property is less than the quota. If we find any spare, we divide the spares by the number of devices that could request more than their quota. This ensures the spare MSIs are spread evenly amongst all over-quota requestors. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/msi.c | 178 ++++++++++++++++++++++++++- 1 file changed, 176 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 081af6d7fa02..3e0d6ef3eca9 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -174,12 +174,186 @@ static int check_req_msix(struct pci_dev *pdev, int nvec) return check_req(pdev, nvec, "ibm,req#msi-x"); } +/* Quota calculation */ + +static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total) +{ + struct device_node *dn; + const u32 *p; + + dn = of_node_get(pci_device_to_OF_node(dev)); + while (dn) { + p = of_get_property(dn, "ibm,pe-total-#msi", NULL); + if (p) { + pr_debug("rtas_msi: found prop on dn %s\n", + dn->full_name); + *total = *p; + return dn; + } + + dn = of_get_next_parent(dn); + } + + return NULL; +} + +static struct device_node *find_pe_dn(struct pci_dev *dev, int *total) +{ + struct device_node *dn; + + /* Found our PE and assume 8 at that point. */ + + dn = pci_device_to_OF_node(dev); + if (!dn) + return NULL; + + dn = find_device_pe(dn); + if (!dn) + return NULL; + + /* We actually want the parent */ + dn = of_get_parent(dn); + if (!dn) + return NULL; + + /* Hardcode of 8 for old firmwares */ + *total = 8; + pr_debug("rtas_msi: using PE dn %s\n", dn->full_name); + + return dn; +} + +struct msi_counts { + struct device_node *requestor; + int num_devices; + int request; + int quota; + int spare; + int over_quota; +}; + +static void *count_non_bridge_devices(struct device_node *dn, void *data) +{ + struct msi_counts *counts = data; + const u32 *p; + u32 class; + + pr_debug("rtas_msi: counting %s\n", dn->full_name); + + p = of_get_property(dn, "class-code", NULL); + class = p ? *p : 0; + + if ((class >> 8) != PCI_CLASS_BRIDGE_PCI) + counts->num_devices++; + + return NULL; +} + +static void *count_spare_msis(struct device_node *dn, void *data) +{ + struct msi_counts *counts = data; + const u32 *p; + int req; + + if (dn == counts->requestor) + req = counts->request; + else { + /* We don't know if a driver will try to use MSI or MSI-X, + * so we just have to punt and use the larger of the two. */ + req = 0; + p = of_get_property(dn, "ibm,req#msi", NULL); + if (p) + req = *p; + + p = of_get_property(dn, "ibm,req#msi-x", NULL); + if (p) + req = max(req, (int)*p); + } + + if (req < counts->quota) + counts->spare += counts->quota - req; + else if (req > counts->quota) + counts->over_quota++; + + return NULL; +} + +static int msi_quota_for_device(struct pci_dev *dev, int request) +{ + struct device_node *pe_dn; + struct msi_counts counts; + int total; + + pr_debug("rtas_msi: calc quota for %s, request %d\n", pci_name(dev), + request); + + pe_dn = find_pe_total_msi(dev, &total); + if (!pe_dn) + pe_dn = find_pe_dn(dev, &total); + + if (!pe_dn) { + pr_err("rtas_msi: couldn't find PE for %s\n", pci_name(dev)); + goto out; + } + + pr_debug("rtas_msi: found PE %s\n", pe_dn->full_name); + + memset(&counts, 0, sizeof(struct msi_counts)); + + /* Work out how many devices we have below this PE */ + traverse_pci_devices(pe_dn, count_non_bridge_devices, &counts); + + if (counts.num_devices == 0) { + pr_err("rtas_msi: found 0 devices under PE for %s\n", + pci_name(dev)); + goto out; + } + + counts.quota = total / counts.num_devices; + if (request <= counts.quota) + goto out; + + /* else, we have some more calculating to do */ + counts.requestor = pci_device_to_OF_node(dev); + counts.request = request; + traverse_pci_devices(pe_dn, count_spare_msis, &counts); + + /* If the quota isn't an integer multiple of the total, we can + * use the remainder as spare MSIs for anyone that wants them. */ + counts.spare += total % counts.num_devices; + + /* Divide any spare by the number of over-quota requestors */ + if (counts.over_quota) + counts.quota += counts.spare / counts.over_quota; + + /* And finally clamp the request to the possibly adjusted quota */ + request = min(counts.quota, request); + + pr_debug("rtas_msi: request clamped to quota %d\n", request); +out: + of_node_put(pe_dn); + + return request; +} + static int rtas_msi_check_device(struct pci_dev *pdev, int nvec, int type) { + int quota, rc; + if (type == PCI_CAP_ID_MSIX) - return check_req_msix(pdev, nvec); + rc = check_req_msix(pdev, nvec); + else + rc = check_req_msi(pdev, nvec); + + if (rc) + return rc; - return check_req_msi(pdev, nvec); + quota = msi_quota_for_device(pdev, nvec); + + if (quota && quota < nvec) + return quota; + + return 0; } static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) -- 2.30.2