When the debugpat kernel boot flag is turned on, the following
traces are printed:
[ 1884.793168] x86/PAT: Overlap at 0x90000000-0x92000000
[ 1884.803510] x86/PAT: reserve_memtype added [mem 0x91200000-0x9127ffff],
track uncached-minus, req write-combining, ret uncached-minus
[ 1884.818167] hfi1 0000:05:00.0: hfi1_0: WC Remapped RcvArray:
ffffc9000a980000
ioremap_wc() is clearly not returning a write-combining mapping. This is
due to an overlap: the RcvArray is already covered by an uncached mapping
before the proposed write-combining mapping is created.
The patch replaces the single base mapping for uncached CSRs, which
used to overlap the RcvArray, with two mappings: one, kregbase1, from the
start of BAR0 up to the RcvArray, and another, kregbase2, from the end of
the RcvArray to the PIO send buffer space. A new dd field, base2_start,
is used to convert the zero-based offset in the CSR routines to the
correct kregbase1/kregbase2 mapping. A single direct write of the
RcvArray CSRs is replaced with hfi1_put_tid() to ensure correct access
using the new disjoint mappings.
Additionally, the kregend field is deleted since it is only ever written.
debugpat now shows the RcvArray as write combining:
[ 35.688990] x86/PAT: reserve_memtype added [mem 0x91200000-0x9127ffff],
track write-combining, req write-combining, ret write-combining
To insulate from any potential issues with write combining, all
writeq() calls are now flushed in hfi1_put_tid() and rcv_array_wc_fill().
Reviewed-by: Mitko Haralanov <mitko.haralanov@intel.com>
Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
CNTR_SYNTH, \
access_ibp_##cntr)
+/**
+ * hfi1_addr_from_offset - return addr for readq/writeq
+ * @dd - the dd device
+ * @offset - the offset of the CSR within bar0
+ *
+ * This routine selects the appropriate base address
+ * based on the indicated offset.
+ */
+static inline void __iomem *hfi1_addr_from_offset(
+ const struct hfi1_devdata *dd,
+ u32 offset)
+{
+ if (offset >= dd->base2_start)
+ return dd->kregbase2 + (offset - dd->base2_start);
+ return dd->kregbase1 + offset;
+}
+
+/**
+ * read_csr - read CSR at the indicated offset
+ * @dd - the dd device
+ * @offset - the offset of the CSR within bar0
+ *
+ * Return: the value read or all FF's if there
+ * is no mapping
+ */
u64 read_csr(const struct hfi1_devdata *dd, u32 offset)
{
- if (dd->flags & HFI1_PRESENT) {
- return readq((void __iomem *)dd->kregbase + offset);
- }
+ if (dd->flags & HFI1_PRESENT)
+ return readq(hfi1_addr_from_offset(dd, offset));
return -1;
}
+/**
+ * write_csr - write CSR at the indicated offset
+ * @dd - the dd device
+ * @offset - the offset of the CSR within bar0
+ * @value - value to write
+ */
void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value)
{
- if (dd->flags & HFI1_PRESENT)
- writeq(value, (void __iomem *)dd->kregbase + offset);
+ if (dd->flags & HFI1_PRESENT) {
+ void __iomem *base = hfi1_addr_from_offset(dd, offset);
+
+ /* avoid write to RcvArray */
+ if (WARN_ON(offset >= RCV_ARRAY && offset < dd->base2_start))
+ return;
+ writeq(value, base);
+ }
}
+/**
+ * get_csr_addr - return the iomem address for offset
+ * @dd - the dd device
+ * @offset - the offset of the CSR within bar0
+ *
+ * Return: The iomem address to use in subsequent
+ * writeq/readq operations.
+ */
void __iomem *get_csr_addr(
- struct hfi1_devdata *dd,
+ const struct hfi1_devdata *dd,
u32 offset)
{
- return (void __iomem *)dd->kregbase + offset;
+ if (dd->flags & HFI1_PRESENT)
+ return hfi1_addr_from_offset(dd, offset);
+ return NULL;
}
static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr,
u32 type, unsigned long pa, u16 order)
{
u64 reg;
- void __iomem *base = (dd->rcvarray_wc ? dd->rcvarray_wc :
- (dd->kregbase + RCV_ARRAY));
if (!(dd->flags & HFI1_PRESENT))
goto done;
- if (type == PT_INVALID) {
+ if (type == PT_INVALID || type == PT_INVALID_FLUSH) {
pa = 0;
+ order = 0;
} else if (type > PT_INVALID) {
dd_dev_err(dd,
"unexpected receive array type %u for index %u, not handled\n",
| (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT
| ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK)
<< RCV_ARRAY_RT_ADDR_SHIFT;
- trace_hfi1_write_rcvarray(base + (index * 8), reg);
- writeq(reg, base + (index * 8));
+ trace_hfi1_write_rcvarray(dd->rcvarray_wc + (index * 8), reg);
+ writeq(reg, dd->rcvarray_wc + (index * 8));
- if (type == PT_EAGER)
+ if (type == PT_EAGER || type == PT_INVALID_FLUSH || (index & 3) == 3)
/*
- * Eager entries are written one-by-one so we have to push them
- * after we write the entry.
+ * Eager entries are written and flushed
+ *
+ * Expected entries are flushed every 4 writes
*/
flush_wc();
done:
/* RcvArray */
for (i = 0; i < dd->chip_rcv_array_count; i++)
- write_csr(dd, RCV_ARRAY + (8 * i),
- RCV_ARRAY_RT_WRITE_ENABLE_SMASK);
+ hfi1_put_tid(dd, i, PT_INVALID_FLUSH, 0, 0);
/* RcvQPMapTable */
for (i = 0; i < 32; i++)
int write_lcb_csr(struct hfi1_devdata *dd, u32 offset, u64 data);
void __iomem *get_csr_addr(
- struct hfi1_devdata *dd,
+ const struct hfi1_devdata *dd,
u32 offset);
static inline void __iomem *get_kctxt_csr_addr(
- struct hfi1_devdata *dd,
+ const struct hfi1_devdata *dd,
int ctxt,
u32 offset0)
{
spin_lock_irqsave(&hfi1_devs_lock, flags);
list_for_each_entry(dd, &hfi1_dev_list, list) {
- if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase)
+ if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase1)
continue;
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
dd_dev_info(dd, "Reset on unit %u requested\n", unit);
- if (!dd->kregbase || !(dd->flags & HFI1_PRESENT)) {
+ if (!dd->kregbase1 || !(dd->flags & HFI1_PRESENT)) {
dd_dev_info(dd,
"Invalid unit number %u or not initialized or not present\n",
unit);
* Doing the WC fill writes only makes sense if the device is
* present and the RcvArray has been mapped as WC memory.
*/
- if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc)
+ if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc) {
writeq(0, dd->rcvarray_wc + (index * 8));
+ if ((index & 3) == 3)
+ flush_wc();
+ }
}
static inline void tid_group_add_tail(struct tid_group *grp,
struct hfi1_devdata,
user_cdev);
- if (!((dd->flags & HFI1_PRESENT) && dd->kregbase))
+ if (!((dd->flags & HFI1_PRESENT) && dd->kregbase1))
return -EINVAL;
if (!atomic_inc_not_zero(&dd->user_refcount))
struct device *diag_device;
struct device *ui_device;
- /* mem-mapped pointer to base of chip regs */
- u8 __iomem *kregbase;
- /* end of mem-mapped chip space excluding sendbuf and user regs */
- u8 __iomem *kregend;
- /* physical address of chip for io_remap, etc. */
+ /* first mapping up to RcvArray */
+ u8 __iomem *kregbase1;
resource_size_t physaddr;
+
+ /* second uncached mapping from RcvArray to pio send buffers */
+ u8 __iomem *kregbase2;
+ /* for detecting offset above kregbase2 address */
+ u32 base2_start;
+
/* Per VL data. Enough for all VLs but not all elements are set/used. */
struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
/* send context data */
#define dc8051_ver_patch(a) ((a) & 0x0000ff)
/* f_put_tid types */
-#define PT_EXPECTED 0
-#define PT_EAGER 1
-#define PT_INVALID 2
+#define PT_EXPECTED 0
+#define PT_EAGER 1
+#define PT_INVALID_FLUSH 2
+#define PT_INVALID 3
struct tid_rb_node;
struct mmu_rb_node;
return -EINVAL;
}
- dd->kregbase = ioremap_nocache(addr, TXE_PIO_SEND);
- if (!dd->kregbase)
+ dd->kregbase1 = ioremap_nocache(addr, RCV_ARRAY);
+ if (!dd->kregbase1) {
+ dd_dev_err(dd, "UC mapping of kregbase1 failed\n");
return -ENOMEM;
+ }
+ dd_dev_info(dd, "UC base1: %p for %x\n", dd->kregbase1, RCV_ARRAY);
+ dd->chip_rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT);
+ dd_dev_info(dd, "RcvArray count: %u\n", dd->chip_rcv_array_count);
+ dd->base2_start = RCV_ARRAY + dd->chip_rcv_array_count * 8;
+
+ dd->kregbase2 = ioremap_nocache(
+ addr + dd->base2_start,
+ TXE_PIO_SEND - dd->base2_start);
+ if (!dd->kregbase2) {
+ dd_dev_err(dd, "UC mapping of kregbase2 failed\n");
+ goto nomem;
+ }
+ dd_dev_info(dd, "UC base2: %p for %x\n", dd->kregbase2,
+ TXE_PIO_SEND - dd->base2_start);
dd->piobase = ioremap_wc(addr + TXE_PIO_SEND, TXE_PIO_SIZE);
if (!dd->piobase) {
- iounmap(dd->kregbase);
- return -ENOMEM;
+ dd_dev_err(dd, "WC mapping of send buffers failed\n");
+ goto nomem;
}
+ dd_dev_info(dd, "WC piobase: %p\n for %x", dd->piobase, TXE_PIO_SIZE);
- dd->flags |= HFI1_PRESENT; /* now register routines work */
-
- dd->kregend = dd->kregbase + TXE_PIO_SEND;
dd->physaddr = addr; /* used for io_remap, etc. */
/*
- * Re-map the chip's RcvArray as write-combining to allow us
+ * Map the chip's RcvArray as write-combining to allow us
* to write an entire cacheline worth of entries in one shot.
- * If this re-map fails, just continue - the RcvArray programming
- * function will handle both cases.
*/
- dd->chip_rcv_array_count = read_csr(dd, RCV_ARRAY_CNT);
dd->rcvarray_wc = ioremap_wc(addr + RCV_ARRAY,
dd->chip_rcv_array_count * 8);
- dd_dev_info(dd, "WC Remapped RcvArray: %p\n", dd->rcvarray_wc);
+ if (!dd->rcvarray_wc) {
+ dd_dev_err(dd, "WC mapping of receive array failed\n");
+ goto nomem;
+ }
+ dd_dev_info(dd, "WC RcvArray: %p for %x\n",
+ dd->rcvarray_wc, dd->chip_rcv_array_count * 8);
/*
* Save BARs and command to rewrite after device reset.
*/
if (ret)
goto read_error;
+ dd->flags |= HFI1_PRESENT; /* chip.c CSR routines now work */
return 0;
read_error:
dd_dev_err(dd, "Unable to read from PCI config\n");
+ goto bail_error;
+nomem:
+ ret = -ENOMEM;
+bail_error:
+ hfi1_pcie_ddcleanup(dd);
return ret;
}
*/
void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd)
{
- u64 __iomem *base = (void __iomem *)dd->kregbase;
-
dd->flags &= ~HFI1_PRESENT;
- dd->kregbase = NULL;
- iounmap(base);
+ if (dd->kregbase1)
+ iounmap(dd->kregbase1);
+ dd->kregbase1 = NULL;
+ if (dd->kregbase2)
+ iounmap(dd->kregbase2);
+ dd->kregbase2 = NULL;
if (dd->rcvarray_wc)
iounmap(dd->rcvarray_wc);
+ dd->rcvarray_wc = NULL;
if (dd->piobase)
iounmap(dd->piobase);
+ dd->piobase = NULL;
}
/* return the PCIe link speed from the given link status */
node->npages, node->mmu.addr, node->phys,
node->dma_addr);
- hfi1_put_tid(dd, node->rcventry, PT_INVALID, 0, 0);
/*
* Make sure device has seen the write before we unpin the
* pages.
*/
- flush_wc();
+ hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0);
pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len,
PCI_DMA_FROMDEVICE);