drm/i915: Always allocate an object/vma for the HWSP
authorChris Wilson <chris@chris-wilson.co.uk>
Mon, 28 Jan 2019 10:23:55 +0000 (10:23 +0000)
committerChris Wilson <chris@chris-wilson.co.uk>
Mon, 28 Jan 2019 16:24:19 +0000 (16:24 +0000)
Currently we only allocate an object and vma if we are using a GGTT
virtual HWSP, and a plain struct page for a physical HWSP. For
convenience later on with global timelines, it will be useful to always
have the status page being tracked by a struct i915_vma. Make it so.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190128102356.15037-4-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/intel_engine_cs.c
drivers/gpu/drm/i915/intel_guc_submission.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/intel_ringbuffer.c
drivers/gpu/drm/i915/intel_ringbuffer.h
drivers/gpu/drm/i915/selftests/mock_engine.c

index 1a5c163b98d6f9eff4aead16c9443f0aecb92499..2657eb6fd914568278c07bdd1be991a1ea3f952f 100644 (file)
@@ -506,27 +506,61 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
 
 static void cleanup_status_page(struct intel_engine_cs *engine)
 {
+       struct i915_vma *vma;
+
        /* Prevent writes into HWSP after returning the page to the system */
        intel_engine_set_hwsp_writemask(engine, ~0u);
 
-       if (HWS_NEEDS_PHYSICAL(engine->i915)) {
-               void *addr = fetch_and_zero(&engine->status_page.page_addr);
+       vma = fetch_and_zero(&engine->status_page.vma);
+       if (!vma)
+               return;
 
-               __free_page(virt_to_page(addr));
-       }
+       if (!HWS_NEEDS_PHYSICAL(engine->i915))
+               i915_vma_unpin(vma);
+
+       i915_gem_object_unpin_map(vma->obj);
+       __i915_gem_object_release_unless_active(vma->obj);
+}
+
+static int pin_ggtt_status_page(struct intel_engine_cs *engine,
+                               struct i915_vma *vma)
+{
+       unsigned int flags;
+
+       flags = PIN_GLOBAL;
+       if (!HAS_LLC(engine->i915))
+               /*
+                * On g33, we cannot place HWS above 256MiB, so
+                * restrict its pinning to the low mappable arena.
+                * Though this restriction is not documented for
+                * gen4, gen5, or byt, they also behave similarly
+                * and hang if the HWS is placed at the top of the
+                * GTT. To generalise, it appears that all !llc
+                * platforms have issues with us placing the HWS
+                * above the mappable region (even though we never
+                * actually map it).
+                */
+               flags |= PIN_MAPPABLE;
+       else
+               flags |= PIN_HIGH;
 
-       i915_vma_unpin_and_release(&engine->status_page.vma,
-                                  I915_VMA_RELEASE_MAP);
+       return i915_vma_pin(vma, 0, 0, flags);
 }
 
 static int init_status_page(struct intel_engine_cs *engine)
 {
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
-       unsigned int flags;
        void *vaddr;
        int ret;
 
+       /*
+        * Though the HWS register does support 36bit addresses, historically
+        * we have had hangs and corruption reported due to wild writes if
+        * the HWS is placed above 4G. We only allow objects to be allocated
+        * in GFP_DMA32 for i965, and no earlier physical address users had
+        * access to more than 4G.
+        */
        obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
        if (IS_ERR(obj)) {
                DRM_ERROR("Failed to allocate status page\n");
@@ -543,61 +577,30 @@ static int init_status_page(struct intel_engine_cs *engine)
                goto err;
        }
 
-       flags = PIN_GLOBAL;
-       if (!HAS_LLC(engine->i915))
-               /* On g33, we cannot place HWS above 256MiB, so
-                * restrict its pinning to the low mappable arena.
-                * Though this restriction is not documented for
-                * gen4, gen5, or byt, they also behave similarly
-                * and hang if the HWS is placed at the top of the
-                * GTT. To generalise, it appears that all !llc
-                * platforms have issues with us placing the HWS
-                * above the mappable region (even though we never
-                * actually map it).
-                */
-               flags |= PIN_MAPPABLE;
-       else
-               flags |= PIN_HIGH;
-       ret = i915_vma_pin(vma, 0, 0, flags);
-       if (ret)
-               goto err;
-
        vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
        if (IS_ERR(vaddr)) {
                ret = PTR_ERR(vaddr);
-               goto err_unpin;
+               goto err;
        }
 
+       engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
        engine->status_page.vma = vma;
-       engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
-       engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);
+
+       if (!HWS_NEEDS_PHYSICAL(engine->i915)) {
+               ret = pin_ggtt_status_page(engine, vma);
+               if (ret)
+                       goto err_unpin;
+       }
+
        return 0;
 
 err_unpin:
-       i915_vma_unpin(vma);
+       i915_gem_object_unpin_map(obj);
 err:
        i915_gem_object_put(obj);
        return ret;
 }
 
-static int init_phys_status_page(struct intel_engine_cs *engine)
-{
-       struct page *page;
-
-       /*
-        * Though the HWS register does support 36bit addresses, historically
-        * we have had hangs and corruption reported due to wild writes if
-        * the HWS is placed above 4G.
-        */
-       page = alloc_page(GFP_KERNEL | __GFP_DMA32 | __GFP_ZERO);
-       if (!page)
-               return -ENOMEM;
-
-       engine->status_page.page_addr = page_address(page);
-
-       return 0;
-}
-
 static void __intel_context_unpin(struct i915_gem_context *ctx,
                                  struct intel_engine_cs *engine)
 {
@@ -690,10 +693,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
        if (ret)
                goto err_unpin_preempt;
 
-       if (HWS_NEEDS_PHYSICAL(i915))
-               ret = init_phys_status_page(engine);
-       else
-               ret = init_status_page(engine);
+       ret = init_status_page(engine);
        if (ret)
                goto err_breadcrumbs;
 
@@ -1366,7 +1366,8 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
        }
 
        if (HAS_EXECLISTS(dev_priv)) {
-               const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+               const u32 *hws =
+                       &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
                unsigned int idx;
                u8 read, write;
 
@@ -1549,7 +1550,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
        spin_unlock_irqrestore(&b->rb_lock, flags);
 
        drm_printf(m, "HWSP:\n");
-       hexdump(m, engine->status_page.page_addr, PAGE_SIZE);
+       hexdump(m, engine->status_page.addr, PAGE_SIZE);
 
        drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));
 }
index 45e2db683fe5b9dabb1a7882b0e4558fc0ef14ff..4295ade0d6138781ca6abe4be5a85154d53ec73a 100644 (file)
  *
  */
 
+static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
+{
+       return (i915_ggtt_offset(engine->status_page.vma) +
+               I915_GEM_HWS_PREEMPT_ADDR);
+}
+
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 {
        return rb_entry(rb, struct i915_priolist, node);
index 185867106c1416cad1221a5e26bc85063a393ad3..2cf99c436658e7ee4dd2b384be873a015d1ebfb6 100644 (file)
@@ -172,6 +172,12 @@ static void execlists_init_reg_state(u32 *reg_state,
                                     struct intel_engine_cs *engine,
                                     struct intel_ring *ring);
 
+static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
+{
+       return (i915_ggtt_offset(engine->status_page.vma) +
+               I915_GEM_HWS_INDEX_ADDR);
+}
+
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 {
        return rb_entry(rb, struct i915_priolist, node);
@@ -1699,7 +1705,7 @@ static void enable_execlists(struct intel_engine_cs *engine)
                   _MASKED_BIT_DISABLE(STOP_RING));
 
        I915_WRITE(RING_HWS_PGA(engine->mmio_base),
-                  engine->status_page.ggtt_offset);
+                  i915_ggtt_offset(engine->status_page.vma));
        POSTING_READ(RING_HWS_PGA(engine->mmio_base));
 }
 
@@ -2244,10 +2250,10 @@ static int logical_ring_init(struct intel_engine_cs *engine)
        }
 
        execlists->csb_status =
-               &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+               &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
 
        execlists->csb_write =
-               &engine->status_page.page_addr[intel_hws_csb_write_index(i915)];
+               &engine->status_page.addr[intel_hws_csb_write_index(i915)];
 
        reset_csb_pointers(execlists);
 
index a9efc8c712546c6e0c78720dbf9f13b6c3bd77d0..cb6d2aa2a829795cb4be83aeb4454080fc71018f 100644 (file)
  */
 #define LEGACY_REQUEST_SIZE 200
 
+static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
+{
+       return (i915_ggtt_offset(engine->status_page.vma) +
+               I915_GEM_HWS_INDEX_ADDR);
+}
+
 static unsigned int __intel_ring_space(unsigned int head,
                                       unsigned int tail,
                                       unsigned int size)
@@ -503,12 +509,17 @@ static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
        I915_WRITE(HWS_PGA, addr);
 }
 
-static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
+static struct page *status_page(struct intel_engine_cs *engine)
 {
-       struct page *page = virt_to_page(engine->status_page.page_addr);
-       phys_addr_t phys = PFN_PHYS(page_to_pfn(page));
+       struct drm_i915_gem_object *obj = engine->status_page.vma->obj;
 
-       set_hws_pga(engine, phys);
+       GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+       return sg_page(obj->mm.pages->sgl);
+}
+
+static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
+{
+       set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine))));
        set_hwstam(engine, ~0u);
 }
 
@@ -575,7 +586,7 @@ static void flush_cs_tlb(struct intel_engine_cs *engine)
 
 static void ring_setup_status_page(struct intel_engine_cs *engine)
 {
-       set_hwsp(engine, engine->status_page.ggtt_offset);
+       set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma));
        set_hwstam(engine, ~0u);
 
        flush_cs_tlb(engine);
index f2effd0015407b03af70932d0bb34d794af97e0f..32371ae67f247654988890361ab3e00edc68e6e0 100644 (file)
@@ -32,8 +32,7 @@ struct i915_sched_attr;
 
 struct intel_hw_status_page {
        struct i915_vma *vma;
-       u32 *page_addr;
-       u32 ggtt_offset;
+       u32 *addr;
 };
 
 #define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
@@ -671,7 +670,7 @@ static inline u32
 intel_read_status_page(const struct intel_engine_cs *engine, int reg)
 {
        /* Ensure that the compiler doesn't optimize away the load. */
-       return READ_ONCE(engine->status_page.page_addr[reg]);
+       return READ_ONCE(engine->status_page.addr[reg]);
 }
 
 static inline void
@@ -684,12 +683,12 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
         */
        if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
                mb();
-               clflush(&engine->status_page.page_addr[reg]);
-               engine->status_page.page_addr[reg] = value;
-               clflush(&engine->status_page.page_addr[reg]);
+               clflush(&engine->status_page.addr[reg]);
+               engine->status_page.addr[reg] = value;
+               clflush(&engine->status_page.addr[reg]);
                mb();
        } else {
-               WRITE_ONCE(engine->status_page.page_addr[reg], value);
+               WRITE_ONCE(engine->status_page.addr[reg], value);
        }
 }
 
@@ -877,16 +876,6 @@ static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
 void intel_engine_get_instdone(struct intel_engine_cs *engine,
                               struct intel_instdone *instdone);
 
-static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
-{
-       return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
-}
-
-static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
-{
-       return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR;
-}
-
 /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
 
index 905318b7ae1898b600e1a4db185065516c98fbd5..4e5b4dc6df0f6ef9c2a3dac9557ae41f64f80762 100644 (file)
@@ -200,7 +200,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
        engine->base.i915 = i915;
        snprintf(engine->base.name, sizeof(engine->base.name), "%s", name);
        engine->base.id = id;
-       engine->base.status_page.page_addr = (void *)(engine + 1);
+       engine->base.status_page.addr = (void *)(engine + 1);
 
        engine->base.context_pin = mock_context_pin;
        engine->base.request_alloc = mock_request_alloc;