The i915 driver uses shmemfs to allocate backing storage for gem
objects. These shmemfs pages can be pinned (increased ref count) by
shmem_read_mapping_page_gfp(). When a lot of pages are pinned, vmscan
wastes a lot of time scanning these pinned pages. In some extreme case,
all pages in the inactive anon lru are pinned, and only the inactive
anon lru is scanned due to inactive_ratio, the system cannot swap and
invokes the oom-killer. Mark these pinned pages as unevictable to speed
up vmscan.
Export pagevec API check_move_unevictable_pages().
This patch was inspired by Chris Wilson's change [1].
[1]: https://patchwork.kernel.org/patch/
9768741/
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Kuo-Hsin Yang <vovoy@chromium.org>
Acked-by: Michal Hocko <mhocko@suse.com> # mm part
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20181106132324.17390-1-chris@chris-wilson.co.uk
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Query the address space, and return true if it is completely
unevictable.
-These are currently used in two places in the kernel:
+These are currently used in three places in the kernel:
(1) By ramfs to mark the address spaces of its inodes when they are created,
and this mark remains for the life of the inode.
swapped out; the application must touch the pages manually if it wants to
ensure they're in memory.
+ (3) By the i915 driver to mark pinned address space until it's unpinned. The
+ amount of unevictable memory marked by i915 driver is roughly the bounded
+ object size in debugfs/dri/0/i915_gem_objects.
+
Detecting Unevictable Pages
---------------------------
invalidate_mapping_pages(mapping, 0, (loff_t)-1);
}
+/*
+ * Move pages to appropriate lru and release the pagevec, decrementing the
+ * ref count of those pages.
+ */
+static void check_release_pagevec(struct pagevec *pvec)
+{
+ check_move_unevictable_pages(pvec);
+ __pagevec_release(pvec);
+ cond_resched();
+}
+
static void
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
struct sg_table *pages)
{
struct sgt_iter sgt_iter;
+ struct pagevec pvec;
struct page *page;
__i915_gem_object_release_shmem(obj, pages, true);
if (i915_gem_object_needs_bit17_swizzle(obj))
i915_gem_object_save_bit_17_swizzle(obj, pages);
+ mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
+
+ pagevec_init(&pvec);
for_each_sgt_page(page, sgt_iter, pages) {
if (obj->mm.dirty)
set_page_dirty(page);
if (obj->mm.madv == I915_MADV_WILLNEED)
mark_page_accessed(page);
- put_page(page);
- cond_resched();
+ if (!pagevec_add(&pvec, page))
+ check_release_pagevec(&pvec);
}
+ if (pagevec_count(&pvec))
+ check_release_pagevec(&pvec);
obj->mm.dirty = false;
sg_free_table(pages);
unsigned long last_pfn = 0; /* suppress gcc warning */
unsigned int max_segment = i915_sg_segment_size();
unsigned int sg_page_sizes;
+ struct pagevec pvec;
gfp_t noreclaim;
int ret;
* Fail silently without starting the shrinker
*/
mapping = obj->base.filp->f_mapping;
+ mapping_set_unevictable(mapping);
noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
err_sg:
sg_mark_end(sg);
err_pages:
- for_each_sgt_page(page, sgt_iter, st)
- put_page(page);
+ mapping_clear_unevictable(mapping);
+ pagevec_init(&pvec);
+ for_each_sgt_page(page, sgt_iter, st) {
+ if (!pagevec_add(&pvec, page))
+ check_release_pagevec(&pvec);
+ }
+ if (pagevec_count(&pvec))
+ check_release_pagevec(&pvec);
sg_free_table(st);
kfree(st);
struct bio;
+struct pagevec;
+
#define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */
#define SWAP_FLAG_PRIO_MASK 0x7fff
#define SWAP_FLAG_PRIO_SHIFT 0
#endif
extern int page_evictable(struct page *page);
-extern void check_move_unevictable_pages(struct page **, int nr_pages);
+extern void check_move_unevictable_pages(struct pagevec *pvec);
extern int kswapd_run(int nid);
extern void kswapd_stop(int nid);
break;
index = indices[pvec.nr - 1] + 1;
pagevec_remove_exceptionals(&pvec);
- check_move_unevictable_pages(pvec.pages, pvec.nr);
+ check_move_unevictable_pages(&pvec);
pagevec_release(&pvec);
cond_resched();
}
#include <linux/delayacct.h>
#include <linux/sysctl.h>
#include <linux/oom.h>
+#include <linux/pagevec.h>
#include <linux/prefetch.h>
#include <linux/printk.h>
#include <linux/dax.h>
return ret;
}
-#ifdef CONFIG_SHMEM
/**
- * check_move_unevictable_pages - check pages for evictability and move to appropriate zone lru list
- * @pages: array of pages to check
- * @nr_pages: number of pages to check
+ * check_move_unevictable_pages - check pages for evictability and move to
+ * appropriate zone lru list
+ * @pvec: pagevec with lru pages to check
*
- * Checks pages for evictability and moves them to the appropriate lru list.
- *
- * This function is only used for SysV IPC SHM_UNLOCK.
+ * Checks pages for evictability, if an evictable page is in the unevictable
+ * lru list, moves it to the appropriate evictable lru list. This function
+ * should be only used for lru pages.
*/
-void check_move_unevictable_pages(struct page **pages, int nr_pages)
+void check_move_unevictable_pages(struct pagevec *pvec)
{
struct lruvec *lruvec;
struct pglist_data *pgdat = NULL;
int pgrescued = 0;
int i;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pages[i];
+ for (i = 0; i < pvec->nr; i++) {
+ struct page *page = pvec->pages[i];
struct pglist_data *pagepgdat = page_pgdat(page);
pgscanned++;
spin_unlock_irq(&pgdat->lru_lock);
}
}
-#endif /* CONFIG_SHMEM */
+EXPORT_SYMBOL_GPL(check_move_unevictable_pages);