From: Dave Chinner Date: Wed, 28 Aug 2013 00:18:09 +0000 (+1000) Subject: fs: convert fs shrinkers to new scan/count API X-Git-Url: http://git.cdn.openwrt.org/?a=commitdiff_plain;h=1ab6c4997e04a00c50c6d786c2f046adc0d1f5de;p=openwrt%2Fstaging%2Fblogic.git fs: convert fs shrinkers to new scan/count API Convert the filesystem shrinkers to use the new API, and standardise some of the behaviours of the shrinkers at the same time. For example, nr_to_scan means the number of objects to scan, not the number of objects to free. I refactored the CIFS idmap shrinker a little - it really needs to be broken up into a shrinker per tree, keeping an item count with the tree root, so that we don't need to walk the tree every time the shrinker needs to count the number of objects in the tree (i.e. all the time under memory pressure). [glommer@openvz.org: fixes for ext4, ubifs, nfs, cifs and glock. Fixes are needed mainly due to new code merged in the tree] [assorted fixes folded in] Signed-off-by: Dave Chinner Signed-off-by: Glauber Costa Acked-by: Mel Gorman Acked-by: Artem Bityutskiy Acked-by: Jan Kara Acked-by: Steven Whitehouse Cc: Adrian Hunter Cc: "Theodore Ts'o" Cc: Adrian Hunter Cc: Al Viro Cc: Artem Bityutskiy Cc: Arve Hjønnevåg Cc: Carlos Maiolino Cc: Christoph Hellwig Cc: Chuck Lever Cc: Daniel Vetter Cc: David Rientjes Cc: Gleb Natapov Cc: Greg Thelen Cc: J. Bruce Fields Cc: Jan Kara Cc: Jerome Glisse Cc: John Stultz Cc: KAMEZAWA Hiroyuki Cc: Kent Overstreet Cc: Kirill A. 
Shutemov Cc: Marcelo Tosatti Cc: Mel Gorman Cc: Steven Whitehouse Cc: Thomas Hellstrom Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 2d1bdbe78c04..3981ff783950 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -931,13 +931,15 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, struct ext4_inode_info *ei; struct list_head *cur, *tmp; LIST_HEAD(skipped); - int ret, nr_shrunk = 0; + int nr_shrunk = 0; int retried = 0, skip_precached = 1, nr_skipped = 0; spin_lock(&sbi->s_es_lru_lock); retry: list_for_each_safe(cur, tmp, &sbi->s_es_lru) { + int shrunk; + /* * If we have already reclaimed all extents from extent * status tree, just stop the loop immediately. @@ -964,13 +966,13 @@ retry: continue; write_lock(&ei->i_es_lock); - ret = __es_try_to_reclaim_extents(ei, nr_to_scan); + shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan); if (ei->i_es_lru_nr == 0) list_del_init(&ei->i_es_lru); write_unlock(&ei->i_es_lock); - nr_shrunk += ret; - nr_to_scan -= ret; + nr_shrunk += shrunk; + nr_to_scan -= shrunk; if (nr_to_scan == 0) break; } @@ -1007,7 +1009,20 @@ retry: return nr_shrunk; } -static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) +static unsigned long ext4_es_count(struct shrinker *shrink, + struct shrink_control *sc) +{ + unsigned long nr; + struct ext4_sb_info *sbi; + + sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); + nr = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); + trace_ext4_es_shrink_enter(sbi->s_sb, sc->nr_to_scan, nr); + return nr; +} + +static unsigned long ext4_es_scan(struct shrinker *shrink, + struct shrink_control *sc) { struct ext4_sb_info *sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); @@ -1022,9 +1037,8 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); - 
ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); - return ret; + return nr_shrunk; } void ext4_es_register_shrinker(struct ext4_sb_info *sbi) @@ -1032,7 +1046,8 @@ void ext4_es_register_shrinker(struct ext4_sb_info *sbi) INIT_LIST_HEAD(&sbi->s_es_lru); spin_lock_init(&sbi->s_es_lru_lock); sbi->s_es_last_sorted = 0; - sbi->s_es_shrinker.shrink = ext4_es_shrink; + sbi->s_es_shrinker.scan_objects = ext4_es_scan; + sbi->s_es_shrinker.count_objects = ext4_es_count; sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; register_shrinker(&sbi->s_es_shrinker); } @@ -1076,7 +1091,7 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, struct ext4_es_tree *tree = &ei->i_es_tree; struct rb_node *node; struct extent_status *es; - int nr_shrunk = 0; + unsigned long nr_shrunk = 0; static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index b782bb56085d..c2f41b4d00b9 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1427,21 +1427,22 @@ __acquires(&lru_lock) * gfs2_dispose_glock_lru() above. 
*/ -static void gfs2_scan_glock_lru(int nr) +static long gfs2_scan_glock_lru(int nr) { struct gfs2_glock *gl; LIST_HEAD(skipped); LIST_HEAD(dispose); + long freed = 0; spin_lock(&lru_lock); - while(nr && !list_empty(&lru_list)) { + while ((nr-- >= 0) && !list_empty(&lru_list)) { gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); /* Test for being demotable */ if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { list_move(&gl->gl_lru, &dispose); atomic_dec(&lru_count); - nr--; + freed++; continue; } @@ -1451,23 +1452,28 @@ static void gfs2_scan_glock_lru(int nr) if (!list_empty(&dispose)) gfs2_dispose_glock_lru(&dispose); spin_unlock(&lru_lock); + + return freed; } -static int gfs2_shrink_glock_memory(struct shrinker *shrink, - struct shrink_control *sc) +static unsigned long gfs2_glock_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc) { - if (sc->nr_to_scan) { - if (!(sc->gfp_mask & __GFP_FS)) - return -1; - gfs2_scan_glock_lru(sc->nr_to_scan); - } + if (!(sc->gfp_mask & __GFP_FS)) + return SHRINK_STOP; + return gfs2_scan_glock_lru(sc->nr_to_scan); +} +static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink, + struct shrink_control *sc) +{ return vfs_pressure_ratio(atomic_read(&lru_count)); } static struct shrinker glock_shrinker = { - .shrink = gfs2_shrink_glock_memory, .seeks = DEFAULT_SEEKS, + .count_objects = gfs2_glock_shrink_count, + .scan_objects = gfs2_glock_shrink_scan, }; /** diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 7b0f5043cf24..351586e24e30 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -32,7 +32,8 @@ struct workqueue_struct *gfs2_control_wq; static struct shrinker qd_shrinker = { - .shrink = gfs2_shrink_qd_memory, + .count_objects = gfs2_qd_shrink_count, + .scan_objects = gfs2_qd_shrink_scan, .seeks = DEFAULT_SEEKS, }; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index d550a5d6a05f..db441359ee8c 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -75,17 +75,16 @@ static LIST_HEAD(qd_lru_list); static 
atomic_t qd_lru_count = ATOMIC_INIT(0); static DEFINE_SPINLOCK(qd_lru_lock); -int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc) +unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc) { struct gfs2_quota_data *qd; struct gfs2_sbd *sdp; int nr_to_scan = sc->nr_to_scan; - - if (nr_to_scan == 0) - goto out; + long freed = 0; if (!(sc->gfp_mask & __GFP_FS)) - return -1; + return SHRINK_STOP; spin_lock(&qd_lru_lock); while (nr_to_scan && !list_empty(&qd_lru_list)) { @@ -110,10 +109,15 @@ int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc) kmem_cache_free(gfs2_quotad_cachep, qd); spin_lock(&qd_lru_lock); nr_to_scan--; + freed++; } spin_unlock(&qd_lru_lock); + return freed; +} -out: +unsigned long gfs2_qd_shrink_count(struct shrinker *shrink, + struct shrink_control *sc) +{ return vfs_pressure_ratio(atomic_read(&qd_lru_count)); } diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 4f5e6e44ed83..0f64d9deb1b0 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -53,8 +53,10 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) return ret; } -extern int gfs2_shrink_qd_memory(struct shrinker *shrink, - struct shrink_control *sc); +extern unsigned long gfs2_qd_shrink_count(struct shrinker *shrink, + struct shrink_control *sc); +extern unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc); extern const struct quotactl_ops gfs2_quotactl_ops; #endif /* __QUOTA_DOT_H__ */ diff --git a/fs/mbcache.c b/fs/mbcache.c index 5eb04767cb29..e519e45bf673 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -86,18 +86,6 @@ static LIST_HEAD(mb_cache_list); static LIST_HEAD(mb_cache_lru_list); static DEFINE_SPINLOCK(mb_cache_spinlock); -/* - * What the mbcache registers as to get shrunk dynamically. 
- */ - -static int mb_cache_shrink_fn(struct shrinker *shrink, - struct shrink_control *sc); - -static struct shrinker mb_cache_shrinker = { - .shrink = mb_cache_shrink_fn, - .seeks = DEFAULT_SEEKS, -}; - static inline int __mb_cache_entry_is_hashed(struct mb_cache_entry *ce) { @@ -151,7 +139,7 @@ forget: /* - * mb_cache_shrink_fn() memory pressure callback + * mb_cache_shrink_scan() memory pressure callback * * This function is called by the kernel memory management when memory * gets low. @@ -159,17 +147,16 @@ forget: * @shrink: (ignored) * @sc: shrink_control passed from reclaim * - * Returns the number of objects which are present in the cache. + * Returns the number of objects freed. */ -static int -mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc) +static unsigned long +mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { LIST_HEAD(free_list); - struct mb_cache *cache; struct mb_cache_entry *entry, *tmp; - int count = 0; int nr_to_scan = sc->nr_to_scan; gfp_t gfp_mask = sc->gfp_mask; + unsigned long freed = 0; mb_debug("trying to free %d entries", nr_to_scan); spin_lock(&mb_cache_spinlock); @@ -179,19 +166,37 @@ mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc) struct mb_cache_entry, e_lru_list); list_move_tail(&ce->e_lru_list, &free_list); __mb_cache_entry_unhash(ce); + freed++; + } + spin_unlock(&mb_cache_spinlock); + list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { + __mb_cache_entry_forget(entry, gfp_mask); } + return freed; +} + +static unsigned long +mb_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) +{ + struct mb_cache *cache; + unsigned long count = 0; + + spin_lock(&mb_cache_spinlock); list_for_each_entry(cache, &mb_cache_list, c_cache_list) { mb_debug("cache %s (%d)", cache->c_name, atomic_read(&cache->c_entry_count)); count += atomic_read(&cache->c_entry_count); } spin_unlock(&mb_cache_spinlock); - list_for_each_entry_safe(entry, tmp, &free_list, 
e_lru_list) { - __mb_cache_entry_forget(entry, gfp_mask); - } + return vfs_pressure_ratio(count); } +static struct shrinker mb_cache_shrinker = { + .count_objects = mb_cache_shrink_count, + .scan_objects = mb_cache_shrink_scan, + .seeks = DEFAULT_SEEKS, +}; /* * mb_cache_create() create a new cache diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 813ef2571545..de434f309af0 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2006,17 +2006,18 @@ static void nfs_access_free_list(struct list_head *head) } } -int nfs_access_cache_shrinker(struct shrinker *shrink, - struct shrink_control *sc) +unsigned long +nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { LIST_HEAD(head); struct nfs_inode *nfsi, *next; struct nfs_access_entry *cache; int nr_to_scan = sc->nr_to_scan; gfp_t gfp_mask = sc->gfp_mask; + long freed = 0; if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) - return (nr_to_scan == 0) ? 0 : -1; + return SHRINK_STOP; spin_lock(&nfs_access_lru_lock); list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) { @@ -2032,6 +2033,7 @@ int nfs_access_cache_shrinker(struct shrinker *shrink, struct nfs_access_entry, lru); list_move(&cache->lru, &head); rb_erase(&cache->rb_node, &nfsi->access_cache); + freed++; if (!list_empty(&nfsi->access_cache_entry_lru)) list_move_tail(&nfsi->access_cache_inode_lru, &nfs_access_lru_list); @@ -2046,6 +2048,12 @@ remove_lru_entry: } spin_unlock(&nfs_access_lru_lock); nfs_access_free_list(&head); + return freed; +} + +unsigned long +nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc) +{ return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries)); } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index d388302c005f..38da8c2b81ac 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -273,8 +273,10 @@ extern struct nfs_client *nfs_init_client(struct nfs_client *clp, const char *ip_addr); /* dir.c */ -extern int nfs_access_cache_shrinker(struct shrinker *shrink, - 
struct shrink_control *sc); +extern unsigned long nfs_access_cache_count(struct shrinker *shrink, + struct shrink_control *sc); +extern unsigned long nfs_access_cache_scan(struct shrinker *shrink, + struct shrink_control *sc); struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); int nfs_create(struct inode *, struct dentry *, umode_t, bool); int nfs_mkdir(struct inode *, struct dentry *, umode_t); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 5793f24613c8..a03b9c6f9489 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -360,7 +360,8 @@ static void unregister_nfs4_fs(void) #endif static struct shrinker acl_shrinker = { - .shrink = nfs_access_cache_shrinker, + .count_objects = nfs_access_cache_count, + .scan_objects = nfs_access_cache_scan, .seeks = DEFAULT_SEEKS, }; diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index e76244edd748..9186c7ce0b14 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -59,11 +59,14 @@ static unsigned int longest_chain_cachesize; static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); static void cache_cleaner_func(struct work_struct *unused); -static int nfsd_reply_cache_shrink(struct shrinker *shrink, - struct shrink_control *sc); +static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, + struct shrink_control *sc); +static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink, + struct shrink_control *sc); static struct shrinker nfsd_reply_cache_shrinker = { - .shrink = nfsd_reply_cache_shrink, + .scan_objects = nfsd_reply_cache_scan, + .count_objects = nfsd_reply_cache_count, .seeks = 1, }; @@ -232,16 +235,18 @@ nfsd_cache_entry_expired(struct svc_cacherep *rp) * Walk the LRU list and prune off entries that are older than RC_EXPIRE. * Also prune the oldest ones when the total exceeds the max number of entries. 
*/ -static void +static long prune_cache_entries(void) { struct svc_cacherep *rp, *tmp; + long freed = 0; list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { if (!nfsd_cache_entry_expired(rp) && num_drc_entries <= max_drc_entries) break; nfsd_reply_cache_free_locked(rp); + freed++; } /* @@ -254,6 +259,7 @@ prune_cache_entries(void) cancel_delayed_work(&cache_cleaner); else mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); + return freed; } static void @@ -264,20 +270,28 @@ cache_cleaner_func(struct work_struct *unused) spin_unlock(&cache_lock); } -static int -nfsd_reply_cache_shrink(struct shrinker *shrink, struct shrink_control *sc) +static unsigned long +nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) { - unsigned int num; + unsigned long num; spin_lock(&cache_lock); - if (sc->nr_to_scan) - prune_cache_entries(); num = num_drc_entries; spin_unlock(&cache_lock); return num; } +static unsigned long +nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) +{ + unsigned long freed; + + spin_lock(&cache_lock); + freed = prune_cache_entries(); + spin_unlock(&cache_lock); + return freed; +} /* * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes */ diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 13eee847605c..831d49a4111f 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -687,44 +687,37 @@ int dquot_quota_sync(struct super_block *sb, int type) } EXPORT_SYMBOL(dquot_quota_sync); -/* Free unused dquots from cache */ -static void prune_dqcache(int count) +static unsigned long +dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { struct list_head *head; struct dquot *dquot; + unsigned long freed = 0; head = free_dquots.prev; - while (head != &free_dquots && count) { + while (head != &free_dquots && sc->nr_to_scan) { dquot = list_entry(head, struct dquot, dq_free); remove_dquot_hash(dquot); remove_free_dquot(dquot); remove_inuse(dquot); do_destroy_dquot(dquot); - count--; 
+ sc->nr_to_scan--; + freed++; head = free_dquots.prev; } + return freed; } -/* - * This is called from kswapd when we think we need some - * more memory - */ -static int shrink_dqcache_memory(struct shrinker *shrink, - struct shrink_control *sc) +static unsigned long +dqcache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { - int nr = sc->nr_to_scan; - - if (nr) { - spin_lock(&dq_list_lock); - prune_dqcache(nr); - spin_unlock(&dq_list_lock); - } return vfs_pressure_ratio( percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS])); } static struct shrinker dqcache_shrinker = { - .shrink = shrink_dqcache_memory, + .count_objects = dqcache_shrink_count, + .scan_objects = dqcache_shrink_scan, .seeks = DEFAULT_SEEKS, }; diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index 9e1d05666fed..f35135e28e96 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c @@ -277,18 +277,25 @@ static int kick_a_thread(void) return 0; } -int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc) +unsigned long ubifs_shrink_count(struct shrinker *shrink, + struct shrink_control *sc) { - int nr = sc->nr_to_scan; - int freed, contention = 0; long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); - if (nr == 0) - /* - * Due to the way UBIFS updates the clean znode counter it may - * temporarily be negative. - */ - return clean_zn_cnt >= 0 ? clean_zn_cnt : 1; + /* + * Due to the way UBIFS updates the clean znode counter it may + * temporarily be negative. + */ + return clean_zn_cnt >= 0 ? 
clean_zn_cnt : 1; +} + +unsigned long ubifs_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc) +{ + unsigned long nr = sc->nr_to_scan; + int contention = 0; + unsigned long freed; + long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); if (!clean_zn_cnt) { /* @@ -316,10 +323,10 @@ int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc) if (!freed && contention) { dbg_tnc("freed nothing, but contention"); - return -1; + return SHRINK_STOP; } out: - dbg_tnc("%d znodes were freed, requested %d", freed, nr); + dbg_tnc("%lu znodes were freed, requested %lu", freed, nr); return freed; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 879b9976c12b..3e4aa7281e04 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -49,7 +49,8 @@ struct kmem_cache *ubifs_inode_slab; /* UBIFS TNC shrinker description */ static struct shrinker ubifs_shrinker_info = { - .shrink = ubifs_shrinker, + .scan_objects = ubifs_shrink_scan, + .count_objects = ubifs_shrink_count, .seeks = DEFAULT_SEEKS, }; diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index b2babce4d70f..e8c8cfe1435c 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1624,7 +1624,10 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot); int ubifs_tnc_end_commit(struct ubifs_info *c); /* shrinker.c */ -int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc); +unsigned long ubifs_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc); +unsigned long ubifs_shrink_count(struct shrinker *shrink, + struct shrink_control *sc); /* commit.c */ int ubifs_bg_thread(void *info);