mm: make mm->pinned_vm an atomic64 counter
author: Davidlohr Bueso <dave@stgolabs.net>
Wed, 6 Feb 2019 17:59:15 +0000 (09:59 -0800)
committer: Jason Gunthorpe <jgg@mellanox.com>
Thu, 7 Feb 2019 19:54:02 +0000 (12:54 -0700)
Taking a sleeping lock to _only_ increment a variable is quite the
overkill, and pretty much all users do this. Furthermore, some drivers
(e.g. infiniband and scif) that need pinned semantics can go to quite
some trouble to actually delay the (un)accounting of pinned pages via a
workqueue when it is not possible to acquire the lock directly.

By making the counter atomic we no longer need to hold the mmap_sem and
can simplify some code around it for pinned_vm users. The counter is
64-bit such that we need not worry about overflows, for example from
rdma input that is controlled from userspace.

Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Christoph Lameter <cl@linux.com>
Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
drivers/infiniband/core/umem.c
drivers/infiniband/hw/hfi1/user_pages.c
drivers/infiniband/hw/qib/qib_user_pages.c
drivers/infiniband/hw/usnic/usnic_uiom.c
drivers/misc/mic/scif/scif_rma.c
fs/proc/task_mmu.c
include/linux/mm_types.h
kernel/events/core.c
kernel/fork.c
mm/debug.c

index 1efe0a74e06b8e055a7ce1c5e3f0e68ac0e421b4..678abe1afcbac14f4ab547a2fe355764eccc4fa9 100644 (file)
@@ -166,13 +166,13 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 
        down_write(&mm->mmap_sem);
-       if (check_add_overflow(mm->pinned_vm, npages, &new_pinned) ||
-           (new_pinned > lock_limit && !capable(CAP_IPC_LOCK))) {
+       new_pinned = atomic64_read(&mm->pinned_vm) + npages;
+       if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
                up_write(&mm->mmap_sem);
                ret = -ENOMEM;
                goto out;
        }
-       mm->pinned_vm = new_pinned;
+       atomic64_set(&mm->pinned_vm, new_pinned);
        up_write(&mm->mmap_sem);
 
        cur_base = addr & PAGE_MASK;
@@ -234,7 +234,7 @@ umem_release:
        __ib_umem_release(context->device, umem, 0);
 vma:
        down_write(&mm->mmap_sem);
-       mm->pinned_vm -= ib_umem_num_pages(umem);
+       atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
        up_write(&mm->mmap_sem);
 out:
        if (vma_list)
@@ -263,7 +263,7 @@ static void ib_umem_release_defer(struct work_struct *work)
        struct ib_umem *umem = container_of(work, struct ib_umem, work);
 
        down_write(&umem->owning_mm->mmap_sem);
-       umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
+       atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
        up_write(&umem->owning_mm->mmap_sem);
 
        __ib_umem_release_tail(umem);
@@ -302,7 +302,7 @@ void ib_umem_release(struct ib_umem *umem)
        } else {
                down_write(&umem->owning_mm->mmap_sem);
        }
-       umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
+       atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
        up_write(&umem->owning_mm->mmap_sem);
 
        __ib_umem_release_tail(umem);
index e341e6dcc3885c5a528d8ec43f157eee1a2481f4..40a6e434190fac931045c54c00b3dcf9a2662515 100644 (file)
@@ -92,7 +92,7 @@ bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm,
        size = DIV_ROUND_UP(size, PAGE_SIZE);
 
        down_read(&mm->mmap_sem);
-       pinned = mm->pinned_vm;
+       pinned = atomic64_read(&mm->pinned_vm);
        up_read(&mm->mmap_sem);
 
        /* First, check the absolute limit against all pinned pages. */
@@ -112,7 +112,7 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np
                return ret;
 
        down_write(&mm->mmap_sem);
-       mm->pinned_vm += ret;
+       atomic64_add(ret, &mm->pinned_vm);
        up_write(&mm->mmap_sem);
 
        return ret;
@@ -131,7 +131,7 @@ void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
 
        if (mm) { /* during close after signal, mm can be NULL */
                down_write(&mm->mmap_sem);
-               mm->pinned_vm -= npages;
+               atomic64_sub(npages, &mm->pinned_vm);
                up_write(&mm->mmap_sem);
        }
 }
index 075f09fb7ce3adcaf85c852e499e9ded8090fec4..c6c81022d3136a9f6cf9b78f1b817becc1dc9c09 100644 (file)
@@ -75,7 +75,7 @@ static int __qib_get_user_pages(unsigned long start_page, size_t num_pages,
                        goto bail_release;
        }
 
-       current->mm->pinned_vm += num_pages;
+       atomic64_add(num_pages, &current->mm->pinned_vm);
 
        ret = 0;
        goto bail;
@@ -156,7 +156,7 @@ void qib_release_user_pages(struct page **p, size_t num_pages)
        __qib_release_user_pages(p, num_pages, 1);
 
        if (current->mm) {
-               current->mm->pinned_vm -= num_pages;
+               atomic64_sub(num_pages, &current->mm->pinned_vm);
                up_write(&current->mm->mmap_sem);
        }
 }
index ce01a59fccc4ff9ee1e8232a64d7d49bcf91197b..854436a2b4371db61944d0a8848f221a62e49c18 100644 (file)
@@ -129,7 +129,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
        uiomr->owning_mm = mm = current->mm;
        down_write(&mm->mmap_sem);
 
-       locked = npages + current->mm->pinned_vm;
+       locked = npages + atomic64_read(&current->mm->pinned_vm);
        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 
        if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
@@ -187,7 +187,7 @@ out:
        if (ret < 0)
                usnic_uiom_put_pages(chunk_list, 0);
        else {
-               mm->pinned_vm = locked;
+               atomic64_set(&mm->pinned_vm, locked);
                mmgrab(uiomr->owning_mm);
        }
 
@@ -441,7 +441,7 @@ static void usnic_uiom_release_defer(struct work_struct *work)
                container_of(work, struct usnic_uiom_reg, work);
 
        down_write(&uiomr->owning_mm->mmap_sem);
-       uiomr->owning_mm->pinned_vm -= usnic_uiom_num_pages(uiomr);
+       atomic64_sub(usnic_uiom_num_pages(uiomr), &uiomr->owning_mm->pinned_vm);
        up_write(&uiomr->owning_mm->mmap_sem);
 
        __usnic_uiom_release_tail(uiomr);
@@ -469,7 +469,7 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
        } else {
                down_write(&uiomr->owning_mm->mmap_sem);
        }
-       uiomr->owning_mm->pinned_vm -= usnic_uiom_num_pages(uiomr);
+       atomic64_sub(usnic_uiom_num_pages(uiomr), &uiomr->owning_mm->pinned_vm);
        up_write(&uiomr->owning_mm->mmap_sem);
 
        __usnic_uiom_release_tail(uiomr);
index 749321eb91ae1d50aa6853078f67501c0e2077ad..2448368f181ed28601bb7606673c46bbff59d428 100644 (file)
@@ -285,7 +285,7 @@ __scif_dec_pinned_vm_lock(struct mm_struct *mm,
        } else {
                down_write(&mm->mmap_sem);
        }
-       mm->pinned_vm -= nr_pages;
+       atomic64_sub(nr_pages, &mm->pinned_vm);
        up_write(&mm->mmap_sem);
        return 0;
 }
@@ -299,7 +299,7 @@ static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
                return 0;
 
        locked = nr_pages;
-       locked += mm->pinned_vm;
+       locked += atomic64_read(&mm->pinned_vm);
        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
        if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
                dev_err(scif_info.mdev.this_device,
@@ -307,7 +307,7 @@ static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
                        locked, lock_limit);
                return -ENOMEM;
        }
-       mm->pinned_vm = locked;
+       atomic64_set(&mm->pinned_vm, locked);
        return 0;
 }
 
index f0ec9edab2f31858c5928b5b425d21192ec60466..d2902962244dfbc444db6872a855575c991289ab 100644 (file)
@@ -59,7 +59,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
        SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
        SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
        SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
-       SEQ_PUT_DEC(" kB\nVmPin:\t", mm->pinned_vm);
+       SEQ_PUT_DEC(" kB\nVmPin:\t", atomic64_read(&mm->pinned_vm));
        SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss);
        SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss);
        SEQ_PUT_DEC(" kB\nRssAnon:\t", anon);
index 2c471a2c43fa7137f1780b976615a6cbef5f6b6e..acea2ea2d6c469ef7a23850e30bf06b1a26ad2db 100644 (file)
@@ -405,7 +405,7 @@ struct mm_struct {
 
                unsigned long total_vm;    /* Total pages mapped */
                unsigned long locked_vm;   /* Pages that have PG_mlocked set */
-               unsigned long pinned_vm;   /* Refcount permanently increased */
+               atomic64_t    pinned_vm;   /* Refcount permanently increased */
                unsigned long data_vm;     /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
                unsigned long exec_vm;     /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
                unsigned long stack_vm;    /* VM_STACK */
index e5ede6918050eeddb27e1aa16361bbe6d6813b53..29e9f2473656e66185e2417d349c556c95d2f00e 100644 (file)
@@ -5459,7 +5459,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 
                /* now it's safe to free the pages */
                atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
-               vma->vm_mm->pinned_vm -= rb->aux_mmap_locked;
+               atomic64_sub(rb->aux_mmap_locked, &vma->vm_mm->pinned_vm);
 
                /* this has to be the last one */
                rb_free_aux(rb);
@@ -5532,7 +5532,7 @@ again:
         */
 
        atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm);
-       vma->vm_mm->pinned_vm -= mmap_locked;
+       atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm);
        free_uid(mmap_user);
 
 out_put:
@@ -5680,7 +5680,7 @@ accounting:
 
        lock_limit = rlimit(RLIMIT_MEMLOCK);
        lock_limit >>= PAGE_SHIFT;
-       locked = vma->vm_mm->pinned_vm + extra;
+       locked = atomic64_read(&vma->vm_mm->pinned_vm) + extra;
 
        if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
                !capable(CAP_IPC_LOCK)) {
@@ -5721,7 +5721,7 @@ accounting:
 unlock:
        if (!ret) {
                atomic_long_add(user_extra, &user->locked_vm);
-               vma->vm_mm->pinned_vm += extra;
+               atomic64_add(extra, &vma->vm_mm->pinned_vm);
 
                atomic_inc(&event->mmap_count);
        } else if (rb) {
index b69248e6f0e024c0407df16dfdc8a4919b590c78..85e08c379a9edaee19b7ceefb5b6aa1527a6e377 100644 (file)
@@ -981,7 +981,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
        mm_pgtables_bytes_init(mm);
        mm->map_count = 0;
        mm->locked_vm = 0;
-       mm->pinned_vm = 0;
+       atomic64_set(&mm->pinned_vm, 0);
        memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
        spin_lock_init(&mm->page_table_lock);
        spin_lock_init(&mm->arg_lock);
index 0abb987dad9b3d697f252469d2111dc61f530913..7d13941a72f90c0e5a04c37d26f3ddc349507bb8 100644 (file)
@@ -135,7 +135,7 @@ void dump_mm(const struct mm_struct *mm)
                "mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n"
                "pgd %px mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n"
                "hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"
-               "pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n"
+               "pinned_vm %llx data_vm %lx exec_vm %lx stack_vm %lx\n"
                "start_code %lx end_code %lx start_data %lx end_data %lx\n"
                "start_brk %lx brk %lx start_stack %lx\n"
                "arg_start %lx arg_end %lx env_start %lx env_end %lx\n"
@@ -166,7 +166,8 @@ void dump_mm(const struct mm_struct *mm)
                mm_pgtables_bytes(mm),
                mm->map_count,
                mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm,
-               mm->pinned_vm, mm->data_vm, mm->exec_vm, mm->stack_vm,
+               atomic64_read(&mm->pinned_vm),
+               mm->data_vm, mm->exec_vm, mm->stack_vm,
                mm->start_code, mm->end_code, mm->start_data, mm->end_data,
                mm->start_brk, mm->brk, mm->start_stack,
                mm->arg_start, mm->arg_end, mm->env_start, mm->env_end,