exec: pin stack limit during exec
author Kees Cook <keescook@chromium.org>
Tue, 10 Apr 2018 23:35:01 +0000 (16:35 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 11 Apr 2018 17:28:37 +0000 (10:28 -0700)
Since the stack rlimit is used in multiple places during exec and it can
be changed by other threads (via setrlimit()) or by other processes (via
prlimit()), exec cannot assume that the value stays constant.  Relying on
that assumption leads to races with mm layout selection and argument size
calculations.  This changes the exec path to use the rlimit stored in
bprm instead of the one in current.  Before starting the thread, the bprm
stack rlimit is stored back to current.

Link: http://lkml.kernel.org/r/1518638796-20819-4-git-send-email-keescook@chromium.org
Fixes: 64701dee4178e ("exec: Use sane stack rlimit under secureexec")
Signed-off-by: Kees Cook <keescook@chromium.org>
Reported-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Reported-by: Andy Lutomirski <luto@kernel.org>
Reported-by: Brad Spengler <spender@grsecurity.net>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Ben Hutchings <ben@decadent.org.uk>
Cc: Greg KH <greg@kroah.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: "Jason A. Donenfeld" <Jason@zx2c4.com>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Willy Tarreau <w@1wt.eu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
fs/exec.c
include/linux/binfmts.h

index 422ad79a7a0376462f993702d75b1344e48fa255..183059c427b9c5552fc29d5eb01f90891930240f 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -257,7 +257,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
                 *    to work from.
                 */
                limit = _STK_LIM / 4 * 3;
-               limit = min(limit, rlimit(RLIMIT_STACK) / 4);
+               limit = min(limit, bprm->rlim_stack.rlim_cur / 4);
                if (size > limit)
                        goto fail;
        }
@@ -411,6 +411,11 @@ static int bprm_mm_init(struct linux_binprm *bprm)
        if (!mm)
                goto err;
 
+       /* Save current stack limit for all calculations made during exec. */
+       task_lock(current->group_leader);
+       bprm->rlim_stack = current->signal->rlim[RLIMIT_STACK];
+       task_unlock(current->group_leader);
+
        err = __bprm_mm_init(bprm);
        if (err)
                goto err;
@@ -697,7 +702,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
 
 #ifdef CONFIG_STACK_GROWSUP
        /* Limit stack size */
-       stack_base = rlimit_max(RLIMIT_STACK);
+       stack_base = bprm->rlim_stack.rlim_max;
        if (stack_base > STACK_SIZE_MAX)
                stack_base = STACK_SIZE_MAX;
 
@@ -770,7 +775,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
         * Align this down to a page boundary as expand_stack
         * will align it up.
         */
-       rlim_stack = rlimit(RLIMIT_STACK) & PAGE_MASK;
+       rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK;
 #ifdef CONFIG_STACK_GROWSUP
        if (stack_size + stack_expand > rlim_stack)
                stack_base = vma->vm_start + rlim_stack;
@@ -1323,8 +1328,6 @@ EXPORT_SYMBOL(would_dump);
 
 void setup_new_exec(struct linux_binprm * bprm)
 {
-       struct rlimit rlim_stack;
-
        /*
         * Once here, prepare_binrpm() will not be called any more, so
         * the final state of setuid/setgid/fscaps can be merged into the
@@ -1343,15 +1346,11 @@ void setup_new_exec(struct linux_binprm * bprm)
                 * RLIMIT_STACK, but after the point of no return to avoid
                 * needing to clean up the change on failure.
                 */
-               if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM)
-                       current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM;
+               if (bprm->rlim_stack.rlim_cur > _STK_LIM)
+                       bprm->rlim_stack.rlim_cur = _STK_LIM;
        }
 
-       task_lock(current->group_leader);
-       rlim_stack = current->signal->rlim[RLIMIT_STACK];
-       task_unlock(current->group_leader);
-
-       arch_pick_mmap_layout(current->mm, &rlim_stack);
+       arch_pick_mmap_layout(current->mm, &bprm->rlim_stack);
 
        current->sas_ss_sp = current->sas_ss_size = 0;
 
@@ -1387,6 +1386,10 @@ EXPORT_SYMBOL(setup_new_exec);
 /* Runs immediately before start_thread() takes over. */
 void finalize_exec(struct linux_binprm *bprm)
 {
+       /* Store any stack rlimit changes before starting thread. */
+       task_lock(current->group_leader);
+       current->signal->rlim[RLIMIT_STACK] = bprm->rlim_stack;
+       task_unlock(current->group_leader);
 }
 EXPORT_SYMBOL(finalize_exec);
 
index 40e52afbb2b0cddf114ae977c6f6a187816f94f9..4955e0863b83d456ba467f291d335b01bc1aa2a6 100644 (file)
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -61,6 +61,8 @@ struct linux_binprm {
        unsigned interp_flags;
        unsigned interp_data;
        unsigned long loader, exec;
+
+       struct rlimit rlim_stack; /* Saved RLIMIT_STACK used during exec. */
 } __randomize_layout;
 
 #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0