From 63176d7e1ed8817b9219e24caa6182dc2acb77e8 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Sun, 11 Jan 2015 18:25:23 +0000
Subject: [PATCH] musl: merge changes from git, includes further getopt fixes

Signed-off-by: Felix Fietkau <nbd@openwrt.org>

SVN-Revision: 43939
---
 .../musl/patches/000-git-2015-01-09.patch     | 3440 +++++++++++++++++
 .../patches/001-revert-getopt-change.patch    |  113 +
 .../110-getopt_optional_argument_fix.patch    |   20 -
 .../120-getopt_non-option-arguments_fix.patch |   43 -
 4 files changed, 3553 insertions(+), 63 deletions(-)
 create mode 100644 toolchain/musl/patches/000-git-2015-01-09.patch
 create mode 100644 toolchain/musl/patches/001-revert-getopt-change.patch
 delete mode 100644 toolchain/musl/patches/110-getopt_optional_argument_fix.patch
 delete mode 100644 toolchain/musl/patches/120-getopt_non-option-arguments_fix.patch

diff --git a/toolchain/musl/patches/000-git-2015-01-09.patch b/toolchain/musl/patches/000-git-2015-01-09.patch
new file mode 100644
index 0000000000..c710fe0ef2
--- /dev/null
+++ b/toolchain/musl/patches/000-git-2015-01-09.patch
@@ -0,0 +1,3440 @@
+--- a/arch/arm/atomic.h
++++ b/arch/arm/atomic.h
+@@ -22,37 +22,150 @@ static inline int a_ctz_64(uint64_t x)
+ 	return a_ctz_l(y);
+ }
+ 
+-#if ((__ARM_ARCH_6__ || __ARM_ARCH_6K__ || __ARM_ARCH_6ZK__) && !__thumb__) \
+- || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
+-
+ #if __ARM_ARCH_7A__ || __ARM_ARCH_7R__ ||  __ARM_ARCH >= 7
+-#define MEM_BARRIER "dmb ish"
+-#else
+-#define MEM_BARRIER "mcr p15,0,r0,c7,c10,5"
+-#endif
+ 
+-static inline int __k_cas(int t, int s, volatile int *p)
++static inline void a_barrier()
+ {
+-	int ret;
+-	__asm__(
+-		"	" MEM_BARRIER "\n"
++	__asm__ __volatile__("dmb ish");
++}
++
++static inline int a_cas(volatile int *p, int t, int s)
++{
++	int old;
++	__asm__ __volatile__(
++		"	dmb ish\n"
+ 		"1:	ldrex %0,%3\n"
+-		"	subs %0,%0,%1\n"
+-#ifdef __thumb__
+-		"	itt eq\n"
+-#endif
+-		"	strexeq %0,%2,%3\n"
+-		"	teqeq %0,#1\n"
+-		"	beq 1b\n"
+-		"	" MEM_BARRIER "\n"
+-		: "=&r"(ret)
++		"	cmp %0,%1\n"
++		"	bne 1f\n"
++		"	strex %0,%2,%3\n"
++		"	cmp %0, #0\n"
++		"	bne 1b\n"
++		"	mov %0, %1\n"
++		"1:	dmb ish\n"
++		: "=&r"(old)
+ 		: "r"(t), "r"(s), "Q"(*p)
+ 		: "memory", "cc" );
+-	return ret;
++	return old;
++}
++
++static inline int a_swap(volatile int *x, int v)
++{
++	int old, tmp;
++	__asm__ __volatile__(
++		"	dmb ish\n"
++		"1:	ldrex %0,%3\n"
++		"	strex %1,%2,%3\n"
++		"	cmp %1, #0\n"
++		"	bne 1b\n"
++		"	dmb ish\n"
++		: "=&r"(old), "=&r"(tmp)
++		: "r"(v), "Q"(*x)
++		: "memory", "cc" );
++	return old;
++}
++
++static inline int a_fetch_add(volatile int *x, int v)
++{
++	int old, tmp;
++	__asm__ __volatile__(
++		"	dmb ish\n"
++		"1:	ldrex %0,%3\n"
++		"	add %0,%0,%2\n"
++		"	strex %1,%0,%3\n"
++		"	cmp %1, #0\n"
++		"	bne 1b\n"
++		"	dmb ish\n"
++		: "=&r"(old), "=&r"(tmp)
++		: "r"(v), "Q"(*x)
++		: "memory", "cc" );
++	return old-v;
++}
++
++static inline void a_inc(volatile int *x)
++{
++	int tmp, tmp2;
++	__asm__ __volatile__(
++		"	dmb ish\n"
++		"1:	ldrex %0,%2\n"
++		"	add %0,%0,#1\n"
++		"	strex %1,%0,%2\n"
++		"	cmp %1, #0\n"
++		"	bne 1b\n"
++		"	dmb ish\n"
++		: "=&r"(tmp), "=&r"(tmp2)
++		: "Q"(*x)
++		: "memory", "cc" );
++}
++
++static inline void a_dec(volatile int *x)
++{
++	int tmp, tmp2;
++	__asm__ __volatile__(
++		"	dmb ish\n"
++		"1:	ldrex %0,%2\n"
++		"	sub %0,%0,#1\n"
++		"	strex %1,%0,%2\n"
++		"	cmp %1, #0\n"
++		"	bne 1b\n"
++		"	dmb ish\n"
++		: "=&r"(tmp), "=&r"(tmp2)
++		: "Q"(*x)
++		: "memory", "cc" );
++}
++
++static inline void a_and(volatile int *x, int v)
++{
++	int tmp, tmp2;
++	__asm__ __volatile__(
++		"	dmb ish\n"
++		"1:	ldrex %0,%3\n"
++		"	and %0,%0,%2\n"
++		"	strex %1,%0,%3\n"
++		"	cmp %1, #0\n"
++		"	bne 1b\n"
++		"	dmb ish\n"
++		: "=&r"(tmp), "=&r"(tmp2)
++		: "r"(v), "Q"(*x)
++		: "memory", "cc" );
++}
++
++static inline void a_or(volatile int *x, int v)
++{
++	int tmp, tmp2;
++	__asm__ __volatile__(
++		"	dmb ish\n"
++		"1:	ldrex %0,%3\n"
++		"	orr %0,%0,%2\n"
++		"	strex %1,%0,%3\n"
++		"	cmp %1, #0\n"
++		"	bne 1b\n"
++		"	dmb ish\n"
++		: "=&r"(tmp), "=&r"(tmp2)
++		: "r"(v), "Q"(*x)
++		: "memory", "cc" );
++}
++
++static inline void a_store(volatile int *p, int x)
++{
++	__asm__ __volatile__(
++		"	dmb ish\n"
++		"	str %1,%0\n"
++		"	dmb ish\n"
++		: "=m"(*p)
++		: "r"(x)
++		: "memory", "cc" );
+ }
++
+ #else
+-#define __k_cas ((int (*)(int, int, volatile int *))0xffff0fc0)
+-#endif
++
++int __a_cas(int, int, volatile int *) __attribute__((__visibility__("hidden")));
++#define __k_cas __a_cas
++
++static inline void a_barrier()
++{
++	__asm__ __volatile__("bl __a_barrier"
++		: : : "memory", "cc", "ip", "lr" );
++}
+ 
+ static inline int a_cas(volatile int *p, int t, int s)
+ {
+@@ -65,11 +178,6 @@ static inline int a_cas(volatile int *p,
+ 	}
+ }
+ 
+-static inline void *a_cas_p(volatile void *p, void *t, void *s)
+-{
+-	return (void *)a_cas(p, (int)t, (int)s);
+-}
+-
+ static inline int a_swap(volatile int *x, int v)
+ {
+ 	int old;
+@@ -98,19 +206,9 @@ static inline void a_dec(volatile int *x
+ 
+ static inline void a_store(volatile int *p, int x)
+ {
+-	while (__k_cas(*p, x, p));
+-}
+-
+-#define a_spin a_barrier
+-
+-static inline void a_barrier()
+-{
+-	__k_cas(0, 0, &(int){0});
+-}
+-
+-static inline void a_crash()
+-{
+-	*(volatile char *)0=0;
++	a_barrier();
++	*p = x;
++	a_barrier();
+ }
+ 
+ static inline void a_and(volatile int *p, int v)
+@@ -127,6 +225,20 @@ static inline void a_or(volatile int *p,
+ 	while (__k_cas(old, old|v, p));
+ }
+ 
++#endif
++
++static inline void *a_cas_p(volatile void *p, void *t, void *s)
++{
++	return (void *)a_cas(p, (int)t, (int)s);
++}
++
++#define a_spin a_barrier
++
++static inline void a_crash()
++{
++	*(volatile char *)0=0;
++}
++
+ static inline void a_or_l(volatile void *p, long v)
+ {
+ 	a_or(p, v);
+--- a/arch/arm/bits/alltypes.h.in
++++ b/arch/arm/bits/alltypes.h.in
+@@ -8,7 +8,6 @@ TYPEDEF __builtin_va_list __isoc_va_list
+ #ifndef __cplusplus
+ TYPEDEF unsigned wchar_t;
+ #endif
+-TYPEDEF unsigned wint_t;
+ 
+ TYPEDEF float float_t;
+ TYPEDEF double double_t;
+--- a/arch/arm/bits/syscall.h
++++ b/arch/arm/bits/syscall.h
+@@ -339,7 +339,13 @@
+ #define __NR_seccomp	383
+ #define __NR_getrandom	384
+ #define __NR_memfd_create	385
++#define __NR_bpf	386
+ 
++#define __ARM_NR_breakpoint	0x0f0001
++#define __ARM_NR_cacheflush	0x0f0002
++#define __ARM_NR_usr26		0x0f0003
++#define __ARM_NR_usr32		0x0f0004
++#define __ARM_NR_set_tls	0x0f0005
+ 
+ /* Repeated with SYS_ prefix */
+ 
+@@ -684,3 +690,4 @@
+ #define SYS_seccomp	383
+ #define SYS_getrandom	384
+ #define SYS_memfd_create	385
++#define SYS_bpf	386
+--- a/arch/arm/pthread_arch.h
++++ b/arch/arm/pthread_arch.h
+@@ -10,9 +10,17 @@ static inline __attribute__((const)) pth
+ 
+ #else
+ 
+-typedef char *(*__ptr_func_t)(void) __attribute__((const));
+-#define __pthread_self() \
+-	((pthread_t)(((__ptr_func_t)0xffff0fe0)()+8-sizeof(struct pthread)))
++static inline __attribute__((const)) pthread_t __pthread_self()
++{
++#ifdef __clang__
++	char *p;
++	__asm__( "bl __a_gettp\n\tmov %0,r0" : "=r"(p) : : "cc", "r0", "lr" );
++#else
++	register char *p __asm__("r0");
++	__asm__( "bl __a_gettp" : "=r"(p) : : "cc", "lr" );
++#endif
++	return (void *)(p+8-sizeof(struct pthread));
++}
+ 
+ #endif
+ 
+--- /dev/null
++++ b/arch/arm/src/__set_thread_area.c
+@@ -0,0 +1,49 @@
++#include <stdint.h>
++#include <elf.h>
++#include "pthread_impl.h"
++#include "libc.h"
++
++#define HWCAP_TLS (1 << 15)
++
++extern const unsigned char __attribute__((__visibility__("hidden")))
++	__a_barrier_dummy[], __a_barrier_oldkuser[],
++	__a_barrier_v6[], __a_barrier_v7[],
++	__a_cas_dummy[], __a_cas_v6[], __a_cas_v7[],
++	__a_gettp_dummy[];
++
++#define __a_barrier_kuser 0xffff0fa0
++#define __a_cas_kuser 0xffff0fc0
++#define __a_gettp_kuser 0xffff0fe0
++
++extern uintptr_t __attribute__((__visibility__("hidden")))
++	__a_barrier_ptr, __a_cas_ptr, __a_gettp_ptr;
++
++#define SET(op,ver) (__a_##op##_ptr = \
++	(uintptr_t)__a_##op##_##ver - (uintptr_t)__a_##op##_dummy)
++
++int __set_thread_area(void *p) /* set the __a_*_ptr selectors, then pass p to syscall 0xf0005 */
++{
++#if !__ARM_ARCH_7A__ && !__ARM_ARCH_7R__ && __ARM_ARCH < 7 /* selection is compiled in only for pre-v7 builds */
++	if (__hwcap & HWCAP_TLS) { /* HWCAP_TLS set: start from the v7 cas/barrier variants */
++		size_t *aux;
++		SET(cas, v7);
++		SET(barrier, v7);
++		for (aux=libc.auxv; *aux; aux+=2) { /* auxv is walked as (type, value) pairs until a zero type */
++			if (*aux != AT_PLATFORM) continue;
++			const char *s = (void *)aux[1];
++			if (s[0]!='v' || s[1]!='6' || s[2]-'0'<10u) break; /* keep v7 unless the string is "v6" followed by a non-digit */
++			SET(cas, v6);
++			SET(barrier, v6);
++			break;
++		}
++	} else { /* no HWCAP_TLS: point everything at the fixed __a_*_kuser addresses */
++		int ver = *(int *)0xffff0ffc; /* word at a fixed address; presumably the kuser helper version - confirm */
++		SET(gettp, kuser);
++		SET(cas, kuser);
++		SET(barrier, kuser);
++		if (ver < 2) a_crash(); /* values below 2 are treated as unusable */
++		if (ver < 3) SET(barrier, oldkuser); /* below 3: barrier uses the oldkuser variant instead */
++	}
++#endif
++	return __syscall(0xf0005, p); /* 0xf0005 equals __ARM_NR_set_tls from arch/arm/bits/syscall.h */
++}
+--- /dev/null
++++ b/arch/arm/src/arm/atomics.s
+@@ -0,0 +1,116 @@
++.text
++
++.global __a_barrier
++.hidden __a_barrier
++.type __a_barrier,%function
++__a_barrier:
++	ldr ip,1f
++	ldr ip,[pc,ip]
++	add pc,pc,ip
++1:	.word __a_barrier_ptr-1b
++.global __a_barrier_dummy
++.hidden __a_barrier_dummy
++__a_barrier_dummy:
++	tst lr,#1
++	moveq pc,lr
++	bx lr
++.global __a_barrier_oldkuser
++.hidden __a_barrier_oldkuser
++__a_barrier_oldkuser:
++	push {r0,r1,r2,r3,ip,lr}
++	mov r1,r0
++	mov r2,sp
++	ldr ip,=0xffff0fc0
++	mov lr,pc
++	mov pc,ip
++	pop {r0,r1,r2,r3,ip,lr}
++	tst lr,#1
++	moveq pc,lr
++	bx lr
++.global __a_barrier_v6
++.hidden __a_barrier_v6
++__a_barrier_v6:
++	mcr p15,0,r0,c7,c10,5
++	bx lr
++.global __a_barrier_v7
++.hidden __a_barrier_v7
++__a_barrier_v7:
++	.word 0xf57ff05b        /* dmb ish */
++	bx lr
++
++.global __a_cas
++.hidden __a_cas
++.type __a_cas,%function
++__a_cas:
++	ldr ip,1f
++	ldr ip,[pc,ip]
++	add pc,pc,ip
++1:	.word __a_cas_ptr-1b
++.global __a_cas_dummy
++.hidden __a_cas_dummy
++__a_cas_dummy:
++	mov r3,r0
++	ldr r0,[r2]
++	subs r0,r3,r0
++	streq r1,[r2]
++	tst lr,#1
++	moveq pc,lr
++	bx lr
++.global __a_cas_v6
++.hidden __a_cas_v6
++__a_cas_v6:
++	mov r3,r0
++	mcr p15,0,r0,c7,c10,5
++1:	.word 0xe1920f9f        /* ldrex r0,[r2] */
++	subs r0,r3,r0
++	.word 0x01820f91        /* strexeq r0,r1,[r2] */
++	teqeq r0,#1
++	beq 1b
++	mcr p15,0,r0,c7,c10,5
++	bx lr
++.global __a_cas_v7
++.hidden __a_cas_v7
++__a_cas_v7:
++	mov r3,r0
++	.word 0xf57ff05b        /* dmb ish */
++1:	.word 0xe1920f9f        /* ldrex r0,[r2] */
++	subs r0,r3,r0
++	.word 0x01820f91        /* strexeq r0,r1,[r2] */
++	teqeq r0,#1
++	beq 1b
++	.word 0xf57ff05b        /* dmb ish */
++	bx lr
++
++.global __aeabi_read_tp
++.type __aeabi_read_tp,%function
++__aeabi_read_tp:
++
++.global __a_gettp
++.hidden __a_gettp
++.type __a_gettp,%function
++__a_gettp:
++	ldr r0,1f
++	ldr r0,[pc,r0]
++	add pc,pc,r0
++1:	.word __a_gettp_ptr-1b
++.global __a_gettp_dummy
++.hidden __a_gettp_dummy
++__a_gettp_dummy:
++	mrc p15,0,r0,c13,c0,3
++	bx lr
++
++.data
++.global __a_barrier_ptr
++.hidden __a_barrier_ptr
++__a_barrier_ptr:
++	.word 0
++
++.global __a_cas_ptr
++.hidden __a_cas_ptr
++__a_cas_ptr:
++	.word 0
++
++.global __a_gettp_ptr
++.hidden __a_gettp_ptr
++__a_gettp_ptr:
++	.word 0
+--- a/arch/arm/syscall_arch.h
++++ b/arch/arm/syscall_arch.h
+@@ -5,8 +5,6 @@
+ 
+ long (__syscall)(long, ...);
+ 
+-#ifndef __clang__
+-
+ #define __asm_syscall(...) do { \
+ 	__asm__ __volatile__ ( "svc 0" \
+ 	: "=r"(r0) : __VA_ARGS__ : "memory"); \
+@@ -54,41 +52,25 @@ static inline long __syscall4(long n, lo
+ 	__asm_syscall("r"(r7), "0"(r0), "r"(r1), "r"(r2), "r"(r3));
+ }
+ 
+-#else
+-
+-static inline long __syscall0(long n)
+-{
+-	return (__syscall)(n);
+-}
+-
+-static inline long __syscall1(long n, long a)
+-{
+-	return (__syscall)(n, a);
+-}
+-
+-static inline long __syscall2(long n, long a, long b)
+-{
+-	return (__syscall)(n, a, b);
+-}
+-
+-static inline long __syscall3(long n, long a, long b, long c)
+-{
+-	return (__syscall)(n, a, b, c);
+-}
+-
+-static inline long __syscall4(long n, long a, long b, long c, long d)
+-{
+-	return (__syscall)(n, a, b, c, d);
+-}
+-
+-#endif
+-
+ static inline long __syscall5(long n, long a, long b, long c, long d, long e)
+ {
+-	return (__syscall)(n, a, b, c, d, e);
++	register long r7 __asm__("r7") = n;
++	register long r0 __asm__("r0") = a;
++	register long r1 __asm__("r1") = b;
++	register long r2 __asm__("r2") = c;
++	register long r3 __asm__("r3") = d;
++	register long r4 __asm__("r4") = e;
++	__asm_syscall("r"(r7), "0"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4));
+ }
+ 
+ static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f)
+ {
+-	return (__syscall)(n, a, b, c, d, e, f);
++	register long r7 __asm__("r7") = n;
++	register long r0 __asm__("r0") = a;
++	register long r1 __asm__("r1") = b;
++	register long r2 __asm__("r2") = c;
++	register long r3 __asm__("r3") = d;
++	register long r4 __asm__("r4") = e;
++	register long r5 __asm__("r5") = f;
++	__asm_syscall("r"(r7), "0"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4), "r"(r5));
+ }
+--- a/arch/i386/bits/alltypes.h.in
++++ b/arch/i386/bits/alltypes.h.in
+@@ -17,7 +17,6 @@ TYPEDEF __WCHAR_TYPE__ wchar_t;
+ TYPEDEF long wchar_t;
+ #endif
+ #endif
+-TYPEDEF unsigned wint_t;
+ 
+ #if defined(__FLT_EVAL_METHOD__) && __FLT_EVAL_METHOD__ == 0
+ TYPEDEF float float_t;
+--- a/arch/i386/bits/syscall.h
++++ b/arch/i386/bits/syscall.h
+@@ -355,6 +355,7 @@
+ #define __NR_seccomp		354
+ #define __NR_getrandom		355
+ #define __NR_memfd_create	356
++#define __NR_bpf		357
+ 
+ 
+ /* Repeated with SYS_ prefix */
+@@ -716,3 +717,4 @@
+ #define SYS_seccomp		354
+ #define SYS_getrandom		355
+ #define SYS_memfd_create	356
++#define SYS_bpf			357
+--- a/arch/microblaze/bits/alltypes.h.in
++++ b/arch/microblaze/bits/alltypes.h.in
+@@ -8,7 +8,6 @@ TYPEDEF __builtin_va_list __isoc_va_list
+ #ifndef __cplusplus
+ TYPEDEF int wchar_t;
+ #endif
+-TYPEDEF unsigned wint_t;
+ 
+ TYPEDEF float float_t;
+ TYPEDEF double double_t;
+--- a/arch/microblaze/bits/syscall.h
++++ b/arch/microblaze/bits/syscall.h
+@@ -381,6 +381,7 @@
+ #define __NR_seccomp 384
+ #define __NR_getrandom 385
+ #define __NR_memfd_create 386
++#define __NR_bpf 387
+ 
+ /* Repeated with SYS_ prefix */
+ 
+@@ -768,3 +769,4 @@
+ #define SYS_seccomp 384
+ #define SYS_getrandom 385
+ #define SYS_memfd_create 386
++#define SYS_bpf 387
+--- a/arch/microblaze/syscall_arch.h
++++ b/arch/microblaze/syscall_arch.h
+@@ -100,39 +100,7 @@ static inline long __syscall6(long n, lo
+ 
+ #else
+ 
+-static inline long __syscall0(long n)
+-{
+-	return (__syscall)(n);
+-}
+-
+-static inline long __syscall1(long n, long a)
+-{
+-	return (__syscall)(n, a);
+-}
+-
+-static inline long __syscall2(long n, long a, long b)
+-{
+-	return (__syscall)(n, a, b);
+-}
+-
+-static inline long __syscall3(long n, long a, long b, long c)
+-{
+-	return (__syscall)(n, a, b, c);
+-}
+-
+-static inline long __syscall4(long n, long a, long b, long c, long d)
+-{
+-	return (__syscall)(n, a, b, c, d);
+-}
+-
+-static inline long __syscall5(long n, long a, long b, long c, long d, long e)
+-{
+-	return (__syscall)(n, a, b, c, d, e);
+-}
+-
+-static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f)
+-{
+-	return (__syscall)(n, a, b, c, d, e, f);
+-}
++#undef SYSCALL_NO_INLINE
++#define SYSCALL_NO_INLINE
+ 
+ #endif
+--- a/arch/mips/bits/alltypes.h.in
++++ b/arch/mips/bits/alltypes.h.in
+@@ -8,7 +8,6 @@ TYPEDEF __builtin_va_list __isoc_va_list
+ #ifndef __cplusplus
+ TYPEDEF int wchar_t;
+ #endif
+-TYPEDEF unsigned wint_t;
+ 
+ TYPEDEF float float_t;
+ TYPEDEF double double_t;
+--- a/arch/mips/bits/syscall.h
++++ b/arch/mips/bits/syscall.h
+@@ -352,6 +352,7 @@
+ #define __NR_seccomp                 4352
+ #define __NR_getrandom               4353
+ #define __NR_memfd_create            4354
++#define __NR_bpf                     4355
+ 
+ 
+ /* Repeated with SYS_ prefix */
+@@ -709,3 +710,4 @@
+ #define SYS_seccomp                 4352
+ #define SYS_getrandom               4353
+ #define SYS_memfd_create            4354
++#define SYS_bpf                     4355
+--- a/arch/or1k/bits/alltypes.h.in
++++ b/arch/or1k/bits/alltypes.h.in
+@@ -8,7 +8,6 @@ TYPEDEF __builtin_va_list __isoc_va_list
+ #ifndef __cplusplus
+ TYPEDEF unsigned wchar_t;
+ #endif
+-TYPEDEF unsigned wint_t;
+ 
+ TYPEDEF float float_t;
+ TYPEDEF double double_t;
+--- a/arch/or1k/bits/syscall.h
++++ b/arch/or1k/bits/syscall.h
+@@ -263,6 +263,7 @@
+ #define __NR_seccomp 277
+ #define __NR_getrandom 278
+ #define __NR_memfd_create 279
++#define __NR_bpf 280
+ 
+ #define SYS_io_setup __NR_io_setup
+ #define SYS_io_destroy __NR_io_destroy
+@@ -529,3 +530,4 @@
+ #define SYS_seccomp __NR_seccomp
+ #define SYS_getrandom __NR_getrandom
+ #define SYS_memfd_create __NR_memfd_create
++#define SYS_bpf __NR_bpf
+--- a/arch/or1k/syscall_arch.h
++++ b/arch/or1k/syscall_arch.h
+@@ -1,7 +1,7 @@
+ #define __SYSCALL_LL_E(x) \
+ ((union { long long ll; long l[2]; }){ .ll = x }).l[0], \
+ ((union { long long ll; long l[2]; }){ .ll = x }).l[1]
+-#define __SYSCALL_LL_O(x) 0, __SYSCALL_LL_E((x))
++#define __SYSCALL_LL_O(x) __SYSCALL_LL_E((x))
+ 
+ #define SYSCALL_MMAP2_UNIT 8192ULL
+ 
+@@ -118,39 +118,7 @@ static inline long __syscall6(long n, lo
+ 
+ #else
+ 
+-static inline long __syscall0(long n)
+-{
+-	return (__syscall)(n);
+-}
+-
+-static inline long __syscall1(long n, long a)
+-{
+-	return (__syscall)(n, a);
+-}
+-
+-static inline long __syscall2(long n, long a, long b)
+-{
+-	return (__syscall)(n, a, b);
+-}
+-
+-static inline long __syscall3(long n, long a, long b, long c)
+-{
+-	return (__syscall)(n, a, b, c);
+-}
+-
+-static inline long __syscall4(long n, long a, long b, long c, long d)
+-{
+-	return (__syscall)(n, a, b, c, d);
+-}
+-
+-static inline long __syscall5(long n, long a, long b, long c, long d, long e)
+-{
+-	return (__syscall)(n, a, b, c, d, e);
+-}
+-
+-static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f)
+-{
+-	return (__syscall)(n, a, b, c, d, e, f);
+-}
++#undef SYSCALL_NO_INLINE
++#define SYSCALL_NO_INLINE
+ 
+ #endif
+--- a/arch/powerpc/bits/alltypes.h.in
++++ b/arch/powerpc/bits/alltypes.h.in
+@@ -8,7 +8,6 @@ TYPEDEF __builtin_va_list __isoc_va_list
+ #ifndef __cplusplus
+ TYPEDEF long wchar_t;
+ #endif
+-TYPEDEF unsigned wint_t;
+ 
+ TYPEDEF float float_t;
+ TYPEDEF double double_t;
+--- a/arch/powerpc/bits/syscall.h
++++ b/arch/powerpc/bits/syscall.h
+@@ -374,6 +374,7 @@
+ #define __NR_seccomp               358
+ #define __NR_getrandom             359
+ #define __NR_memfd_create          360
++#define __NR_bpf                   361
+ 
+ /*
+  * repeated with SYS prefix
+@@ -754,3 +755,4 @@
+ #define SYS_seccomp               358
+ #define SYS_getrandom             359
+ #define SYS_memfd_create          360
++#define SYS_bpf                   361
+--- a/arch/powerpc/syscall_arch.h
++++ b/arch/powerpc/syscall_arch.h
+@@ -3,39 +3,5 @@
+ ((union { long long ll; long l[2]; }){ .ll = x }).l[1]
+ #define __SYSCALL_LL_O(x) 0, __SYSCALL_LL_E((x))
+ 
+-long (__syscall)(long, ...);
+-
+-static inline long __syscall0(long n)
+-{
+-	return (__syscall)(n, 0, 0, 0, 0, 0, 0);
+-}
+-
+-static inline long __syscall1(long n, long a)
+-{
+-	return (__syscall)(n, a, 0, 0, 0, 0, 0);
+-}
+-
+-static inline long __syscall2(long n, long a, long b)
+-{
+-	return (__syscall)(n, a, b, 0, 0, 0, 0);
+-}
+-
+-static inline long __syscall3(long n, long a, long b, long c)
+-{
+-	return (__syscall)(n, a, b, c, 0, 0, 0);
+-}
+-
+-static inline long __syscall4(long n, long a, long b, long c, long d)
+-{
+-	return (__syscall)(n, a, b, c, d, 0, 0);
+-}
+-
+-static inline long __syscall5(long n, long a, long b, long c, long d, long e)
+-{
+-	return (__syscall)(n, a, b, c, d, e, 0);
+-}
+-
+-static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f)
+-{
+-	return (__syscall)(n, a, b, c, d, e, f);
+-}
++#undef SYSCALL_NO_INLINE
++#define SYSCALL_NO_INLINE
+--- a/arch/sh/bits/alltypes.h.in
++++ b/arch/sh/bits/alltypes.h.in
+@@ -8,7 +8,6 @@ TYPEDEF __builtin_va_list __isoc_va_list
+ #ifndef __cplusplus
+ TYPEDEF long wchar_t;
+ #endif
+-TYPEDEF unsigned wint_t;
+ 
+ TYPEDEF float float_t;
+ TYPEDEF double double_t;
+--- a/arch/x32/bits/alltypes.h.in
++++ b/arch/x32/bits/alltypes.h.in
+@@ -8,7 +8,6 @@ TYPEDEF __builtin_va_list __isoc_va_list
+ #ifndef __cplusplus
+ TYPEDEF long wchar_t;
+ #endif
+-TYPEDEF unsigned wint_t;
+ 
+ #if defined(__FLT_EVAL_METHOD__) && __FLT_EVAL_METHOD__ == 2
+ TYPEDEF long double float_t;
+--- a/arch/x32/bits/syscall.h
++++ b/arch/x32/bits/syscall.h
+@@ -277,6 +277,8 @@
+ #define __NR_seccomp (__X32_SYSCALL_BIT + 317)
+ #define __NR_getrandom (__X32_SYSCALL_BIT + 318)
+ #define __NR_memfd_create (__X32_SYSCALL_BIT + 319)
++#define __NR_kexec_file_load (__X32_SYSCALL_BIT + 320)
++#define __NR_bpf (__X32_SYSCALL_BIT + 321)
+ 
+ #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512)
+ #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513)
+@@ -604,6 +606,8 @@
+ #define SYS_seccomp __NR_seccomp
+ #define SYS_getrandom __NR_getrandom
+ #define SYS_memfd_create __NR_memfd_create
++#define SYS_kexec_file_load __NR_kexec_file_load
++#define SYS_bpf __NR_bpf
+ 
+ #define SYS_rt_sigaction __NR_rt_sigaction
+ #define SYS_rt_sigreturn __NR_rt_sigreturn
+--- a/arch/x86_64/bits/alltypes.h.in
++++ b/arch/x86_64/bits/alltypes.h.in
+@@ -8,7 +8,6 @@ TYPEDEF __builtin_va_list __isoc_va_list
+ #ifndef __cplusplus
+ TYPEDEF int wchar_t;
+ #endif
+-TYPEDEF unsigned wint_t;
+ 
+ #if defined(__FLT_EVAL_METHOD__) && __FLT_EVAL_METHOD__ == 2
+ TYPEDEF long double float_t;
+--- a/arch/x86_64/bits/syscall.h
++++ b/arch/x86_64/bits/syscall.h
+@@ -318,6 +318,8 @@
+ #define __NR_seccomp				317
+ #define __NR_getrandom				318
+ #define __NR_memfd_create			319
++#define __NR_kexec_file_load			320
++#define __NR_bpf				321
+ 
+ 
+ #undef __NR_fstatat
+@@ -654,6 +656,8 @@
+ #define SYS_seccomp				317
+ #define SYS_getrandom				318
+ #define SYS_memfd_create			319
++#define SYS_kexec_file_load			320
++#define SYS_bpf					321
+ 
+ #undef SYS_fstatat
+ #undef SYS_pread
+--- a/include/alltypes.h.in
++++ b/include/alltypes.h.in
+@@ -28,6 +28,7 @@ TYPEDEF _Int64 blkcnt_t;
+ TYPEDEF unsigned _Int64 fsblkcnt_t;
+ TYPEDEF unsigned _Int64 fsfilcnt_t;
+ 
++TYPEDEF unsigned wint_t;
+ TYPEDEF unsigned long wctype_t;
+ 
+ TYPEDEF void * timer_t;
+--- a/include/arpa/nameser.h
++++ b/include/arpa/nameser.h
+@@ -1,6 +1,11 @@
+ #ifndef _ARPA_NAMESER_H
+ #define _ARPA_NAMESER_H
+ 
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++#include <stddef.h>
+ #include <stdint.h>
+ 
+ #define __NAMESER	19991006
+@@ -48,6 +53,8 @@ extern const struct _ns_flagdata _ns_fla
+ #define ns_msg_end(handle) ((handle)._eom + 0)
+ #define ns_msg_size(handle) ((handle)._eom - (handle)._msg)
+ #define ns_msg_count(handle, section) ((handle)._counts[section] + 0)
++#define ns_msg_getflag(handle, flag) \
++	(((handle)._flags & _ns_flagdata[flag].mask) >> _ns_flagdata[flag].shift)
+ 
+ typedef	struct __ns_rr {
+ 	char		name[NS_MAXDNAME];
+@@ -296,43 +303,20 @@ typedef enum __ns_cert_types {
+ #define NS_OPT_DNSSEC_OK        0x8000U
+ #define NS_OPT_NSID		3
+ 
+-#define NS_GET16(s, cp) do { \
+-	register const unsigned char *t_cp = (const unsigned char *)(cp); \
+-	(s) = ((uint16_t)t_cp[0] << 8) \
+-	    | ((uint16_t)t_cp[1]) \
+-	    ; \
+-	(cp) += NS_INT16SZ; \
+-} while (0)
+-
+-#define NS_GET32(l, cp) do { \
+-	register const unsigned char *t_cp = (const unsigned char *)(cp); \
+-	(l) = ((uint32_t)t_cp[0] << 24) \
+-	    | ((uint32_t)t_cp[1] << 16) \
+-	    | ((uint32_t)t_cp[2] << 8) \
+-	    | ((uint32_t)t_cp[3]) \
+-	    ; \
+-	(cp) += NS_INT32SZ; \
+-} while (0)
+-
+-#define NS_PUT16(s, cp) do { \
+-	register uint16_t t_s = (uint16_t)(s); \
+-	register unsigned char *t_cp = (unsigned char *)(cp); \
+-	*t_cp++ = t_s >> 8; \
+-	*t_cp   = t_s; \
+-	(cp) += NS_INT16SZ; \
+-} while (0)
+-
+-#define NS_PUT32(l, cp) do { \
+-	register uint32_t t_l = (uint32_t)(l); \
+-	register unsigned char *t_cp = (unsigned char *)(cp); \
+-	*t_cp++ = t_l >> 24; \
+-	*t_cp++ = t_l >> 16; \
+-	*t_cp++ = t_l >> 8; \
+-	*t_cp   = t_l; \
+-	(cp) += NS_INT32SZ; \
+-} while (0)
+-
+-
++#define NS_GET16(s, cp) (void)((s) = ns_get16(((cp)+=2)-2))
++#define NS_GET32(l, cp) (void)((l) = ns_get32(((cp)+=4)-4))
++#define NS_PUT16(s, cp) ns_put16((s), ((cp)+=2)-2)
++#define NS_PUT32(l, cp) ns_put32((l), ((cp)+=4)-4)
++
++unsigned ns_get16(const unsigned char *);
++unsigned long ns_get32(const unsigned char *);
++void ns_put16(unsigned, unsigned char *);
++void ns_put32(unsigned long, unsigned char *);
++
++int ns_initparse(const unsigned char *, int, ns_msg *);
++int ns_parserr(ns_msg *, ns_sect, int, ns_rr *);
++int ns_skiprr(const unsigned char *, const unsigned char *, ns_sect, int);
++int ns_name_uncompress(const unsigned char *, const unsigned char *, const unsigned char *, char *, size_t);
+ 
+ 
+ #define	__BIND		19950621
+@@ -464,4 +448,8 @@ typedef struct {
+ #define	PUTSHORT		NS_PUT16
+ #define	PUTLONG			NS_PUT32
+ 
++#ifdef __cplusplus
++}
++#endif
++
+ #endif
+--- a/include/complex.h
++++ b/include/complex.h
+@@ -7,9 +7,9 @@ extern "C" {
+ 
+ #define complex _Complex
+ #ifdef __GNUC__
+-#define _Complex_I (__extension__ 1.0fi)
++#define _Complex_I (__extension__ (0.0f+1.0fi))
+ #else
+-#define _Complex_I 1.0fi
++#define _Complex_I (0.0f+1.0fi)
+ #endif
+ #define I _Complex_I
+ 
+@@ -101,8 +101,9 @@ double creal(double complex);
+ float crealf(float complex);
+ long double creall(long double complex);
+ 
++#ifndef __cplusplus
+ #define __CIMAG(x, t) \
+-	((union { _Complex t __z; t __xy[2]; }){(_Complex t)(x)}.__xy[1])
++	(+(union { _Complex t __z; t __xy[2]; }){(_Complex t)(x)}.__xy[1])
+ 
+ #define creal(x) ((double)(x))
+ #define crealf(x) ((float)(x))
+@@ -111,13 +112,20 @@ long double creall(long double complex);
+ #define cimag(x) __CIMAG(x, double)
+ #define cimagf(x) __CIMAG(x, float)
+ #define cimagl(x) __CIMAG(x, long double)
++#endif
+ 
+-#define __CMPLX(x, y, t) \
+-	((union { _Complex t __z; t __xy[2]; }){.__xy = {(x),(y)}}.__z)
+-
++#if __STDC_VERSION__ >= 201112L
++#if defined(_Imaginary_I)
++#define __CMPLX(x, y, t) ((t)(x) + _Imaginary_I*(t)(y))
++#elif defined(__clang__)
++#define __CMPLX(x, y, t) (+(_Complex t){ (t)(x), (t)(y) })
++#else
++#define __CMPLX(x, y, t) (__builtin_complex((t)(x), (t)(y)))
++#endif
+ #define CMPLX(x, y) __CMPLX(x, y, double)
+ #define CMPLXF(x, y) __CMPLX(x, y, float)
+ #define CMPLXL(x, y) __CMPLX(x, y, long double)
++#endif
+ 
+ #ifdef __cplusplus
+ }
+--- a/include/pthread.h
++++ b/include/pthread.h
+@@ -84,7 +84,9 @@ __attribute__((const))
+ pthread_t pthread_self(void);
+ 
+ int pthread_equal(pthread_t, pthread_t);
++#ifndef __cplusplus
+ #define pthread_equal(x,y) ((x)==(y))
++#endif
+ 
+ int pthread_setcancelstate(int, int *);
+ int pthread_setcanceltype(int, int *);
+--- a/include/stdint.h
++++ b/include/stdint.h
+@@ -47,8 +47,8 @@ typedef uint64_t uint_least64_t;
+ 
+ #define UINT8_MAX  (0xff)
+ #define UINT16_MAX (0xffff)
+-#define UINT32_MAX (0xffffffff)
+-#define UINT64_MAX (0xffffffffffffffff)
++#define UINT32_MAX (0xffffffffu)
++#define UINT64_MAX (0xffffffffffffffffu)
+ 
+ #define INT_FAST8_MIN   INT8_MIN
+ #define INT_FAST64_MIN  INT64_MIN
+@@ -78,7 +78,7 @@ typedef uint64_t uint_least64_t;
+ #define INTMAX_MAX  INT64_MAX
+ #define UINTMAX_MAX UINT64_MAX
+ 
+-#define WINT_MIN 0
++#define WINT_MIN 0U
+ #define WINT_MAX UINT32_MAX
+ 
+ #if L'\0'-1 > 0
+--- a/include/sys/prctl.h
++++ b/include/sys/prctl.h
+@@ -5,6 +5,8 @@
+ extern "C" {
+ #endif
+ 
++#include <stdint.h>
++
+ #define PR_SET_PDEATHSIG  1
+ #define PR_GET_PDEATHSIG  2
+ #define PR_GET_DUMPABLE   3
+@@ -80,6 +82,25 @@ extern "C" {
+ #define PR_SET_MM_ENV_END              11
+ #define PR_SET_MM_AUXV                 12
+ #define PR_SET_MM_EXE_FILE             13
++#define PR_SET_MM_MAP                  14
++#define PR_SET_MM_MAP_SIZE             15
++
++struct prctl_mm_map {
++	uint64_t start_code;
++	uint64_t end_code;
++	uint64_t start_data;
++	uint64_t end_data;
++	uint64_t start_brk;
++	uint64_t brk;
++	uint64_t start_stack;
++	uint64_t arg_start;
++	uint64_t arg_end;
++	uint64_t env_start;
++	uint64_t env_end;
++	uint64_t *auxv;
++	uint32_t auxv_size;
++	uint32_t exe_fd;
++};
+ 
+ #define PR_SET_PTRACER 0x59616d61
+ #define PR_SET_PTRACER_ANY (-1UL)
+--- a/include/threads.h
++++ b/include/threads.h
+@@ -51,7 +51,9 @@ void thrd_yield(void);
+ 
+ thrd_t thrd_current(void);
+ int thrd_equal(thrd_t, thrd_t);
++#ifndef __cplusplus
+ #define thrd_equal(A, B) ((A) == (B))
++#endif
+ 
+ void call_once(once_flag *, void (*)(void));
+ 
+--- a/include/utmp.h
++++ b/include/utmp.h
+@@ -35,6 +35,8 @@ void         setutent(void);
+ 
+ void updwtmp(const char *, const struct utmp *);
+ 
++int login_tty(int);
++
+ #define _PATH_UTMP "/dev/null/utmp"
+ #define _PATH_WTMP "/dev/null/wtmp"
+ 
+--- a/src/fcntl/open.c
++++ b/src/fcntl/open.c
+@@ -7,7 +7,7 @@ int open(const char *filename, int flags
+ {
+ 	mode_t mode = 0;
+ 
+-	if (flags & O_CREAT) {
++	if ((flags & O_CREAT) || (flags & O_TMPFILE) == O_TMPFILE) {
+ 		va_list ap;
+ 		va_start(ap, flags);
+ 		mode = va_arg(ap, mode_t);
+--- a/src/fcntl/openat.c
++++ b/src/fcntl/openat.c
+@@ -5,11 +5,15 @@
+ 
+ int openat(int fd, const char *filename, int flags, ...)
+ {
+-	mode_t mode;
+-	va_list ap;
+-	va_start(ap, flags);
+-	mode = va_arg(ap, mode_t);
+-	va_end(ap);
++	mode_t mode = 0;
++
++	if ((flags & O_CREAT) || (flags & O_TMPFILE) == O_TMPFILE) {
++		va_list ap;
++		va_start(ap, flags);
++		mode = va_arg(ap, mode_t);
++		va_end(ap);
++	}
++
+ 	return syscall_cp(SYS_openat, fd, filename, flags|O_LARGEFILE, mode);
+ }
+ 
+--- a/src/internal/libm.h
++++ b/src/internal/libm.h
+@@ -128,6 +128,18 @@ do {                                    
+   (d) = __u.f;                                    \
+ } while (0)
+ 
++#undef __CMPLX
++#undef CMPLX
++#undef CMPLXF
++#undef CMPLXL
++
++#define __CMPLX(x, y, t) \
++	((union { _Complex t __z; t __xy[2]; }){.__xy = {(x),(y)}}.__z)
++
++#define CMPLX(x, y) __CMPLX(x, y, double)
++#define CMPLXF(x, y) __CMPLX(x, y, float)
++#define CMPLXL(x, y) __CMPLX(x, y, long double)
++
+ /* fdlibm kernel functions */
+ 
+ int    __rem_pio2_large(double*,double*,int,int,int);
+--- a/src/internal/syscall.h
++++ b/src/internal/syscall.h
+@@ -24,12 +24,22 @@ long __syscall_ret(unsigned long), __sys
+ 	__syscall_cp(syscall_arg_t, syscall_arg_t, syscall_arg_t, syscall_arg_t,
+ 	             syscall_arg_t, syscall_arg_t, syscall_arg_t);
+ 
++#ifdef SYSCALL_NO_INLINE
++#define __syscall0(n) (__syscall)(n)
++#define __syscall1(n,a) (__syscall)(n,__scc(a))
++#define __syscall2(n,a,b) (__syscall)(n,__scc(a),__scc(b))
++#define __syscall3(n,a,b,c) (__syscall)(n,__scc(a),__scc(b),__scc(c))
++#define __syscall4(n,a,b,c,d) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d))
++#define __syscall5(n,a,b,c,d,e) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e))
++#define __syscall6(n,a,b,c,d,e,f) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f))
++#else
+ #define __syscall1(n,a) __syscall1(n,__scc(a))
+ #define __syscall2(n,a,b) __syscall2(n,__scc(a),__scc(b))
+ #define __syscall3(n,a,b,c) __syscall3(n,__scc(a),__scc(b),__scc(c))
+ #define __syscall4(n,a,b,c,d) __syscall4(n,__scc(a),__scc(b),__scc(c),__scc(d))
+ #define __syscall5(n,a,b,c,d,e) __syscall5(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e))
+ #define __syscall6(n,a,b,c,d,e,f) __syscall6(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f))
++#endif
+ #define __syscall7(n,a,b,c,d,e,f,g) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f),__scc(g))
+ 
+ #define __SYSCALL_NARGS_X(a,b,c,d,e,f,g,h,n,...) n
+--- a/src/ldso/dynlink.c
++++ b/src/ldso/dynlink.c
+@@ -645,6 +645,8 @@ static void decode_dyn(struct dso *p)
+ 		p->hashtab = (void *)(p->base + dyn[DT_HASH]);
+ 	if (dyn[0]&(1<<DT_RPATH))
+ 		p->rpath_orig = (void *)(p->strings + dyn[DT_RPATH]);
++	if (dyn[0]&(1<<DT_RUNPATH))
++		p->rpath_orig = (void *)(p->strings + dyn[DT_RUNPATH]);
+ 	if (search_vec(p->dynv, dyn, DT_GNU_HASH))
+ 		p->ghashtab = (void *)(p->base + *dyn);
+ 	if (search_vec(p->dynv, dyn, DT_VERSYM))
+@@ -1126,6 +1128,7 @@ void *__dynlink(int argc, char **argv)
+ 		libc.secure = 1;
+ 	}
+ 	libc.page_size = aux[AT_PAGESZ];
++	libc.auxv = auxv;
+ 
+ 	/* If the dynamic linker was invoked as a program itself, AT_BASE
+ 	 * will not be set. In that case, we assume the base address is
+--- a/src/math/__rem_pio2.c
++++ b/src/math/__rem_pio2.c
+@@ -19,6 +19,12 @@
+ 
+ #include "libm.h"
+ 
++#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
++#define EPS DBL_EPSILON
++#elif FLT_EVAL_METHOD==2
++#define EPS LDBL_EPSILON
++#endif
++
+ /*
+  * invpio2:  53 bits of 2/pi
+  * pio2_1:   first  33 bit of pi/2
+@@ -29,6 +35,7 @@
+  * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
+  */
+ static const double
++toint   = 1.5/EPS,
+ invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
+ pio2_1  = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
+ pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
+@@ -41,8 +48,8 @@ pio2_3t = 8.47842766036889956997e-32; /*
+ int __rem_pio2(double x, double *y)
+ {
+ 	union {double f; uint64_t i;} u = {x};
+-	double_t z,w,t,r;
+-	double tx[3],ty[2],fn;
++	double_t z,w,t,r,fn;
++	double tx[3],ty[2];
+ 	uint32_t ix;
+ 	int sign, n, ex, ey, i;
+ 
+@@ -111,8 +118,7 @@ int __rem_pio2(double x, double *y)
+ 	if (ix < 0x413921fb) {  /* |x| ~< 2^20*(pi/2), medium size */
+ medium:
+ 		/* rint(x/(pi/2)), Assume round-to-nearest. */
+-		fn = x*invpio2 + 0x1.8p52;
+-		fn = fn - 0x1.8p52;
++		fn = x*invpio2 + toint - toint;
+ 		n = (int32_t)fn;
+ 		r = x - fn*pio2_1;
+ 		w = fn*pio2_1t;  /* 1st round, good to 85 bits */
+--- a/src/math/__rem_pio2f.c
++++ b/src/math/__rem_pio2f.c
+@@ -22,12 +22,19 @@
+ 
+ #include "libm.h"
+ 
++#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
++#define EPS DBL_EPSILON
++#elif FLT_EVAL_METHOD==2
++#define EPS LDBL_EPSILON
++#endif
++
+ /*
+  * invpio2:  53 bits of 2/pi
+  * pio2_1:   first 25 bits of pi/2
+  * pio2_1t:  pi/2 - pio2_1
+  */
+ static const double
++toint   = 1.5/EPS,
+ invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
+ pio2_1  = 1.57079631090164184570e+00, /* 0x3FF921FB, 0x50000000 */
+ pio2_1t = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */
+@@ -35,7 +42,8 @@ pio2_1t = 1.58932547735281966916e-08; /*
+ int __rem_pio2f(float x, double *y)
+ {
+ 	union {float f; uint32_t i;} u = {x};
+-	double tx[1],ty[1],fn;
++	double tx[1],ty[1];
++	double_t fn;
+ 	uint32_t ix;
+ 	int n, sign, e0;
+ 
+@@ -43,8 +51,7 @@ int __rem_pio2f(float x, double *y)
+ 	/* 25+53 bit pi is good enough for medium size */
+ 	if (ix < 0x4dc90fdb) {  /* |x| ~< 2^28*(pi/2), medium size */
+ 		/* Use a specialized rint() to get fn.  Assume round-to-nearest. */
+-		fn = x*invpio2 + 0x1.8p52;
+-		fn = fn - 0x1.8p52;
++		fn = x*invpio2 + toint - toint;
+ 		n  = (int32_t)fn;
+ 		*y = x - fn*pio2_1 - fn*pio2_1t;
+ 		return n;
+--- a/src/math/__rem_pio2l.c
++++ b/src/math/__rem_pio2l.c
+@@ -20,10 +20,11 @@
+  * use __rem_pio2_large() for large x
+  */
+ 
++static const long double toint = 1.5/LDBL_EPSILON;
++
+ #if LDBL_MANT_DIG == 64
+ /* u ~< 0x1p25*pi/2 */
+ #define SMALL(u) (((u.i.se & 0x7fffU)<<16 | u.i.m>>48) < ((0x3fff + 25)<<16 | 0x921f>>1 | 0x8000))
+-#define TOINT 0x1.8p63
+ #define QUOBITS(x) ((uint32_t)(int32_t)x & 0x7fffffff)
+ #define ROUND1 22
+ #define ROUND2 61
+@@ -50,7 +51,6 @@ pio2_3t = -2.75299651904407171810e-37L; 
+ #elif LDBL_MANT_DIG == 113
+ /* u ~< 0x1p45*pi/2 */
+ #define SMALL(u) (((u.i.se & 0x7fffU)<<16 | u.i.top) < ((0x3fff + 45)<<16 | 0x921f))
+-#define TOINT 0x1.8p112
+ #define QUOBITS(x) ((uint32_t)(int64_t)x & 0x7fffffff)
+ #define ROUND1 51
+ #define ROUND2 119
+@@ -77,7 +77,7 @@ int __rem_pio2l(long double x, long doub
+ 	ex = u.i.se & 0x7fff;
+ 	if (SMALL(u)) {
+ 		/* rint(x/(pi/2)), Assume round-to-nearest. */
+-		fn = x*invpio2 + TOINT - TOINT;
++		fn = x*invpio2 + toint - toint;
+ 		n = QUOBITS(fn);
+ 		r = x-fn*pio2_1;
+ 		w = fn*pio2_1t;  /* 1st round good to 102/180 bits (ld80/ld128) */
+--- a/src/math/ceil.c
++++ b/src/math/ceil.c
+@@ -1,5 +1,12 @@
+ #include "libm.h"
+ 
++#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
++#define EPS DBL_EPSILON
++#elif FLT_EVAL_METHOD==2
++#define EPS LDBL_EPSILON
++#endif
++static const double_t toint = 1/EPS;
++
+ double ceil(double x)
+ {
+ 	union {double f; uint64_t i;} u = {x};
+@@ -10,9 +17,9 @@ double ceil(double x)
+ 		return x;
+ 	/* y = int(x) - x, where int(x) is an integer neighbor of x */
+ 	if (u.i >> 63)
+-		y = (double)(x - 0x1p52) + 0x1p52 - x;
++		y = x - toint + toint - x;
+ 	else
+-		y = (double)(x + 0x1p52) - 0x1p52 - x;
++		y = x + toint - toint - x;
+ 	/* special case because of non-nearest rounding modes */
+ 	if (e <= 0x3ff-1) {
+ 		FORCE_EVAL(y);
+--- a/src/math/ceill.c
++++ b/src/math/ceill.c
+@@ -6,11 +6,9 @@ long double ceill(long double x)
+ 	return ceil(x);
+ }
+ #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
+-#if LDBL_MANT_DIG == 64
+-#define TOINT 0x1p63
+-#elif LDBL_MANT_DIG == 113
+-#define TOINT 0x1p112
+-#endif
++
++static const long double toint = 1/LDBL_EPSILON;
++
+ long double ceill(long double x)
+ {
+ 	union ldshape u = {x};
+@@ -21,9 +19,9 @@ long double ceill(long double x)
+ 		return x;
+ 	/* y = int(x) - x, where int(x) is an integer neighbor of x */
+ 	if (u.i.se >> 15)
+-		y = x - TOINT + TOINT - x;
++		y = x - toint + toint - x;
+ 	else
+-		y = x + TOINT - TOINT - x;
++		y = x + toint - toint - x;
+ 	/* special case because of non-nearest rounding modes */
+ 	if (e <= 0x3fff-1) {
+ 		FORCE_EVAL(y);
+--- a/src/math/floor.c
++++ b/src/math/floor.c
+@@ -1,5 +1,12 @@
+ #include "libm.h"
+ 
++#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
++#define EPS DBL_EPSILON
++#elif FLT_EVAL_METHOD==2
++#define EPS LDBL_EPSILON
++#endif
++static const double_t toint = 1/EPS;
++
+ double floor(double x)
+ {
+ 	union {double f; uint64_t i;} u = {x};
+@@ -10,9 +17,9 @@ double floor(double x)
+ 		return x;
+ 	/* y = int(x) - x, where int(x) is an integer neighbor of x */
+ 	if (u.i >> 63)
+-		y = (double)(x - 0x1p52) + 0x1p52 - x;
++		y = x - toint + toint - x;
+ 	else
+-		y = (double)(x + 0x1p52) - 0x1p52 - x;
++		y = x + toint - toint - x;
+ 	/* special case because of non-nearest rounding modes */
+ 	if (e <= 0x3ff-1) {
+ 		FORCE_EVAL(y);
+--- a/src/math/floorl.c
++++ b/src/math/floorl.c
+@@ -6,11 +6,9 @@ long double floorl(long double x)
+ 	return floor(x);
+ }
+ #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
+-#if LDBL_MANT_DIG == 64
+-#define TOINT 0x1p63
+-#elif LDBL_MANT_DIG == 113
+-#define TOINT 0x1p112
+-#endif
++
++static const long double toint = 1/LDBL_EPSILON;
++
+ long double floorl(long double x)
+ {
+ 	union ldshape u = {x};
+@@ -21,9 +19,9 @@ long double floorl(long double x)
+ 		return x;
+ 	/* y = int(x) - x, where int(x) is an integer neighbor of x */
+ 	if (u.i.se >> 15)
+-		y = x - TOINT + TOINT - x;
++		y = x - toint + toint - x;
+ 	else
+-		y = x + TOINT - TOINT - x;
++		y = x + toint - toint - x;
+ 	/* special case because of non-nearest rounding modes */
+ 	if (e <= 0x3fff-1) {
+ 		FORCE_EVAL(y);
+--- a/src/math/i386/fmod.s
++++ b/src/math/i386/fmod.s
+@@ -4,7 +4,7 @@ fmod:
+ 	fldl 12(%esp)
+ 	fldl 4(%esp)
+ 1:	fprem
+-	fstsw %ax
++	fnstsw %ax
+ 	sahf
+ 	jp 1b
+ 	fstp %st(1)
+--- a/src/math/i386/fmodf.s
++++ b/src/math/i386/fmodf.s
+@@ -4,7 +4,7 @@ fmodf:
+ 	flds 8(%esp)
+ 	flds 4(%esp)
+ 1:	fprem
+-	fstsw %ax
++	fnstsw %ax
+ 	sahf
+ 	jp 1b
+ 	fstp %st(1)
+--- a/src/math/i386/fmodl.s
++++ b/src/math/i386/fmodl.s
+@@ -4,7 +4,7 @@ fmodl:
+ 	fldt 16(%esp)
+ 	fldt 4(%esp)
+ 1:	fprem
+-	fstsw %ax
++	fnstsw %ax
+ 	sahf
+ 	jp 1b
+ 	fstp %st(1)
+--- a/src/math/i386/remainder.s
++++ b/src/math/i386/remainder.s
+@@ -7,7 +7,7 @@ drem:
+ 	fldl 12(%esp)
+ 	fldl 4(%esp)
+ 1:	fprem1
+-	fstsw %ax
++	fnstsw %ax
+ 	sahf
+ 	jp 1b
+ 	fstp %st(1)
+--- a/src/math/i386/remainderf.s
++++ b/src/math/i386/remainderf.s
+@@ -7,7 +7,7 @@ dremf:
+ 	flds 8(%esp)
+ 	flds 4(%esp)
+ 1:	fprem1
+-	fstsw %ax
++	fnstsw %ax
+ 	sahf
+ 	jp 1b
+ 	fstp %st(1)
+--- a/src/math/i386/remainderl.s
++++ b/src/math/i386/remainderl.s
+@@ -4,7 +4,7 @@ remainderl:
+ 	fldt 16(%esp)
+ 	fldt 4(%esp)
+ 1:	fprem1
+-	fstsw %ax
++	fnstsw %ax
+ 	sahf
+ 	jp 1b
+ 	fstp %st(1)
+--- a/src/math/i386/sqrt.s
++++ b/src/math/i386/sqrt.s
+@@ -2,7 +2,7 @@
+ .type sqrt,@function
+ sqrt:	fldl 4(%esp)
+ 	fsqrt
+-	fstsw %ax
++	fnstsw %ax
+ 	sub $12,%esp
+ 	fld %st(0)
+ 	fstpt (%esp)
+--- a/src/math/modfl.c
++++ b/src/math/modfl.c
+@@ -11,11 +11,9 @@ long double modfl(long double x, long do
+ 	return r;
+ }
+ #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
+-#if LDBL_MANT_DIG == 64
+-#define TOINT 0x1p63
+-#elif LDBL_MANT_DIG == 113
+-#define TOINT 0x1p112
+-#endif
++
++static const long double toint = 1/LDBL_EPSILON;
++
+ long double modfl(long double x, long double *iptr)
+ {
+ 	union ldshape u = {x};
+@@ -40,7 +38,7 @@ long double modfl(long double x, long do
+ 
+ 	/* raises spurious inexact */
+ 	absx = s ? -x : x;
+-	y = absx + TOINT - TOINT - absx;
++	y = absx + toint - toint - absx;
+ 	if (y == 0) {
+ 		*iptr = x;
+ 		return s ? -0.0 : 0.0;
+--- a/src/math/rint.c
++++ b/src/math/rint.c
+@@ -1,6 +1,14 @@
++#include <float.h>
+ #include <math.h>
+ #include <stdint.h>
+ 
++#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
++#define EPS DBL_EPSILON
++#elif FLT_EVAL_METHOD==2
++#define EPS LDBL_EPSILON
++#endif
++static const double_t toint = 1/EPS;
++
+ double rint(double x)
+ {
+ 	union {double f; uint64_t i;} u = {x};
+@@ -11,9 +19,9 @@ double rint(double x)
+ 	if (e >= 0x3ff+52)
+ 		return x;
+ 	if (s)
+-		y = (double)(x - 0x1p52) + 0x1p52;
++		y = x - toint + toint;
+ 	else
+-		y = (double)(x + 0x1p52) - 0x1p52;
++		y = x + toint - toint;
+ 	if (y == 0)
+ 		return s ? -0.0 : 0;
+ 	return y;
+--- a/src/math/rintf.c
++++ b/src/math/rintf.c
+@@ -1,6 +1,16 @@
++#include <float.h>
+ #include <math.h>
+ #include <stdint.h>
+ 
++#if FLT_EVAL_METHOD==0
++#define EPS FLT_EPSILON
++#elif FLT_EVAL_METHOD==1
++#define EPS DBL_EPSILON
++#elif FLT_EVAL_METHOD==2
++#define EPS LDBL_EPSILON
++#endif
++static const float_t toint = 1/EPS;
++
+ float rintf(float x)
+ {
+ 	union {float f; uint32_t i;} u = {x};
+@@ -11,9 +21,9 @@ float rintf(float x)
+ 	if (e >= 0x7f+23)
+ 		return x;
+ 	if (s)
+-		y = (float)(x - 0x1p23f) + 0x1p23f;
++		y = x - toint + toint;
+ 	else
+-		y = (float)(x + 0x1p23f) - 0x1p23f;
++		y = x + toint - toint;
+ 	if (y == 0)
+ 		return s ? -0.0f : 0.0f;
+ 	return y;
+--- a/src/math/rintl.c
++++ b/src/math/rintl.c
+@@ -6,11 +6,9 @@ long double rintl(long double x)
+ 	return rint(x);
+ }
+ #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
+-#if LDBL_MANT_DIG == 64
+-#define TOINT 0x1p63
+-#elif LDBL_MANT_DIG == 113
+-#define TOINT 0x1p112
+-#endif
++
++static const long double toint = 1/LDBL_EPSILON;
++
+ long double rintl(long double x)
+ {
+ 	union ldshape u = {x};
+@@ -21,9 +19,9 @@ long double rintl(long double x)
+ 	if (e >= 0x3fff+LDBL_MANT_DIG-1)
+ 		return x;
+ 	if (s)
+-		y = x - TOINT + TOINT;
++		y = x - toint + toint;
+ 	else
+-		y = x + TOINT - TOINT;
++		y = x + toint - toint;
+ 	if (y == 0)
+ 		return 0*x;
+ 	return y;
+--- a/src/math/round.c
++++ b/src/math/round.c
+@@ -1,5 +1,12 @@
+ #include "libm.h"
+ 
++#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
++#define EPS DBL_EPSILON
++#elif FLT_EVAL_METHOD==2
++#define EPS LDBL_EPSILON
++#endif
++static const double_t toint = 1/EPS;
++
+ double round(double x)
+ {
+ 	union {double f; uint64_t i;} u = {x};
+@@ -12,10 +19,10 @@ double round(double x)
+ 		x = -x;
+ 	if (e < 0x3ff-1) {
+ 		/* raise inexact if x!=0 */
+-		FORCE_EVAL(x + 0x1p52);
++		FORCE_EVAL(x + toint);
+ 		return 0*u.f;
+ 	}
+-	y = (double)(x + 0x1p52) - 0x1p52 - x;
++	y = x + toint - toint - x;
+ 	if (y > 0.5)
+ 		y = y + x - 1;
+ 	else if (y <= -0.5)
+--- a/src/math/roundf.c
++++ b/src/math/roundf.c
+@@ -1,5 +1,14 @@
+ #include "libm.h"
+ 
++#if FLT_EVAL_METHOD==0
++#define EPS FLT_EPSILON
++#elif FLT_EVAL_METHOD==1
++#define EPS DBL_EPSILON
++#elif FLT_EVAL_METHOD==2
++#define EPS LDBL_EPSILON
++#endif
++static const float_t toint = 1/EPS;
++
+ float roundf(float x)
+ {
+ 	union {float f; uint32_t i;} u = {x};
+@@ -11,10 +20,10 @@ float roundf(float x)
+ 	if (u.i >> 31)
+ 		x = -x;
+ 	if (e < 0x7f-1) {
+-		FORCE_EVAL(x + 0x1p23f);
++		FORCE_EVAL(x + toint);
+ 		return 0*u.f;
+ 	}
+-	y = (float)(x + 0x1p23f) - 0x1p23f - x;
++	y = x + toint - toint - x;
+ 	if (y > 0.5f)
+ 		y = y + x - 1;
+ 	else if (y <= -0.5f)
+--- a/src/math/roundl.c
++++ b/src/math/roundl.c
+@@ -6,11 +6,9 @@ long double roundl(long double x)
+ 	return round(x);
+ }
+ #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
+-#if LDBL_MANT_DIG == 64
+-#define TOINT 0x1p63
+-#elif LDBL_MANT_DIG == 113
+-#define TOINT 0x1p112
+-#endif
++
++static const long double toint = 1/LDBL_EPSILON;
++
+ long double roundl(long double x)
+ {
+ 	union ldshape u = {x};
+@@ -22,10 +20,10 @@ long double roundl(long double x)
+ 	if (u.i.se >> 15)
+ 		x = -x;
+ 	if (e < 0x3fff-1) {
+-		FORCE_EVAL(x + TOINT);
++		FORCE_EVAL(x + toint);
+ 		return 0*u.f;
+ 	}
+-	y = x + TOINT - TOINT - x;
++	y = x + toint - toint - x;
+ 	if (y > 0.5)
+ 		y = y + x - 1;
+ 	else if (y <= -0.5)
+--- a/src/math/truncl.c
++++ b/src/math/truncl.c
+@@ -6,11 +6,9 @@ long double truncl(long double x)
+ 	return trunc(x);
+ }
+ #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
+-#if LDBL_MANT_DIG == 64
+-#define TOINT 0x1p63
+-#elif LDBL_MANT_DIG == 113
+-#define TOINT 0x1p112
+-#endif
++
++static const long double toint = 1/LDBL_EPSILON;
++
+ long double truncl(long double x)
+ {
+ 	union ldshape u = {x};
+@@ -27,7 +25,7 @@ long double truncl(long double x)
+ 	/* y = int(|x|) - |x|, where int(|x|) is an integer neighbor of |x| */
+ 	if (s)
+ 		x = -x;
+-	y = x + TOINT - TOINT - x;
++	y = x + toint - toint - x;
+ 	if (y > 0)
+ 		y -= 1;
+ 	x += y;
+--- a/src/math/x32/exp2l.s
++++ b/src/math/x32/exp2l.s
+@@ -6,9 +6,7 @@ expm1l:
+ 	fmulp
+ 	movl $0xc2820000,-4(%esp)
+ 	flds -4(%esp)
+-	fucomp %st(1)
+-	fnstsw %ax
+-	sahf
++	fucomip %st(1)
+ 	fld1
+ 	jb 1f
+ 		# x*log2e <= -65, return -1 without underflow
+@@ -17,11 +15,8 @@ expm1l:
+ 	ret
+ 1:	fld %st(1)
+ 	fabs
+-	fucom %st(1)
+-	fnstsw %ax
++	fucomip %st(1)
+ 	fstp %st(0)
+-	fstp %st(0)
+-	sahf
+ 	ja 1f
+ 	f2xm1
+ 	ret
+@@ -53,9 +48,7 @@ exp2l:
+ 	fld %st(1)
+ 	fsub %st(1)
+ 	faddp
+-	fucomp %st(1)
+-	fnstsw
+-	sahf
++	fucomip %st(1)
+ 	je 2f             # x - 0x1p63 + 0x1p63 == x
+ 	movl $1,(%esp)
+ 	flds (%esp)       # 0x1p-149
+--- a/src/math/x32/fmodl.s
++++ b/src/math/x32/fmodl.s
+@@ -4,8 +4,8 @@ fmodl:
+ 	fldt 24(%esp)
+ 	fldt 8(%esp)
+ 1:	fprem
+-	fstsw %ax
+-	sahf
+-	jp 1b
++	fnstsw %ax
++	testb $4,%ah
++	jnz 1b
+ 	fstp %st(1)
+ 	ret
+--- a/src/math/x32/remainderl.s
++++ b/src/math/x32/remainderl.s
+@@ -4,8 +4,8 @@ remainderl:
+ 	fldt 24(%esp)
+ 	fldt 8(%esp)
+ 1:	fprem1
+-	fstsw %ax
+-	sahf
+-	jp 1b
++	fnstsw %ax
++	testb $4,%ah
++	jnz 1b
+ 	fstp %st(1)
+ 	ret
+--- a/src/math/x86_64/exp2l.s
++++ b/src/math/x86_64/exp2l.s
+@@ -6,9 +6,7 @@ expm1l:
+ 	fmulp
+ 	movl $0xc2820000,-4(%rsp)
+ 	flds -4(%rsp)
+-	fucomp %st(1)
+-	fnstsw %ax
+-	sahf
++	fucomip %st(1)
+ 	fld1
+ 	jb 1f
+ 		# x*log2e <= -65, return -1 without underflow
+@@ -17,11 +15,8 @@ expm1l:
+ 	ret
+ 1:	fld %st(1)
+ 	fabs
+-	fucom %st(1)
+-	fnstsw %ax
++	fucomip %st(1)
+ 	fstp %st(0)
+-	fstp %st(0)
+-	sahf
+ 	ja 1f
+ 	f2xm1
+ 	ret
+@@ -53,9 +48,7 @@ exp2l:
+ 	fld %st(1)
+ 	fsub %st(1)
+ 	faddp
+-	fucomp %st(1)
+-	fnstsw
+-	sahf
++	fucomip %st(1)
+ 	je 2f             # x - 0x1p63 + 0x1p63 == x
+ 	movl $1,(%rsp)
+ 	flds (%rsp)       # 0x1p-149
+--- a/src/math/x86_64/fmodl.s
++++ b/src/math/x86_64/fmodl.s
+@@ -4,8 +4,8 @@ fmodl:
+ 	fldt 24(%rsp)
+ 	fldt 8(%rsp)
+ 1:	fprem
+-	fstsw %ax
+-	sahf
+-	jp 1b
++	fnstsw %ax
++	testb $4,%ah
++	jnz 1b
+ 	fstp %st(1)
+ 	ret
+--- a/src/math/x86_64/remainderl.s
++++ b/src/math/x86_64/remainderl.s
+@@ -4,8 +4,8 @@ remainderl:
+ 	fldt 24(%rsp)
+ 	fldt 8(%rsp)
+ 1:	fprem1
+-	fstsw %ax
+-	sahf
+-	jp 1b
++	fnstsw %ax
++	testb $4,%ah
++	jnz 1b
+ 	fstp %st(1)
+ 	ret
+--- a/src/misc/forkpty.c
++++ b/src/misc/forkpty.c
+@@ -1,38 +1,57 @@
+ #include <pty.h>
++#include <utmp.h>
+ #include <unistd.h>
+-#include <sys/ioctl.h>
++#include <errno.h>
+ #include <fcntl.h>
++#include <sys/wait.h>
++#include <pthread.h>
+ 
+-int forkpty(int *m, char *name, const struct termios *tio, const struct winsize *ws)
++int forkpty(int *pm, char *name, const struct termios *tio, const struct winsize *ws)
+ {
+-	int s, t, i, istmp[3]={0};
+-	pid_t pid;
++	int m, s, ec=0, p[2], cs;
++	pid_t pid=-1;
++	sigset_t set, oldset;
+ 
+-	if (openpty(m, &s, name, tio, ws) < 0) return -1;
++	if (openpty(&m, &s, name, tio, ws) < 0) return -1;
+ 
+-	/* Ensure before forking that we don't exceed fd limit */
+-	for (i=0; i<3; i++) {
+-		if (fcntl(i, F_GETFL) < 0) {
+-			t = fcntl(s, F_DUPFD, i);
+-			if (t<0) break;
+-			else if (t!=i) close(t);
+-			else istmp[i] = 1;
+-		}
++	sigfillset(&set);
++	pthread_sigmask(SIG_BLOCK, &set, &oldset);
++	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
++
++	if (pipe2(p, O_CLOEXEC)) {
++		close(s);
++		goto out;
+ 	}
+-	pid = i==3 ? fork() : -1;
++
++	pid = fork();
+ 	if (!pid) {
+-		close(*m);
+-		setsid();
+-		ioctl(s, TIOCSCTTY, (char *)0);
+-		dup2(s, 0);
+-		dup2(s, 1);
+-		dup2(s, 2);
+-		if (s>2) close(s);
++		close(m);
++		close(p[0]);
++		if (login_tty(s)) {
++			write(p[1], &errno, sizeof errno);
++			_exit(127);
++		}
++		close(p[1]);
++		pthread_setcancelstate(cs, 0);
++		pthread_sigmask(SIG_SETMASK, &oldset, 0);
+ 		return 0;
+ 	}
+-	for (i=0; i<3; i++)
+-		if (istmp[i]) close(i);
+ 	close(s);
+-	if (pid < 0) close(*m);
++	close(p[1]);
++	if (read(p[0], &ec, sizeof ec) > 0) {
++		int status;
++		waitpid(pid, &status, 0);
++		pid = -1;
++		errno = ec;
++	}
++	close(p[0]);
++
++out:
++	if (pid > 0) *pm = m;
++	else close(m);
++
++	pthread_setcancelstate(cs, 0);
++	pthread_sigmask(SIG_SETMASK, &oldset, 0);
++
+ 	return pid;
+ }
+--- a/src/misc/getopt.c
++++ b/src/misc/getopt.c
+@@ -4,6 +4,7 @@
+ #include <limits.h>
+ #include <stdlib.h>
+ #include "libc.h"
++#include "locale_impl.h"
+ 
+ char *optarg;
+ int optind=1, opterr=1, optopt, __optpos, __optreset=0;
+@@ -11,6 +12,18 @@ int optind=1, opterr=1, optopt, __optpos
+ #define optpos __optpos
+ weak_alias(__optreset, optreset);
+ 
++void __getopt_msg(const char *a, const char *b, const char *c, size_t l)
++{
++	FILE *f = stderr;
++	b = __lctrans_cur(b);
++	flockfile(f);
++	fwrite(a, strlen(a), 1, f)
++	&& fwrite(b, strlen(b), 1, f)
++	&& fwrite(c, l, 1, f)
++	&& putc('\n', f);
++	funlockfile(f);
++}
++
+ int getopt(int argc, char * const argv[], const char *optstring)
+ {
+ 	int i;
+@@ -24,8 +37,20 @@ int getopt(int argc, char * const argv[]
+ 		optind = 1;
+ 	}
+ 
+-	if (optind >= argc || !argv[optind] || argv[optind][0] != '-' || !argv[optind][1])
++	if (optind >= argc || !argv[optind])
++		return -1;
++
++	if (argv[optind][0] != '-') {
++		if (optstring[0] == '-') {
++			optarg = argv[optind++];
++			return 1;
++		}
++		return -1;
++	}
++
++	if (!argv[optind][1])
+ 		return -1;
++
+ 	if (argv[optind][1] == '-' && !argv[optind][2])
+ 		return optind++, -1;
+ 
+@@ -43,30 +68,31 @@ int getopt(int argc, char * const argv[]
+ 		optpos = 0;
+ 	}
+ 
+-	for (i=0; (l = mbtowc(&d, optstring+i, MB_LEN_MAX)) && d!=c; i+=l>0?l:1);
++	if (optstring[0] == '-' || optstring[0] == '+')
++		optstring++;
++
++	i = 0;
++	d = 0;
++	do {
++		l = mbtowc(&d, optstring+i, MB_LEN_MAX);
++		if (l>0) i+=l; else i++;
++	} while (l && d != c);
+ 
+ 	if (d != c) {
+-		if (optstring[0] != ':' && opterr) {
+-			write(2, argv[0], strlen(argv[0]));
+-			write(2, ": illegal option: ", 18);
+-			write(2, optchar, k);
+-			write(2, "\n", 1);
+-		}
++		if (optstring[0] != ':' && opterr)
++			__getopt_msg(argv[0], ": unrecognized option: ", optchar, k);
+ 		return '?';
+ 	}
+-	if (optstring[i+1] == ':') {
+-		if (optind >= argc) {
++	if (optstring[i] == ':') {
++		if (optstring[i+1] == ':') optarg = 0;
++		else if (optind >= argc) {
+ 			if (optstring[0] == ':') return ':';
+-			if (opterr) {
+-				write(2, argv[0], strlen(argv[0]));
+-				write(2, ": option requires an argument: ", 31);
+-				write(2, optchar, k);
+-				write(2, "\n", 1);
+-			}
++			if (opterr) __getopt_msg(argv[0],
++				": option requires an argument: ",
++				optchar, k);
+ 			return '?';
+ 		}
+-		if (optstring[i+2] == ':') optarg = 0;
+-		if (optstring[i+2] != ':' || optpos) {
++		if (optstring[i+1] != ':' || optpos) {
+ 			optarg = argv[optind++] + optpos;
+ 			optpos = 0;
+ 		}
+--- a/src/misc/getopt_long.c
++++ b/src/misc/getopt_long.c
+@@ -2,37 +2,106 @@
+ #include <stddef.h>
+ #include <getopt.h>
+ #include <stdio.h>
++#include <string.h>
+ 
+ extern int __optpos, __optreset;
+ 
++static void permute(char *const *argv, int dest, int src)
++{
++	char **av = (char **)argv;
++	char *tmp = av[src];
++	int i;
++	for (i=src; i>dest; i--)
++		av[i] = av[i-1];
++	av[dest] = tmp;
++}
++
++void __getopt_msg(const char *, const char *, const char *, size_t);
++
++static int __getopt_long_core(int argc, char *const *argv, const char *optstring, const struct option *longopts, int *idx, int longonly);
++
+ static int __getopt_long(int argc, char *const *argv, const char *optstring, const struct option *longopts, int *idx, int longonly)
+ {
++	int ret, skipped, resumed;
++	const char *optstring2 = optstring + 1;
+ 	if (!optind || __optreset) {
+ 		__optreset = 0;
+ 		__optpos = 0;
+ 		optind = 1;
+ 	}
+-	if (optind >= argc || !argv[optind] || argv[optind][0] != '-') return -1;
+-	if ((longonly && argv[optind][1]) ||
+-		(argv[optind][1] == '-' && argv[optind][2]))
+-	{
++	if (optind >= argc || !argv[optind]) return -1;
++	skipped = optind;
++	if (optstring[0] != '+' && optstring[0] != '-') {
+ 		int i;
+-		for (i=0; longopts[i].name; i++) {
++		for (i=optind; ; i++) {
++			if (i >= argc || !argv[i]) return -1;
++			if (argv[i][0] == '-' && argv[i][1]) break;
++		}
++		optind = i;
++		optstring2 = optstring;
++	}
++	resumed = optind;
++	ret = __getopt_long_core(argc, argv, optstring2, longopts, idx, longonly);
++	if (resumed > skipped) {
++		int i, cnt = optind-resumed;
++		for (i=0; i<cnt; i++)
++			permute(argv, skipped, optind-1);
++		optind = skipped + cnt;
++	}
++	return ret;
++}
++
++static int __getopt_long_core(int argc, char *const *argv, const char *optstring, const struct option *longopts, int *idx, int longonly)
++{
++
++	if (longopts && argv[optind][0] == '-' &&
++		((longonly && argv[optind][1]) ||
++		 (argv[optind][1] == '-' && argv[optind][2])))
++	{
++		int i, cnt, match;
++		char *opt;
++		for (cnt=i=0; longopts[i].name; i++) {
+ 			const char *name = longopts[i].name;
+-			char *opt = argv[optind]+1;
++			opt = argv[optind]+1;
+ 			if (*opt == '-') opt++;
+ 			for (; *name && *name == *opt; name++, opt++);
+-			if (*name || (*opt && *opt != '=')) continue;
++			if (*opt && *opt != '=') continue;
++			match = i;
++			if (!*name) {
++				cnt = 1;
++				break;
++			}
++			cnt++;
++		}
++		if (cnt==1) {
++			i = match;
++			optind++;
++			optopt = longopts[i].val;
+ 			if (*opt == '=') {
+-				if (!longopts[i].has_arg) continue;
++				if (!longopts[i].has_arg) {
++					if (optstring[0] == ':' || !opterr)
++						return '?';
++					__getopt_msg(argv[0],
++						": option does not take an argument: ",
++						longopts[i].name,
++						strlen(longopts[i].name));
++					return '?';
++				}
+ 				optarg = opt+1;
+ 			} else {
+ 				if (longopts[i].has_arg == required_argument) {
+-					if (!(optarg = argv[++optind]))
+-						return ':';
++					if (!(optarg = argv[optind])) {
++						if (optstring[0] == ':' || !opterr)
++							return ':';
++						__getopt_msg(argv[0],
++							": option requires an argument: ",
++							longopts[i].name,
++							strlen(longopts[i].name));
++						return '?';
++					}
++					optind++;
+ 				} else optarg = NULL;
+ 			}
+-			optind++;
+ 			if (idx) *idx = i;
+ 			if (longopts[i].flag) {
+ 				*longopts[i].flag = longopts[i].val;
+@@ -41,6 +110,12 @@ static int __getopt_long(int argc, char 
+ 			return longopts[i].val;
+ 		}
+ 		if (argv[optind][1] == '-') {
++			if (optstring[0] != ':' && opterr)
++				__getopt_msg(argv[0], cnt ?
++					": option is ambiguous: " :
++					": unrecognized option: ",
++					argv[optind]+2,
++					strlen(argv[optind]+2));
+ 			optind++;
+ 			return '?';
+ 		}
+--- /dev/null
++++ b/src/misc/login_tty.c
+@@ -0,0 +1,14 @@
++#include <utmp.h>
++#include <sys/ioctl.h>
++#include <unistd.h>
++
++int login_tty(int fd)
++{ /* Start a new session with fd as controlling tty and as fds 0, 1 and 2; returns 0, or -1 if TIOCSCTTY fails. */
++	setsid(); /* result is not checked */
++	if (ioctl(fd, TIOCSCTTY, (char *)0)) return -1;
++	dup2(fd, 0);
++	dup2(fd, 1);
++	dup2(fd, 2);
++	if (fd>2) close(fd); /* fd is left open when it already is one of 0..2 */
++	return 0;
++}
+--- a/src/misc/openpty.c
++++ b/src/misc/openpty.c
+@@ -3,31 +3,38 @@
+ #include <unistd.h>
+ #include <pty.h>
+ #include <stdio.h>
++#include <pthread.h>
+ 
+ /* Nonstandard, but vastly superior to the standard functions */
+ 
+-int openpty(int *m, int *s, char *name, const struct termios *tio, const struct winsize *ws)
++int openpty(int *pm, int *ps, char *name, const struct termios *tio, const struct winsize *ws)
+ {
+-	int n=0;
++	int m, s, n=0, cs;
+ 	char buf[20];
+ 
+-	*m = open("/dev/ptmx", O_RDWR|O_NOCTTY);
+-	if (*m < 0) return -1;
++	m = open("/dev/ptmx", O_RDWR|O_NOCTTY);
++	if (m < 0) return -1;
+ 
+-	if (ioctl(*m, TIOCSPTLCK, &n) || ioctl (*m, TIOCGPTN, &n)) {
+-		close(*m);
+-		return -1;
+-	}
++	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
++
++	if (ioctl(m, TIOCSPTLCK, &n) || ioctl (m, TIOCGPTN, &n))
++		goto fail;
+ 
+ 	if (!name) name = buf;
+ 	snprintf(name, sizeof buf, "/dev/pts/%d", n);
+-	if ((*s = open(name, O_RDWR|O_NOCTTY)) < 0) {
+-		close(*m);
+-		return -1;
+-	}
++	if ((s = open(name, O_RDWR|O_NOCTTY)) < 0)
++		goto fail;
++
++	if (tio) tcsetattr(s, TCSANOW, tio);
++	if (ws) ioctl(s, TIOCSWINSZ, ws);
+ 
+-	if (tio) tcsetattr(*s, TCSANOW, tio);
+-	if (ws) ioctl(*s, TIOCSWINSZ, ws);
++	*pm = m;
++	*ps = s;
+ 
++	pthread_setcancelstate(cs, 0);
+ 	return 0;
++fail:
++	close(m);
++	pthread_setcancelstate(cs, 0);
++	return -1;
+ }
+--- a/src/misc/syslog.c
++++ b/src/misc/syslog.c
+@@ -46,8 +46,12 @@ void closelog(void)
+ 
+ static void __openlog()
+ {
+-	log_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0);
+-	if (log_fd >= 0) connect(log_fd, (void *)&log_addr, sizeof log_addr);
++	int fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0);
++	if (fd < 0) return;
++	if (connect(fd, (void *)&log_addr, sizeof log_addr) < 0)
++		close(fd);
++	else
++		log_fd = fd;
+ }
+ 
+ void openlog(const char *ident, int opt, int facility)
+--- a/src/multibyte/c16rtomb.c
++++ b/src/multibyte/c16rtomb.c
+@@ -4,6 +4,8 @@
+ 
+ size_t c16rtomb(char *restrict s, char16_t c16, mbstate_t *restrict ps)
+ {
++	static unsigned internal_state;
++	if (!ps) ps = (void *)&internal_state;
+ 	unsigned *x = (unsigned *)ps;
+ 	wchar_t wc;
+ 
+--- a/src/multibyte/mbrtoc16.c
++++ b/src/multibyte/mbrtoc16.c
+@@ -3,6 +3,8 @@
+ 
+ size_t mbrtoc16(char16_t *restrict pc16, const char *restrict s, size_t n, mbstate_t *restrict ps)
+ {
++	static unsigned internal_state;
++	if (!ps) ps = (void *)&internal_state;
+ 	unsigned *pending = (unsigned *)ps;
+ 
+ 	if (!s) return mbrtoc16(0, "", 1, ps);
+--- a/src/multibyte/mbrtoc32.c
++++ b/src/multibyte/mbrtoc32.c
+@@ -3,6 +3,8 @@
+ 
+ size_t mbrtoc32(char32_t *restrict pc32, const char *restrict s, size_t n, mbstate_t *restrict ps)
+ {
++	static unsigned internal_state;
++	if (!ps) ps = (void *)&internal_state;
+ 	if (!s) return mbrtoc32(0, "", 1, ps);
+ 	wchar_t wc;
+ 	size_t ret = mbrtowc(&wc, s, n, ps);
+--- a/src/multibyte/wcsnrtombs.c
++++ b/src/multibyte/wcsnrtombs.c
+@@ -40,7 +40,7 @@ size_t wcsnrtombs(char *restrict dst, co
+ 		ws++; wn--;
+ 		/* safe - this loop runs fewer than sizeof(buf) times */
+ 		s+=l; n-=l;
+-		cnt++;
++		cnt += l;
+ 	}
+ 	if (dst) *wcs = ws;
+ 	return cnt;
+--- /dev/null
++++ b/src/network/ns_parse.c
+@@ -0,0 +1,171 @@
++#define _BSD_SOURCE
++#include <errno.h>
++#include <stddef.h>
++#include <resolv.h>
++#include <arpa/nameser.h>
++
++const struct _ns_flagdata _ns_flagdata[16] = {
++	{ 0x8000, 15 },
++	{ 0x7800, 11 },
++	{ 0x0400, 10 },
++	{ 0x0200, 9 },
++	{ 0x0100, 8 },
++	{ 0x0080, 7 },
++	{ 0x0040, 6 },
++	{ 0x0020, 5 },
++	{ 0x0010, 4 },
++	{ 0x000f, 0 },
++	{ 0x0000, 0 },
++	{ 0x0000, 0 },
++	{ 0x0000, 0 },
++	{ 0x0000, 0 },
++	{ 0x0000, 0 },
++	{ 0x0000, 0 },
++};
++
++unsigned ns_get16(const unsigned char *cp)
++{
++	return cp[0]<<8 | cp[1]; /* cp[0] is the high byte */
++}
++
++unsigned long ns_get32(const unsigned char *cp)
++{
++	return (unsigned)cp[0]<<24 | cp[1]<<16 | cp[2]<<8 | cp[3]; /* cp[0] is the high byte; the cast makes the <<24 an unsigned shift */
++}
++
++void ns_put16(unsigned s, unsigned char *cp)
++{
++	*cp++ = s>>8; /* high byte first */
++	*cp++ = s;
++}
++
++void ns_put32(unsigned long l, unsigned char *cp)
++{
++	*cp++ = l>>24; /* high byte first */
++	*cp++ = l>>16;
++	*cp++ = l>>8;
++	*cp++ = l;
++}
++
++int ns_initparse(const unsigned char *msg, int msglen, ns_msg *handle)
++{ /* Fill *handle with the message's id, flags, per-section counts and section start pointers; returns 0, or -1 on failure. */
++	int i, r;
++
++	handle->_msg = msg;
++	handle->_eom = msg + msglen;
++	if (msglen < (2 + ns_s_max) * NS_INT16SZ) goto bad; /* room for id, flags and one count per section */
++	NS_GET16(handle->_id, msg);
++	NS_GET16(handle->_flags, msg);
++	for (i = 0; i < ns_s_max; i++) NS_GET16(handle->_counts[i], msg);
++	for (i = 0; i < ns_s_max; i++) {
++		if (handle->_counts[i]) {
++			handle->_sections[i] = msg; /* first record of section i */
++			r = ns_skiprr(msg, handle->_eom, i, handle->_counts[i]);
++			if (r < 0) return -1;
++			msg += r;
++		} else {
++			handle->_sections[i] = NULL;
++		}
++	}
++	if (msg != handle->_eom) goto bad; /* the records must end exactly at the end of the message */
++	handle->_sect = ns_s_max; /* matches no valid section, so the first ns_parserr call selects one */
++	handle->_rrnum = -1;
++	handle->_msg_ptr = NULL;
++	return 0;
++bad:
++	errno = EMSGSIZE;
++	return -1;
++}
++
++int ns_skiprr(const unsigned char *ptr, const unsigned char *eom, ns_sect section, int count)
++{ /* Skip count records of the given section starting at ptr; returns the bytes skipped, or -1 with errno EMSGSIZE. */
++	const unsigned char *p = ptr; /* cursor; ptr keeps pointing at the first record */
++	int r;
++
++	while (count--) {
++		r = dn_skipname(p, eom);
++		if (r < 0) goto bad;
++		if (r + 2 * NS_INT16SZ > eom - p) goto bad;
++		p += r + 2 * NS_INT16SZ; /* name, type, class */
++		if (section != ns_s_qd) {
++			if (NS_INT32SZ + NS_INT16SZ > eom - p) goto bad;
++			p += NS_INT32SZ; /* ttl */
++			NS_GET16(r, p); /* rdlength */
++			if (r > eom - p) goto bad;
++			p += r; /* rdata */
++		}
++	}
++	return p - ptr; /* must be >= 0: callers treat r < 0 as failure and add r to their cursor */
++bad:
++	errno = EMSGSIZE;
++	return -1;
++}
++
++int ns_parserr(ns_msg *handle, ns_sect section, int rrnum, ns_rr *rr)
++{ /* Parse record rrnum of section into *rr (rrnum -1 selects the handle's current record); returns 0, or -1 on failure. */
++	int r;
++
++	if (section < 0 || section >= ns_s_max) goto bad;
++	if (section != handle->_sect) { /* switching sections: restart at its first record */
++		handle->_sect = section;
++		handle->_rrnum = 0;
++		handle->_msg_ptr = handle->_sections[section];
++	}
++	if (rrnum == -1) rrnum = handle->_rrnum;
++	if (rrnum < 0 || rrnum >= handle->_counts[section]) goto bad;
++	if (rrnum < handle->_rrnum) { /* requested record lies behind the cursor: rewind to the section start */
++		handle->_rrnum = 0;
++		handle->_msg_ptr = handle->_sections[section];
++	}
++	if (rrnum > handle->_rrnum) { /* skip forward to the requested record */
++		r = ns_skiprr(handle->_msg_ptr, handle->_eom, section, rrnum - handle->_rrnum);
++		if (r < 0) return -1;
++		handle->_msg_ptr += r;
++		handle->_rrnum = rrnum;
++	}
++	r = ns_name_uncompress(handle->_msg, handle->_eom, handle->_msg_ptr, rr->name, NS_MAXDNAME);
++	if (r < 0) return -1;
++	handle->_msg_ptr += r;
++	if (2 * NS_INT16SZ > handle->_eom - handle->_msg_ptr) goto size;
++	NS_GET16(rr->type, handle->_msg_ptr);
++	NS_GET16(rr->rr_class, handle->_msg_ptr);
++	if (section != ns_s_qd) { /* ns_s_qd records end after the class field */
++		if (NS_INT32SZ + NS_INT16SZ > handle->_eom - handle->_msg_ptr) goto size;
++		NS_GET32(rr->ttl, handle->_msg_ptr);
++		NS_GET16(rr->rdlength, handle->_msg_ptr);
++		if (rr->rdlength > handle->_eom - handle->_msg_ptr) goto size;
++		rr->rdata = handle->_msg_ptr; /* points into the message buffer, not a copy */
++		handle->_msg_ptr += rr->rdlength;
++	} else {
++		rr->ttl = 0;
++		rr->rdlength = 0;
++		rr->rdata = NULL;
++	}
++	handle->_rrnum++; /* a later call with rrnum -1 continues after this record */
++	if (handle->_rrnum > handle->_counts[section]) {
++		handle->_sect = section + 1;
++		if (handle->_sect == ns_s_max) {
++			handle->_rrnum = -1;
++			handle->_msg_ptr = NULL;
++		} else {
++			handle->_rrnum = 0;
++		}
++	}
++	return 0;
++bad: /* section or record index out of range */
++	errno = ENODEV;
++	return -1;
++size: /* record extends past the end of the message */
++	errno = EMSGSIZE;
++	return -1;
++}
++
++int ns_name_uncompress(const unsigned char *msg, const unsigned char *eom,
++                       const unsigned char *src, char *dst, size_t dstsiz)
++{
++	int r;
++	r = dn_expand(msg, eom, src, dst, dstsiz);
++	if (r < 0) errno = EMSGSIZE;
++	return r;
++}
++
+--- a/src/process/posix_spawn.c
++++ b/src/process/posix_spawn.c
+@@ -102,8 +102,7 @@ static int child(void *args_vp)
+ 			}
+ 			switch(op->cmd) {
+ 			case FDOP_CLOSE:
+-				if ((ret=__syscall(SYS_close, op->fd)))
+-					goto fail;
++				__syscall(SYS_close, op->fd);
+ 				break;
+ 			case FDOP_DUP2:
+ 				if ((ret=__sys_dup2(op->srcfd, op->fd))<0)
+@@ -137,7 +136,7 @@ static int child(void *args_vp)
+ fail:
+ 	/* Since sizeof errno < PIPE_BUF, the write is atomic. */
+ 	ret = -ret;
+-	if (ret) while (write(p, &ret, sizeof ret) < 0);
++	if (ret) while (__syscall(SYS_write, p, &ret, sizeof ret) < 0);
+ 	_exit(127);
+ }
+ 
+--- a/src/regex/fnmatch.c
++++ b/src/regex/fnmatch.c
+@@ -97,7 +97,13 @@ escaped:
+ 	return pat[0];
+ }
+ 
+-static int match_bracket(const char *p, int k)
++static int casefold(int k)
++{
++	int c = towupper(k);
++	return c == k ? towlower(k) : c;
++}
++
++static int match_bracket(const char *p, int k, int kfold)
+ {
+ 	wchar_t wc;
+ 	int inv = 0;
+@@ -119,7 +125,10 @@ static int match_bracket(const char *p, 
+ 			wchar_t wc2;
+ 			int l = mbtowc(&wc2, p+1, 4);
+ 			if (l < 0) return 0;
+-			if (wc<=wc2 && (unsigned)k-wc <= wc2-wc) return !inv;
++			if (wc <= wc2)
++				if ((unsigned)k-wc <= wc2-wc ||
++				    (unsigned)kfold-wc <= wc2-wc)
++					return !inv;
+ 			p += l-1;
+ 			continue;
+ 		}
+@@ -132,7 +141,9 @@ static int match_bracket(const char *p, 
+ 				char buf[16];
+ 				memcpy(buf, p0, p-1-p0);
+ 				buf[p-1-p0] = 0;
+-				if (iswctype(k, wctype(buf))) return !inv;
++				if (iswctype(k, wctype(buf)) ||
++				    iswctype(kfold, wctype(buf)))
++					return !inv;
+ 			}
+ 			continue;
+ 		}
+@@ -143,7 +154,7 @@ static int match_bracket(const char *p, 
+ 			if (l < 0) return 0;
+ 			p += l-1;
+ 		}
+-		if (wc==k) return !inv;
++		if (wc==k || wc==kfold) return !inv;
+ 	}
+ 	return inv;
+ }
+@@ -153,7 +164,7 @@ static int fnmatch_internal(const char *
+ 	const char *p, *ptail, *endpat;
+ 	const char *s, *stail, *endstr;
+ 	size_t pinc, sinc, tailcnt=0;
+-	int c, k;
++	int c, k, kfold;
+ 
+ 	if (flags & FNM_PERIOD) {
+ 		if (*str == '.' && *pat != '.')
+@@ -173,10 +184,11 @@ static int fnmatch_internal(const char *
+ 				return (c==END) ? 0 : FNM_NOMATCH;
+ 			str += sinc;
+ 			n -= sinc;
++			kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
+ 			if (c == BRACKET) {
+-				if (!match_bracket(pat, k))
++				if (!match_bracket(pat, k, kfold))
+ 					return FNM_NOMATCH;
+-			} else if (c != QUESTION && k != c) {
++			} else if (c != QUESTION && k != c && kfold != c) {
+ 				return FNM_NOMATCH;
+ 			}
+ 			pat+=pinc;
+@@ -233,10 +245,11 @@ static int fnmatch_internal(const char *
+ 			break;
+ 		}
+ 		s += sinc;
++		kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
+ 		if (c == BRACKET) {
+-			if (!match_bracket(p-pinc, k))
++			if (!match_bracket(p-pinc, k, kfold))
+ 				return FNM_NOMATCH;
+-		} else if (c != QUESTION && k != c) {
++		} else if (c != QUESTION && k != c && kfold != c) {
+ 			return FNM_NOMATCH;
+ 		}
+ 	}
+@@ -261,10 +274,11 @@ static int fnmatch_internal(const char *
+ 			k = str_next(s, endstr-s, &sinc);
+ 			if (!k)
+ 				return FNM_NOMATCH;
++			kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
+ 			if (c == BRACKET) {
+-				if (!match_bracket(p-pinc, k))
++				if (!match_bracket(p-pinc, k, kfold))
+ 					break;
+-			} else if (c != QUESTION && k != c) {
++			} else if (c != QUESTION && k != c && kfold != c) {
+ 				break;
+ 			}
+ 			s += sinc;
+--- a/src/sched/affinity.c
++++ b/src/sched/affinity.c
+@@ -1,5 +1,6 @@
+ #define _GNU_SOURCE
+ #include <sched.h>
++#include <string.h>
+ #include "pthread_impl.h"
+ #include "syscall.h"
+ 
+@@ -10,17 +11,23 @@ int sched_setaffinity(pid_t tid, size_t 
+ 
+ int pthread_setaffinity_np(pthread_t td, size_t size, const cpu_set_t *set)
+ {
+-	return syscall(SYS_sched_setaffinity, td->tid, size, set);
++	return -__syscall(SYS_sched_setaffinity, td->tid, size, set);
+ }
+ 
+-int sched_getaffinity(pid_t tid, size_t size, cpu_set_t *set)
++static int do_getaffinity(pid_t tid, size_t size, cpu_set_t *set)
+ {
+ 	long ret = __syscall(SYS_sched_getaffinity, tid, size, set);
+-	if (ret > 0) ret = 0;
+-	return __syscall_ret(ret);
++	if (ret < 0) return ret;
++	if (ret < size) memset((char *)set+ret, 0, size-ret);
++	return 0;
++}
++
++int sched_getaffinity(pid_t tid, size_t size, cpu_set_t *set)
++{
++	return __syscall_ret(do_getaffinity(tid, size, set));
+ }
+ 
+ int pthread_getaffinity_np(pthread_t td, size_t size, cpu_set_t *set)
+ {
+-	return sched_getaffinity(td->tid, size, set);
++	return -do_getaffinity(td->tid, size, set);
+ }
+--- a/src/setjmp/arm/longjmp.s
++++ b/src/setjmp/arm/longjmp.s
+@@ -20,7 +20,7 @@ longjmp:
+ 	ldc p2, cr4, [ip], #48
+ 2:	tst r1,#0x40
+ 	beq 2f
+-	ldc p11, cr8, [ip], #64
++	.word 0xecbc8b10 /* vldmia ip!, {d8-d15} */
+ 2:	tst r1,#0x200
+ 	beq 3f
+ 	ldcl p1, cr10, [ip], #8
+--- a/src/setjmp/arm/setjmp.s
++++ b/src/setjmp/arm/setjmp.s
+@@ -22,7 +22,7 @@ setjmp:
+ 	stc p2, cr4, [ip], #48
+ 2:	tst r1,#0x40
+ 	beq 2f
+-	stc p11, cr8, [ip], #64
++	.word 0xecac8b10 /* vstmia ip!, {d8-d15} */
+ 2:	tst r1,#0x200
+ 	beq 3f
+ 	stcl p1, cr10, [ip], #8
+--- a/src/signal/raise.c
++++ b/src/signal/raise.c
+@@ -5,12 +5,11 @@
+ 
+ int raise(int sig)
+ {
+-	int pid, tid, ret;
++	int tid, ret;
+ 	sigset_t set;
+ 	__block_app_sigs(&set);
+ 	tid = __syscall(SYS_gettid);
+-	pid = __syscall(SYS_getpid);
+-	ret = syscall(SYS_tgkill, pid, tid, sig);
++	ret = syscall(SYS_tkill, tid, sig);
+ 	__restore_sigs(&set);
+ 	return ret;
+ }
+--- a/src/stdio/vfprintf.c
++++ b/src/stdio/vfprintf.c
+@@ -158,7 +158,7 @@ static void pop_arg(union arg *arg, int 
+ 
+ static void out(FILE *f, const char *s, size_t l)
+ {
+-	__fwritex((void *)s, l, f);
++	if (!(f->flags & F_ERR)) __fwritex((void *)s, l, f);
+ }
+ 
+ static void pad(FILE *f, char c, int w, int l, int fl)
+@@ -225,7 +225,7 @@ static int fmt_fp(FILE *f, long double y
+ 
+ 	if (!isfinite(y)) {
+ 		char *s = (t&32)?"inf":"INF";
+-		if (y!=y) s=(t&32)?"nan":"NAN", pl=0;
++		if (y!=y) s=(t&32)?"nan":"NAN";
+ 		pad(f, ' ', w, 3+pl, fl&~ZERO_PAD);
+ 		out(f, prefix, pl);
+ 		out(f, s, 3);
+@@ -570,7 +570,7 @@ static int printf_core(FILE *f, const ch
+ 			if (0) {
+ 		case 'o':
+ 			a = fmt_o(arg.i, z);
+-			if ((fl&ALT_FORM) && arg.i) prefix+=5, pl=1;
++			if ((fl&ALT_FORM) && p<z-a+1) p=z-a+1;
+ 			} if (0) {
+ 		case 'd': case 'i':
+ 			pl=1;
+@@ -656,6 +656,7 @@ int vfprintf(FILE *restrict f, const cha
+ 	int nl_type[NL_ARGMAX+1] = {0};
+ 	union arg nl_arg[NL_ARGMAX+1];
+ 	unsigned char internal_buf[80], *saved_buf = 0;
++	int olderr;
+ 	int ret;
+ 
+ 	/* the copy allows passing va_list* even if va_list is an array */
+@@ -666,6 +667,8 @@ int vfprintf(FILE *restrict f, const cha
+ 	}
+ 
+ 	FLOCK(f);
++	olderr = f->flags & F_ERR;
++	if (f->mode < 1) f->flags &= ~F_ERR;
+ 	if (!f->buf_size) {
+ 		saved_buf = f->buf;
+ 		f->wpos = f->wbase = f->buf = internal_buf;
+@@ -680,6 +683,8 @@ int vfprintf(FILE *restrict f, const cha
+ 		f->buf_size = 0;
+ 		f->wpos = f->wbase = f->wend = 0;
+ 	}
++	if (f->flags & F_ERR) ret = -1;
++	f->flags |= olderr;
+ 	FUNLOCK(f);
+ 	va_end(ap2);
+ 	return ret;
+--- a/src/stdio/vfwprintf.c
++++ b/src/stdio/vfwprintf.c
+@@ -149,7 +149,7 @@ static void pop_arg(union arg *arg, int 
+ 
+ static void out(FILE *f, const wchar_t *s, size_t l)
+ {
+-	while (l--) fputwc(*s++, f);
++	while (l-- && !(f->flags & F_ERR)) fputwc(*s++, f);
+ }
+ 
+ static int getint(wchar_t **s) {
+@@ -345,6 +345,7 @@ int vfwprintf(FILE *restrict f, const wc
+ 	va_list ap2;
+ 	int nl_type[NL_ARGMAX] = {0};
+ 	union arg nl_arg[NL_ARGMAX];
++	int olderr;
+ 	int ret;
+ 
+ 	/* the copy allows passing va_list* even if va_list is an array */
+@@ -356,7 +357,11 @@ int vfwprintf(FILE *restrict f, const wc
+ 
+ 	FLOCK(f);
+ 	f->mode |= f->mode+1;
++	olderr = f->flags & F_ERR;
++	f->flags &= ~F_ERR;
+ 	ret = wprintf_core(f, fmt, &ap2, nl_arg, nl_type);
++	if (f->flags & F_ERR) ret = -1;
++	f->flags |= olderr;
+ 	FUNLOCK(f);
+ 	va_end(ap2);
+ 	return ret;
+--- a/src/string/armel/memcpy.s
++++ b/src/string/armel/memcpy.s
+@@ -49,113 +49,113 @@ memcpy:
+ 	 * ARM ABI. Since we have to save R0, we might as well save R4
+ 	 * which we can use for better pipelining of the reads below
+ 	 */
+-        .fnstart
+-        .save       {r0, r4, lr}
+-        stmfd       sp!, {r0, r4, lr}
+-        /* Making room for r5-r11 which will be spilled later */
+-        .pad        #28
+-        sub         sp, sp, #28
+-
+-        /* it simplifies things to take care of len<4 early */
+-        cmp	r2, #4
+-        blo	copy_last_3_and_return
+-
+-        /* compute the offset to align the source
+-         * offset = (4-(src&3))&3 = -src & 3
+-         */
+-        rsb	r3, r1, #0
+-        ands	r3, r3, #3
+-        beq	src_aligned
+-
+-        /* align source to 32 bits. We need to insert 2 instructions between
+-         * a ldr[b|h] and str[b|h] because byte and half-word instructions
+-         * stall 2 cycles.
+-         */
+-        movs	r12, r3, lsl #31
+-        sub	r2, r2, r3		/* we know that r3 <= r2 because r2 >= 4 */
+-        ldrmib	r3, [r1], #1
+-        ldrcsb	r4, [r1], #1
+-        ldrcsb	r12,[r1], #1
+-        strmib	r3, [r0], #1
+-        strcsb	r4, [r0], #1
+-        strcsb	r12,[r0], #1
++	.fnstart
++	.save       {r0, r4, lr}
++	stmfd       sp!, {r0, r4, lr}
++	/* Making room for r5-r11 which will be spilled later */
++	.pad        #28
++	sub         sp, sp, #28
++
++	/* it simplifies things to take care of len<4 early */
++	cmp     r2, #4
++	blo     copy_last_3_and_return
++
++	/* compute the offset to align the source
++	 * offset = (4-(src&3))&3 = -src & 3
++	 */
++	rsb     r3, r1, #0
++	ands    r3, r3, #3
++	beq     src_aligned
++
++	/* align source to 32 bits. We need to insert 2 instructions between
++	 * a ldr[b|h] and str[b|h] because byte and half-word instructions
++	 * stall 2 cycles.
++	 */
++	movs    r12, r3, lsl #31
++	sub     r2, r2, r3              /* we know that r3 <= r2 because r2 >= 4 */
++	.word 0x44d13001 /* ldrbmi r3, [r1], #1 */
++	.word 0x24d14001 /* ldrbcs r4, [r1], #1 */
++	.word 0x24d1c001 /* ldrbcs r12,[r1], #1 */
++	.word 0x44c03001 /* strbmi r3, [r0], #1 */
++	.word 0x24c04001 /* strbcs r4, [r0], #1 */
++	.word 0x24c0c001 /* strbcs r12,[r0], #1 */
+ 
+ src_aligned:
+ 
+ 	/* see if src and dst are aligned together (congruent) */
+-	eor	r12, r0, r1
+-        tst	r12, #3
+-        bne	non_congruent
+-
+-        /* Use post-incriment mode for stm to spill r5-r11 to reserved stack
+-         * frame. Don't update sp.
+-         */
+-        stmea	sp, {r5-r11}
+-
+-        /* align the destination to a cache-line */
+-        rsb	r3, r0, #0
+-        ands	r3, r3, #0x1C
+-        beq    	congruent_aligned32
+-        cmp    	r3, r2
+-        andhi	r3, r2, #0x1C
+-
+-        /* conditionnaly copies 0 to 7 words (length in r3) */
+-        movs	r12, r3, lsl #28
+-        ldmcsia	r1!, {r4, r5, r6, r7}	/* 16 bytes */
+-        ldmmiia	r1!, {r8, r9}			/*  8 bytes */
+-        stmcsia	r0!, {r4, r5, r6, r7}
+-        stmmiia	r0!, {r8, r9}
+-        tst    	r3, #0x4
+-        ldrne	r10,[r1], #4			/*  4 bytes */
+-        strne	r10,[r0], #4
+-        sub    	r2, r2, r3
++	eor     r12, r0, r1
++	tst     r12, #3
++	bne     non_congruent
++
++	/* Use post-incriment mode for stm to spill r5-r11 to reserved stack
++	 * frame. Don't update sp.
++	 */
++	stmea   sp, {r5-r11}
++
++	/* align the destination to a cache-line */
++	rsb     r3, r0, #0
++	ands    r3, r3, #0x1C
++	beq     congruent_aligned32
++	cmp     r3, r2
++	andhi   r3, r2, #0x1C
++
++	/* conditionnaly copies 0 to 7 words (length in r3) */
++	movs    r12, r3, lsl #28
++	ldmcs   r1!, {r4, r5, r6, r7}           /* 16 bytes */
++	ldmmi   r1!, {r8, r9}                   /*  8 bytes */
++	stmcs   r0!, {r4, r5, r6, r7}
++	stmmi   r0!, {r8, r9}
++	tst     r3, #0x4
++	ldrne   r10,[r1], #4                    /*  4 bytes */
++	strne   r10,[r0], #4
++	sub     r2, r2, r3
+ 
+ congruent_aligned32:
+ 	/*
+- 	 * here source is aligned to 32 bytes.
+- 	 */
++	 * here source is aligned to 32 bytes.
++	 */
+ 
+ cached_aligned32:
+-        subs   	r2, r2, #32
+-        blo    	less_than_32_left
++	subs    r2, r2, #32
++	blo     less_than_32_left
+ 
+-        /*
+-         * We preload a cache-line up to 64 bytes ahead. On the 926, this will
+-         * stall only until the requested world is fetched, but the linefill
+-         * continues in the the background.
+-         * While the linefill is going, we write our previous cache-line
+-         * into the write-buffer (which should have some free space).
+-         * When the linefill is done, the writebuffer will
+-         * start dumping its content into memory
+-         *
+-         * While all this is going, we then load a full cache line into
+-         * 8 registers, this cache line should be in the cache by now
+-         * (or partly in the cache).
+-         *
+-         * This code should work well regardless of the source/dest alignment.
+-         *
+-         */
+-
+-        /* Align the preload register to a cache-line because the cpu does
+-         * "critical word first" (the first word requested is loaded first).
+-         */
+-        @ bic    	r12, r1, #0x1F
+-        @ add    	r12, r12, #64
+-
+-1:      ldmia  	r1!, { r4-r11 }
+-        subs   	r2, r2, #32
+-
+-        /* 
+-         * NOTE: if r12 is more than 64 ahead of r1, the following ldrhi
+-         * for ARM9 preload will not be safely guarded by the preceding subs.
+-         * When it is safely guarded the only possibility to have SIGSEGV here
+-         * is because the caller overstates the length.
+-         */
+-        @ ldrhi  	r3, [r12], #32      /* cheap ARM9 preload */
+-        stmia  	r0!, { r4-r11 }
+-	bhs    	1b
++	/*
++	 * We preload a cache-line up to 64 bytes ahead. On the 926, this will
++	 * stall only until the requested world is fetched, but the linefill
++	 * continues in the the background.
++	 * While the linefill is going, we write our previous cache-line
++	 * into the write-buffer (which should have some free space).
++	 * When the linefill is done, the writebuffer will
++	 * start dumping its content into memory
++	 *
++	 * While all this is going, we then load a full cache line into
++	 * 8 registers, this cache line should be in the cache by now
++	 * (or partly in the cache).
++	 *
++	 * This code should work well regardless of the source/dest alignment.
++	 *
++	 */
+ 
+-        add	r2, r2, #32
++	/* Align the preload register to a cache-line because the cpu does
++	 * "critical word first" (the first word requested is loaded first).
++	 */
++	@ bic           r12, r1, #0x1F
++	@ add           r12, r12, #64
++
++1:      ldmia   r1!, { r4-r11 }
++	subs    r2, r2, #32
++
++	/* 
++	 * NOTE: if r12 is more than 64 ahead of r1, the following ldrhi
++	 * for ARM9 preload will not be safely guarded by the preceding subs.
++	 * When it is safely guarded the only possibility to have SIGSEGV here
++	 * is because the caller overstates the length.
++	 */
++	@ ldrhi         r3, [r12], #32      /* cheap ARM9 preload */
++	stmia   r0!, { r4-r11 }
++	bhs     1b
++
++	add     r2, r2, #32
+ 
+ less_than_32_left:
+ 	/*
+@@ -166,30 +166,30 @@ less_than_32_left:
+ 	 * be a common case (if not executed the code below takes
+ 	 * about 16 cycles)
+ 	 */
+-	tst	r2, #0x1F
+-        beq	1f
++	tst     r2, #0x1F
++	beq     1f
+ 
+-        /* conditionnaly copies 0 to 31 bytes */
+-        movs	r12, r2, lsl #28
+-        ldmcsia	r1!, {r4, r5, r6, r7}	/* 16 bytes */
+-        ldmmiia	r1!, {r8, r9}			/*  8 bytes */
+-        stmcsia	r0!, {r4, r5, r6, r7}
+-        stmmiia	r0!, {r8, r9}
+-        movs	r12, r2, lsl #30
+-        ldrcs	r3, [r1], #4			/*  4 bytes */
+-        ldrmih	r4, [r1], #2			/*  2 bytes */
+-        strcs	r3, [r0], #4
+-        strmih	r4, [r0], #2
+-        tst    	r2, #0x1
+-        ldrneb	r3, [r1]				/*  last byte  */
+-        strneb	r3, [r0]
+-
+-        /* we're done! restore everything and return */
+-1:	ldmfd	sp!, {r5-r11}
+-        ldmfd	sp!, {r0, r4, lr}
+-        tst	lr, #1
+-        moveq	pc, lr
+-        bx	lr
++	/* conditionnaly copies 0 to 31 bytes */
++	movs    r12, r2, lsl #28
++	ldmcs   r1!, {r4, r5, r6, r7}           /* 16 bytes */
++	ldmmi   r1!, {r8, r9}                   /*  8 bytes */
++	stmcs   r0!, {r4, r5, r6, r7}
++	stmmi   r0!, {r8, r9}
++	movs    r12, r2, lsl #30
++	ldrcs   r3, [r1], #4                    /*  4 bytes */
++	.word 0x40d140b2 /* ldrhmi r4, [r1], #2 */ /*  2 bytes */
++	strcs   r3, [r0], #4
++	.word 0x40c040b2 /* strhmi r4, [r0], #2 */
++	tst     r2, #0x1
++	.word 0x15d13000 /* ldrbne r3, [r1] */  /*  last byte  */
++	.word 0x15c03000 /* strbne r3, [r0] */
++
++	/* we're done! restore everything and return */
++1:      ldmfd   sp!, {r5-r11}
++	ldmfd   sp!, {r0, r4, lr}
++	tst     lr, #1
++	moveq   pc, lr
++	bx      lr
+ 
+ 	/********************************************************************/
+ 
+@@ -202,180 +202,180 @@ non_congruent:
+ 	 * (the number of bytes written is always smaller, because we have
+ 	 * partial words in the shift queue)
+ 	 */
+-	cmp	r2, #4
+-	blo	copy_last_3_and_return
++	cmp     r2, #4
++	blo     copy_last_3_and_return
+ 
+-        /* Use post-incriment mode for stm to spill r5-r11 to reserved stack
+-         * frame. Don't update sp.
+-         */
+-        stmea	sp, {r5-r11}
+-
+-        /* compute shifts needed to align src to dest */
+-        rsb	r5, r0, #0
+-        and	r5, r5, #3			/* r5 = # bytes in partial words */
+-        mov	r12, r5, lsl #3		/* r12 = right */
+-        rsb	lr, r12, #32		/* lr = left  */
+-
+-        /* read the first word */
+-        ldr	r3, [r1], #4
+-        sub	r2, r2, #4
+-
+-        /* write a partial word (0 to 3 bytes), such that destination
+-         * becomes aligned to 32 bits (r5 = nb of words to copy for alignment)
+-         */
+-        movs	r5, r5, lsl #31
+-        strmib	r3, [r0], #1
+-        movmi	r3, r3, lsr #8
+-        strcsb	r3, [r0], #1
+-        movcs	r3, r3, lsr #8
+-        strcsb	r3, [r0], #1
+-        movcs	r3, r3, lsr #8
+-
+-        cmp	r2, #4
+-        blo	partial_word_tail
+-
+-        /* Align destination to 32 bytes (cache line boundary) */
+-1:	tst	r0, #0x1c
+-        beq	2f
+-        ldr	r5, [r1], #4
+-        sub    	r2, r2, #4
+-        orr	r4, r3, r5,		lsl lr
+-        mov	r3, r5,			lsr r12
+-        str	r4, [r0], #4
+-        cmp    	r2, #4
+-        bhs	1b
+-        blo	partial_word_tail
++	/* Use post-incriment mode for stm to spill r5-r11 to reserved stack
++	 * frame. Don't update sp.
++	 */
++	stmea   sp, {r5-r11}
++
++	/* compute shifts needed to align src to dest */
++	rsb     r5, r0, #0
++	and     r5, r5, #3                      /* r5 = # bytes in partial words */
++	mov     r12, r5, lsl #3         /* r12 = right */
++	rsb     lr, r12, #32            /* lr = left  */
++
++	/* read the first word */
++	ldr     r3, [r1], #4
++	sub     r2, r2, #4
++
++	/* write a partial word (0 to 3 bytes), such that destination
++	 * becomes aligned to 32 bits (r5 = nb of words to copy for alignment)
++	 */
++	movs    r5, r5, lsl #31
++	.word 0x44c03001 /* strbmi r3, [r0], #1 */
++	movmi   r3, r3, lsr #8
++	.word 0x24c03001 /* strbcs r3, [r0], #1 */
++	movcs   r3, r3, lsr #8
++	.word 0x24c03001 /* strbcs r3, [r0], #1 */
++	movcs   r3, r3, lsr #8
++
++	cmp     r2, #4
++	blo     partial_word_tail
++
++	/* Align destination to 32 bytes (cache line boundary) */
++1:      tst     r0, #0x1c
++	beq     2f
++	ldr     r5, [r1], #4
++	sub     r2, r2, #4
++	orr     r4, r3, r5,             lsl lr
++	mov     r3, r5,                 lsr r12
++	str     r4, [r0], #4
++	cmp     r2, #4
++	bhs     1b
++	blo     partial_word_tail
+ 
+ 	/* copy 32 bytes at a time */
+-2:	subs	r2, r2, #32
+-	blo	less_than_thirtytwo
++2:      subs    r2, r2, #32
++	blo     less_than_thirtytwo
++
++	/* Use immediate mode for the shifts, because there is an extra cycle
++	 * for register shifts, which could account for up to 50% of
++	 * performance hit.
++	 */
+ 
+-        /* Use immediate mode for the shifts, because there is an extra cycle
+-         * for register shifts, which could account for up to 50% of
+-         * performance hit.
+-         */
+-
+-        cmp	r12, #24
+-        beq	loop24
+-        cmp	r12, #8
+-        beq	loop8
++	cmp     r12, #24
++	beq     loop24
++	cmp     r12, #8
++	beq     loop8
+ 
+ loop16:
+-        ldr    	r12, [r1], #4
+-1:      mov    	r4, r12
+-	ldmia	r1!, {   r5,r6,r7,  r8,r9,r10,r11}
+-        subs   	r2, r2, #32
+-        ldrhs  	r12, [r1], #4
+-	orr	r3, r3, r4, lsl #16
+-        mov	r4, r4, lsr #16
+-        orr	r4, r4, r5, lsl #16
+-        mov	r5, r5, lsr #16
+-        orr	r5, r5, r6, lsl #16
+-        mov	r6, r6, lsr #16
+-        orr	r6, r6, r7, lsl #16
+-        mov	r7, r7, lsr #16
+-        orr	r7, r7, r8, lsl #16
+-        mov	r8, r8, lsr #16
+-        orr	r8, r8, r9, lsl #16
+-        mov	r9, r9, lsr #16
+-        orr	r9, r9, r10, lsl #16
+-        mov	r10, r10,		lsr #16
+-        orr	r10, r10, r11, lsl #16
+-        stmia	r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+-        mov	r3, r11, lsr #16
+-        bhs	1b
+-        b	less_than_thirtytwo
++	ldr     r12, [r1], #4
++1:      mov     r4, r12
++	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
++	subs    r2, r2, #32
++	ldrhs   r12, [r1], #4
++	orr     r3, r3, r4, lsl #16
++	mov     r4, r4, lsr #16
++	orr     r4, r4, r5, lsl #16
++	mov     r5, r5, lsr #16
++	orr     r5, r5, r6, lsl #16
++	mov     r6, r6, lsr #16
++	orr     r6, r6, r7, lsl #16
++	mov     r7, r7, lsr #16
++	orr     r7, r7, r8, lsl #16
++	mov     r8, r8, lsr #16
++	orr     r8, r8, r9, lsl #16
++	mov     r9, r9, lsr #16
++	orr     r9, r9, r10, lsl #16
++	mov     r10, r10,               lsr #16
++	orr     r10, r10, r11, lsl #16
++	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
++	mov     r3, r11, lsr #16
++	bhs     1b
++	b       less_than_thirtytwo
+ 
+ loop8:
+-        ldr    	r12, [r1], #4
+-1:      mov    	r4, r12
+-	ldmia	r1!, {   r5,r6,r7,  r8,r9,r10,r11}
+-	subs	r2, r2, #32
+-        ldrhs  	r12, [r1], #4
+-        orr	r3, r3, r4, lsl #24
+-        mov	r4, r4, lsr #8
+-        orr	r4, r4, r5, lsl #24
+-        mov	r5, r5, lsr #8
+-        orr	r5, r5, r6, lsl #24
+-        mov	r6, r6,	 lsr #8
+-        orr	r6, r6, r7, lsl #24
+-        mov	r7, r7,	 lsr #8
+-        orr	r7, r7, r8,		lsl #24
+-        mov	r8, r8,	 lsr #8
+-        orr	r8, r8, r9,		lsl #24
+-        mov	r9, r9,	 lsr #8
+-        orr	r9, r9, r10,	lsl #24
+-        mov	r10, r10, lsr #8
+-        orr	r10, r10, r11,	lsl #24
+-        stmia	r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+-        mov	r3, r11, lsr #8
+-        bhs	1b
+-        b	less_than_thirtytwo
++	ldr     r12, [r1], #4
++1:      mov     r4, r12
++	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
++	subs    r2, r2, #32
++	ldrhs   r12, [r1], #4
++	orr     r3, r3, r4, lsl #24
++	mov     r4, r4, lsr #8
++	orr     r4, r4, r5, lsl #24
++	mov     r5, r5, lsr #8
++	orr     r5, r5, r6, lsl #24
++	mov     r6, r6,  lsr #8
++	orr     r6, r6, r7, lsl #24
++	mov     r7, r7,  lsr #8
++	orr     r7, r7, r8,             lsl #24
++	mov     r8, r8,  lsr #8
++	orr     r8, r8, r9,             lsl #24
++	mov     r9, r9,  lsr #8
++	orr     r9, r9, r10,    lsl #24
++	mov     r10, r10, lsr #8
++	orr     r10, r10, r11,  lsl #24
++	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
++	mov     r3, r11, lsr #8
++	bhs     1b
++	b       less_than_thirtytwo
+ 
+ loop24:
+-        ldr    	r12, [r1], #4
+-1:      mov    	r4, r12
+-	ldmia	r1!, {   r5,r6,r7,  r8,r9,r10,r11}
+-        subs	r2, r2, #32
+-        ldrhs  	r12, [r1], #4
+-        orr	r3, r3, r4, lsl #8
+-        mov	r4, r4, lsr #24
+-        orr	r4, r4, r5, lsl #8
+-        mov	r5, r5, lsr #24
+-        orr	r5, r5, r6, lsl #8
+-        mov	r6, r6, lsr #24
+-        orr	r6, r6, r7, lsl #8
+-        mov	r7, r7, lsr #24
+-        orr	r7, r7, r8, lsl #8
+-        mov	r8, r8, lsr #24
+-        orr	r8, r8, r9, lsl #8
+-        mov	r9, r9, lsr #24
+-        orr	r9, r9, r10, lsl #8
+-        mov	r10, r10, lsr #24
+-        orr	r10, r10, r11, lsl #8
+-        stmia	r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+-        mov	r3, r11, lsr #24
+-        bhs	1b
++	ldr     r12, [r1], #4
++1:      mov     r4, r12
++	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
++	subs    r2, r2, #32
++	ldrhs   r12, [r1], #4
++	orr     r3, r3, r4, lsl #8
++	mov     r4, r4, lsr #24
++	orr     r4, r4, r5, lsl #8
++	mov     r5, r5, lsr #24
++	orr     r5, r5, r6, lsl #8
++	mov     r6, r6, lsr #24
++	orr     r6, r6, r7, lsl #8
++	mov     r7, r7, lsr #24
++	orr     r7, r7, r8, lsl #8
++	mov     r8, r8, lsr #24
++	orr     r8, r8, r9, lsl #8
++	mov     r9, r9, lsr #24
++	orr     r9, r9, r10, lsl #8
++	mov     r10, r10, lsr #24
++	orr     r10, r10, r11, lsl #8
++	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
++	mov     r3, r11, lsr #24
++	bhs     1b
+ 
+ less_than_thirtytwo:
+ 	/* copy the last 0 to 31 bytes of the source */
+-	rsb	r12, lr, #32		/* we corrupted r12, recompute it  */
+-        add	r2, r2, #32
+-        cmp	r2, #4
+-        blo	partial_word_tail
+-
+-1:	ldr	r5, [r1], #4
+-        sub    	r2, r2, #4
+-        orr	r4, r3, r5,		lsl lr
+-        mov	r3,	r5,			lsr r12
+-        str	r4, [r0], #4
+-        cmp    	r2, #4
+-        bhs	1b
++	rsb     r12, lr, #32            /* we corrupted r12, recompute it  */
++	add     r2, r2, #32
++	cmp     r2, #4
++	blo     partial_word_tail
++
++1:      ldr     r5, [r1], #4
++	sub     r2, r2, #4
++	orr     r4, r3, r5,             lsl lr
++	mov     r3,     r5,                     lsr r12
++	str     r4, [r0], #4
++	cmp     r2, #4
++	bhs     1b
+ 
+ partial_word_tail:
+ 	/* we have a partial word in the input buffer */
+-	movs	r5, lr, lsl #(31-3)
+-	strmib	r3, [r0], #1
+-        movmi	r3, r3, lsr #8
+-        strcsb	r3, [r0], #1
+-        movcs	r3, r3, lsr #8
+-        strcsb	r3, [r0], #1
++	movs    r5, lr, lsl #(31-3)
++	.word 0x44c03001 /* strbmi r3, [r0], #1 */
++	movmi   r3, r3, lsr #8
++	.word 0x24c03001 /* strbcs r3, [r0], #1 */
++	movcs   r3, r3, lsr #8
++	.word 0x24c03001 /* strbcs r3, [r0], #1 */
+ 
+-        /* Refill spilled registers from the stack. Don't update sp. */
+-        ldmfd	sp, {r5-r11}
++	/* Refill spilled registers from the stack. Don't update sp. */
++	ldmfd   sp, {r5-r11}
+ 
+ copy_last_3_and_return:
+-	movs	r2, r2, lsl #31	/* copy remaining 0, 1, 2 or 3 bytes */
+-        ldrmib	r2, [r1], #1
+-        ldrcsb	r3, [r1], #1
+-        ldrcsb	r12,[r1]
+-        strmib	r2, [r0], #1
+-        strcsb	r3, [r0], #1
+-        strcsb	r12,[r0]
+-
+-        /* we're done! restore sp and spilled registers and return */
+-        add    	sp,  sp, #28
+-        ldmfd	sp!, {r0, r4, lr}
+-        tst	lr, #1
+-        moveq	pc, lr
+-        bx	lr
++	movs    r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */
++	.word 0x44d12001 /* ldrbmi r2, [r1], #1 */
++	.word 0x24d13001 /* ldrbcs r3, [r1], #1 */
++	.word 0x25d1c000 /* ldrbcs r12,[r1] */
++	.word 0x44c02001 /* strbmi r2, [r0], #1 */
++	.word 0x24c03001 /* strbcs r3, [r0], #1 */
++	.word 0x25c0c000 /* strbcs r12,[r0] */
++
++	/* we're done! restore sp and spilled registers and return */
++	add     sp,  sp, #28
++	ldmfd   sp!, {r0, r4, lr}
++	tst     lr, #1
++	moveq   pc, lr
++	bx      lr
+--- a/src/thread/arm/__set_thread_area.s
++++ b/src/thread/arm/__set_thread_area.s
+@@ -1,12 +1 @@
+-.text
+-.global __set_thread_area
+-.type   __set_thread_area,%function
+-__set_thread_area:
+-	mov r1,r7
+-	mov r7,#0x0f0000
+-	add r7,r7,#5
+-	svc 0
+-	mov r7,r1
+-	tst lr,#1
+-	moveq pc,lr
+-	bx lr
++/* Replaced by C code in arch/arm/src */
+--- a/src/thread/arm/tls.s
++++ /dev/null
+@@ -1,4 +0,0 @@
+-.global __aeabi_read_tp
+-.type __aeabi_read_tp,%function
+-__aeabi_read_tp:
+-	ldr pc,=0xffff0fe0
+--- a/src/thread/pthread_once.c
++++ b/src/thread/pthread_once.c
+@@ -8,15 +8,8 @@ static void undo(void *control)
+ 		__wake(control, -1, 1);
+ }
+ 
+-int __pthread_once(pthread_once_t *control, void (*init)(void))
++int __pthread_once_full(pthread_once_t *control, void (*init)(void))
+ {
+-	/* Return immediately if init finished before, but ensure that
+-	 * effects of the init routine are visible to the caller. */
+-	if (*control == 2) {
+-		a_barrier();
+-		return 0;
+-	}
+-
+ 	/* Try to enter initializing state. Four possibilities:
+ 	 *  0 - we're the first or the other cancelled; run init
+ 	 *  1 - another thread is running init; wait
+@@ -43,4 +36,15 @@ int __pthread_once(pthread_once_t *contr
+ 	}
+ }
+ 
++int __pthread_once(pthread_once_t *control, void (*init)(void))
++{
++	/* Return immediately if init finished before, but ensure that
++	 * effects of the init routine are visible to the caller. */
++	if (*control == 2) {
++		a_barrier();
++		return 0;
++	}
++	return __pthread_once_full(control, init);
++}
++
+ weak_alias(__pthread_once, pthread_once);
diff --git a/toolchain/musl/patches/001-revert-getopt-change.patch b/toolchain/musl/patches/001-revert-getopt-change.patch
new file mode 100644
index 0000000000..a981011394
--- /dev/null
+++ b/toolchain/musl/patches/001-revert-getopt-change.patch
@@ -0,0 +1,113 @@
+commit 84d8240bb5c487034ea3fb674a1de6d884df18aa
+Author: Felix Fietkau <nbd@openwrt.org>
+Date:   Sun Jan 11 19:16:32 2015 +0100
+
+    Revert "add error message printing to getopt_long and make related improvements"
+    
+    This reverts commit 91184c4f16b143107fa9935edebe5d2b20bd70d8.
+
+diff --git a/src/misc/getopt.c b/src/misc/getopt.c
+index 9217983..9db5eaa 100644
+--- a/src/misc/getopt.c
++++ b/src/misc/getopt.c
+@@ -12,7 +12,7 @@ int optind=1, opterr=1, optopt, __optpos, __optreset=0;
+ #define optpos __optpos
+ weak_alias(__optreset, optreset);
+ 
+-void __getopt_msg(const char *a, const char *b, const char *c, size_t l)
++void __getopt_msg(const char *a, const char *b, const char *c, int l)
+ {
+ 	FILE *f = stderr;
+ 	b = __lctrans_cur(b);
+@@ -80,7 +80,7 @@ int getopt(int argc, char * const argv[], const char *optstring)
+ 
+ 	if (d != c) {
+ 		if (optstring[0] != ':' && opterr)
+-			__getopt_msg(argv[0], ": unrecognized option: ", optchar, k);
++			__getopt_msg(argv[0], ": illegal option: ", optchar, k);
+ 		return '?';
+ 	}
+ 	if (optstring[i] == ':') {
+diff --git a/src/misc/getopt_long.c b/src/misc/getopt_long.c
+index e5a4a75..e245ab9 100644
+--- a/src/misc/getopt_long.c
++++ b/src/misc/getopt_long.c
+@@ -2,7 +2,6 @@
+ #include <stddef.h>
+ #include <getopt.h>
+ #include <stdio.h>
+-#include <string.h>
+ 
+ extern int __optpos, __optreset;
+ 
+@@ -16,14 +15,11 @@ static void permute(char *const *argv, int dest, int src)
+ 	av[dest] = tmp;
+ }
+ 
+-void __getopt_msg(const char *, const char *, const char *, size_t);
+-
+ static int __getopt_long_core(int argc, char *const *argv, const char *optstring, const struct option *longopts, int *idx, int longonly);
+ 
+ static int __getopt_long(int argc, char *const *argv, const char *optstring, const struct option *longopts, int *idx, int longonly)
+ {
+ 	int ret, skipped, resumed;
+-	const char *optstring2 = optstring + 1;
+ 	if (!optind || __optreset) {
+ 		__optreset = 0;
+ 		__optpos = 0;
+@@ -38,10 +34,9 @@ static int __getopt_long(int argc, char *const *argv, const char *optstring, con
+ 			if (argv[i][0] == '-' && argv[i][1]) break;
+ 		}
+ 		optind = i;
+-		optstring2 = optstring;
+ 	}
+ 	resumed = optind;
+-	ret = __getopt_long_core(argc, argv, optstring2, longopts, idx, longonly);
++	ret = __getopt_long_core(argc, argv, optstring, longopts, idx, longonly);
+ 	if (resumed > skipped) {
+ 		int i, cnt = optind-resumed;
+ 		for (i=0; i<cnt; i++)
+@@ -78,27 +73,12 @@ static int __getopt_long_core(int argc, char *const *argv, const char *optstring
+ 			optind++;
+ 			optopt = longopts[i].val;
+ 			if (*opt == '=') {
+-				if (!longopts[i].has_arg) {
+-					if (optstring[0] == ':' || !opterr)
+-						return '?';
+-					__getopt_msg(argv[0],
+-						": option does not take an argument: ",
+-						longopts[i].name,
+-						strlen(longopts[i].name));
+-					return '?';
+-				}
++				if (!longopts[i].has_arg) return '?';
+ 				optarg = opt+1;
+ 			} else {
+ 				if (longopts[i].has_arg == required_argument) {
+-					if (!(optarg = argv[optind])) {
+-						if (optstring[0] == ':' || !opterr)
+-							return ':';
+-						__getopt_msg(argv[0],
+-							": option requires an argument: ",
+-							longopts[i].name,
+-							strlen(longopts[i].name));
+-						return '?';
+-					}
++					if (!(optarg = argv[optind]))
++						return ':';
+ 					optind++;
+ 				} else optarg = NULL;
+ 			}
+@@ -110,12 +90,6 @@ static int __getopt_long_core(int argc, char *const *argv, const char *optstring
+ 			return longopts[i].val;
+ 		}
+ 		if (argv[optind][1] == '-') {
+-			if (optstring[0] != ':' && opterr)
+-				__getopt_msg(argv[0], cnt ?
+-					": option is ambiguous: " :
+-					": unrecognized option: ",
+-					argv[optind]+2,
+-					strlen(argv[optind]+2));
+ 			optind++;
+ 			return '?';
+ 		}
diff --git a/toolchain/musl/patches/110-getopt_optional_argument_fix.patch b/toolchain/musl/patches/110-getopt_optional_argument_fix.patch
deleted file mode 100644
index 7deebdc251..0000000000
--- a/toolchain/musl/patches/110-getopt_optional_argument_fix.patch
+++ /dev/null
@@ -1,20 +0,0 @@
---- a/src/misc/getopt.c
-+++ b/src/misc/getopt.c
-@@ -55,7 +55,9 @@ int getopt(int argc, char * const argv[]
- 		return '?';
- 	}
- 	if (optstring[i+1] == ':') {
-+		if (optstring[i+2] == ':') optarg = 0;
- 		if (optind >= argc) {
-+			if (optstring[i+2] == ':') return c;
- 			if (optstring[0] == ':') return ':';
- 			if (opterr) {
- 				write(2, argv[0], strlen(argv[0]));
-@@ -65,7 +67,6 @@ int getopt(int argc, char * const argv[]
- 			}
- 			return '?';
- 		}
--		if (optstring[i+2] == ':') optarg = 0;
- 		if (optstring[i+2] != ':' || optpos) {
- 			optarg = argv[optind++] + optpos;
- 			optpos = 0;
diff --git a/toolchain/musl/patches/120-getopt_non-option-arguments_fix.patch b/toolchain/musl/patches/120-getopt_non-option-arguments_fix.patch
deleted file mode 100644
index 3cd909da46..0000000000
--- a/toolchain/musl/patches/120-getopt_non-option-arguments_fix.patch
+++ /dev/null
@@ -1,43 +0,0 @@
---- a/src/misc/getopt.c
-+++ b/src/misc/getopt.c
-@@ -24,8 +24,23 @@ int getopt(int argc, char * const argv[]
- 		optind = 1;
- 	}
- 
--	if (optind >= argc || !argv[optind] || argv[optind][0] != '-' || !argv[optind][1])
-+	if (optind >= argc || !argv[optind])
- 		return -1;
-+
-+	if (argv[optind][0] != '-') {
-+		/* GNU extension */
-+		if (optstring[0] == '-') {
-+			optarg = argv[optind];
-+			optind++;
-+			return 1;
-+		}
-+
-+		return -1;
-+	}
-+
-+	if (!argv[optind][1])
-+		return -1;
-+
- 	if (argv[optind][1] == '-' && !argv[optind][2])
- 		return optind++, -1;
- 
---- a/src/misc/getopt_long.c
-+++ b/src/misc/getopt_long.c
-@@ -12,7 +12,12 @@ static int __getopt_long(int argc, char
- 		__optpos = 0;
- 		optind = 1;
- 	}
--	if (optind >= argc || !argv[optind] || argv[optind][0] != '-') return -1;
-+	if (optind >= argc || !argv[optind])
-+		return -1;
-+
-+	if (argv[optind][0] != '-')
-+		return getopt(argc, argv, optstring);
-+
- 	if ((longonly && argv[optind][1]) ||
- 		(argv[optind][1] == '-' && argv[optind][2]))
- 	{
-- 
2.30.2