Commit 9e3ec61a authored by Ariadne Conill's avatar Ariadne Conill 🐰
Browse files

main/musl: security upgrade to 1.2.2_pre2, CVE-2020-28928

parent a552ed1d
From cbecda0b506c7d49a2f7fe3dc44e0e3dcf663764 Mon Sep 17 00:00:00 2001
From: Rich Felker <dalias@aerifal.cx>
Date: Tue, 10 Nov 2020 14:29:05 -0500
Subject: [PATCH 1/5] dlerror: don't gratuitously hold freebuf_queue lock while
freeing
thread-local buffers allocated for dlerror need to be queued for free
at a later time when the owning thread exits, since malloc may be
replaced by application code and the exiting context is not valid to
call application code from. the code to process queue of pending
frees, introduced in commit aa5a9d15e09851f7b4a1668e9dbde0f6234abada,
gratuitously held the lock for the entire duration of queue
processing, updating the global queue pointer after each free, despite
there being no logical requirement that all frees finish before
another thread can access the queue.
instead, immediately claim the whole queue for freeing and release the
lock, then walk the list and perform frees without the lock held. the
change is unlikely to make any meaningful difference to performance,
but it eliminates one point where the allocator is called under an
internal lock. since the allocator may be application-provided, such
calls are undesirable because they allow application code to impede
forward progress of libc functions in other threads arbitrarily long,
and to induce deadlock if it calls a libc function that requires the
same lock.
the change also eliminates a lock ordering consideration that's an
impediment upcoming work with multithreaded fork.
---
src/ldso/dlerror.c | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/src/ldso/dlerror.c b/src/ldso/dlerror.c
index 3fcc7779..d8bbfc03 100644
--- a/src/ldso/dlerror.c
+++ b/src/ldso/dlerror.c
@@ -35,13 +35,16 @@ void __dl_thread_cleanup(void)
hidden void __dl_vseterr(const char *fmt, va_list ap)
{
LOCK(freebuf_queue_lock);
- while (freebuf_queue) {
- void **p = freebuf_queue;
- freebuf_queue = *p;
- free(p);
- }
+ void **q = freebuf_queue;
+ freebuf_queue = 0;
UNLOCK(freebuf_queue_lock);
+ while (q) {
+ void **p = *q;
+ free(q);
+ q = p;
+ }
+
va_list ap2;
va_copy(ap2, ap);
pthread_t self = __pthread_self();
--
2.21.0
From c1e5d243b7e39b2fbfb17144608ce045575d8e95 Mon Sep 17 00:00:00 2001
From: Rich Felker <dalias@aerifal.cx>
Date: Tue, 10 Nov 2020 19:32:09 -0500
Subject: [PATCH 2/5] drop use of getdelim/stdio in dynamic linker
the only place stdio was used here was for reading the ldso path file,
taking advantage of getdelim to automatically allocate and resize the
buffer. the motivation for use here was that, with shared libraries,
stdio is already available anyway and free to use. this has long been
a nuisance to users because getdelim's use of realloc here triggered a
valgrind bug, but removing it doesn't really fix that; on some archs
even calling the valgrind-interposed malloc at this point will crash.
the actual motivation for this change is moving towards getting rid of
use of application-provided malloc in parts of libc where it would be
called with libc-internal locks held, leading to the possibility of
deadlock if the malloc implementation doesn't follow unwritten rules
about which libc functions are safe for it to call. since getdelim is
required to produce a pointer as if by malloc (i.e. that can be passed
to reallor or free), it necessarily must use the public malloc.
instead of performing a realloc loop as the path file is read, first
query its size with fstat and allocate only once. this produces
slightly different truncation behavior when racing with writes to a
file, but neither behavior is or could be made safe anyway; on a live
system, ldso path files should be replaced by atomic rename only. the
change should also reduce memory waste.
---
ldso/dynlink.c | 27 ++++++++++++++++++++++-----
1 file changed, 22 insertions(+), 5 deletions(-)
diff --git a/ldso/dynlink.c b/ldso/dynlink.c
index f9ac0100..502e52c5 100644
--- a/ldso/dynlink.c
+++ b/ldso/dynlink.c
@@ -1,6 +1,5 @@
#define _GNU_SOURCE
#define SYSCALL_NO_TLS 1
-#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stddef.h>
@@ -556,6 +555,20 @@ static void reclaim_gaps(struct dso *dso)
}
}
+static ssize_t read_loop(int fd, void *p, size_t n)
+{
+ for (size_t i=0; i<n; ) {
+ ssize_t l = read(fd, (char *)p+i, n-i);
+ if (l<0) {
+ if (errno==EINTR) continue;
+ else return -1;
+ }
+ if (l==0) return i;
+ i += l;
+ }
+ return n;
+}
+
static void *mmap_fixed(void *p, size_t n, int prot, int flags, int fd, off_t off)
{
static int no_map_fixed;
@@ -1060,13 +1073,17 @@ static struct dso *load_library(const char *name, struct dso *needed_by)
snprintf(etc_ldso_path, sizeof etc_ldso_path,
"%.*s/etc/ld-musl-" LDSO_ARCH ".path",
(int)prefix_len, prefix);
- FILE *f = fopen(etc_ldso_path, "rbe");
- if (f) {
- if (getdelim(&sys_path, (size_t[1]){0}, 0, f) <= 0) {
+ fd = open(etc_ldso_path, O_RDONLY|O_CLOEXEC);
+ if (fd>=0) {
+ size_t n = 0;
+ if (!fstat(fd, &st)) n = st.st_size;
+ if ((sys_path = malloc(n+1)))
+ sys_path[n] = 0;
+ if (!sys_path || read_loop(fd, sys_path, n)<0) {
free(sys_path);
sys_path = "";
}
- fclose(f);
+ close(fd);
} else if (errno != ENOENT) {
sys_path = "";
}
--
2.21.0
From 8d37958d58cf36f53d5fcc7a8aa6d633da6071b2 Mon Sep 17 00:00:00 2001
From: Rich Felker <dalias@aerifal.cx>
Date: Wed, 11 Nov 2020 00:22:34 -0500
Subject: [PATCH 3/5] give libc access to its own malloc even if public malloc
is interposed
allowing the application to replace malloc (since commit
c9f415d7ea2dace5bf77f6518b6afc36bb7a5732) has brought multiple
headaches where it's used from various critical sections in libc
components. for example:
- the thread-local message buffers allocated for dlerror can't be
freed at thread exit time because application code would then run in
the context of a non-existant thread. this was handled in commit
aa5a9d15e09851f7b4a1668e9dbde0f6234abada by queuing them for free
later.
- the dynamic linker has to be careful not to pass memory allocated at
early startup time (necessarily using its own malloc) to realloc or
free after redoing relocations with the application and all
libraries present. bugs in this area were fixed several times, at
least in commits 0c5c8f5da6e36fe4ab704bee0cd981837859e23f and
2f1f51ae7b2d78247568e7fdb8462f3c19e469a4 and possibly others.
- by calling the allocator from contexts where libc-internal locks are
held, we impose undocumented requirements on alternate malloc
implementations not to call into any libc function that might
attempt to take these locks; if they do, deadlock results.
- work to make fork of a multithreaded parent give the child an
unrestricted execution environment is blocked by lock order issues
as long as the application-provided allocator can be called with
libc-internal locks held.
these problems are all fixed by giving libc internals access to the
original, non-replaced allocator, for use where needed. it can't be
used everywhere, as some interfaces like str[n]dup, open_[w]memstream,
getline/getdelim, etc. are required to provide the called memory
obtained as if by (the public) malloc. and there are a number of libc
interfaces that are "pure library" code, not part of some internal
singleton, and where using the application's choice of malloc
implementation is preferable -- things like glob, regex, etc.
one might expect there to be significant cost to static-linked
programs, pulling in two malloc implementations, one of them
mostly-unused, if malloc is replaced. however, in almost all of the
places where malloc is used internally, care has been taken already
not to pull in realloc/free (i.e. to link with just the bump
allocator). this size optimization carries over automatically.
the newly-exposed internal allocator functions are obtained by
renaming the actual definitions, then adding new wrappers around them
with the public names. technically __libc_realloc and __libc_free
could be aliases rather than needing a layer of wrapper, but this
would almost surely break certain instrumentation (valgrind) and the
size and performance difference is negligible. __libc_calloc needs to
be handled specially since calloc is designed to work with either the
internal or the replaced malloc.
as a bonus, this change also eliminates the longstanding ugly
dependency of the static bump allocator on order of object files in
libc.a, by making it so there's only one definition of the malloc
function and having it in the same source file as the bump allocator.
---
src/include/stdlib.h | 6 ++++++
src/malloc/free.c | 6 ++++++
src/malloc/libc_calloc.c | 4 ++++
src/malloc/lite_malloc.c | 14 +++++++++++++-
src/malloc/mallocng/glue.h | 4 ++++
src/malloc/oldmalloc/malloc.c | 4 ++++
src/malloc/realloc.c | 6 ++++++
7 files changed, 43 insertions(+), 1 deletion(-)
create mode 100644 src/malloc/free.c
create mode 100644 src/malloc/libc_calloc.c
create mode 100644 src/malloc/realloc.c
diff --git a/src/include/stdlib.h b/src/include/stdlib.h
index d38a5417..e9da2015 100644
--- a/src/include/stdlib.h
+++ b/src/include/stdlib.h
@@ -9,4 +9,10 @@ hidden int __mkostemps(char *, int, int);
hidden int __ptsname_r(int, char *, size_t);
hidden char *__randname(char *);
+hidden void *__libc_malloc(size_t);
+hidden void *__libc_malloc_impl(size_t);
+hidden void *__libc_calloc(size_t, size_t);
+hidden void *__libc_realloc(void *, size_t);
+hidden void __libc_free(void *);
+
#endif
diff --git a/src/malloc/free.c b/src/malloc/free.c
new file mode 100644
index 00000000..f17a952c
--- /dev/null
+++ b/src/malloc/free.c
@@ -0,0 +1,6 @@
+#include <stdlib.h>
+
+void free(void *p)
+{
+ return __libc_free(p);
+}
diff --git a/src/malloc/libc_calloc.c b/src/malloc/libc_calloc.c
new file mode 100644
index 00000000..d25eabea
--- /dev/null
+++ b/src/malloc/libc_calloc.c
@@ -0,0 +1,4 @@
+#define calloc __libc_calloc
+#define malloc __libc_malloc
+
+#include "calloc.c"
diff --git a/src/malloc/lite_malloc.c b/src/malloc/lite_malloc.c
index f8931ba5..0f461617 100644
--- a/src/malloc/lite_malloc.c
+++ b/src/malloc/lite_malloc.c
@@ -100,4 +100,16 @@ static void *__simple_malloc(size_t n)
return p;
}
-weak_alias(__simple_malloc, malloc);
+weak_alias(__simple_malloc, __libc_malloc_impl);
+
+void *__libc_malloc(size_t n)
+{
+ return __libc_malloc_impl(n);
+}
+
+static void *default_malloc(size_t n)
+{
+ return __libc_malloc_impl(n);
+}
+
+weak_alias(default_malloc, malloc);
diff --git a/src/malloc/mallocng/glue.h b/src/malloc/mallocng/glue.h
index 16acd1ea..8d7d9a3b 100644
--- a/src/malloc/mallocng/glue.h
+++ b/src/malloc/mallocng/glue.h
@@ -20,6 +20,10 @@
#define is_allzero __malloc_allzerop
#define dump_heap __dump_heap
+#define malloc __libc_malloc_impl
+#define realloc __libc_realloc
+#define free __libc_free
+
#if USE_REAL_ASSERT
#include <assert.h>
#else
diff --git a/src/malloc/oldmalloc/malloc.c b/src/malloc/oldmalloc/malloc.c
index c0997ad8..0c082bce 100644
--- a/src/malloc/oldmalloc/malloc.c
+++ b/src/malloc/oldmalloc/malloc.c
@@ -10,6 +10,10 @@
#include "pthread_impl.h"
#include "malloc_impl.h"
+#define malloc __libc_malloc
+#define realloc __libc_realloc
+#define free __libc_free
+
#if defined(__GNUC__) && defined(__PIC__)
#define inline inline __attribute__((always_inline))
#endif
diff --git a/src/malloc/realloc.c b/src/malloc/realloc.c
new file mode 100644
index 00000000..fb0e8b7c
--- /dev/null
+++ b/src/malloc/realloc.c
@@ -0,0 +1,6 @@
+#include <stdlib.h>
+
+void *realloc(void *p, size_t n)
+{
+ return __libc_realloc(p, n);
+}
--
2.21.0
From 34952fe5de44a833370cbe87b63fb8eec61466d7 Mon Sep 17 00:00:00 2001
From: Rich Felker <dalias@aerifal.cx>
Date: Wed, 11 Nov 2020 13:08:42 -0500
Subject: [PATCH 4/5] convert malloc use under libc-internal locks to use
internal allocator
this change lifts undocumented restrictions on calls by replacement
mallocs to libc functions that might take these locks, and sets the
stage for lifting restrictions on the child execution environment
after multithreaded fork.
care is taken to #define macros to replace all four functions (malloc,
calloc, realloc, free) even if not all of them will be used, using an
undefined symbol name for the ones intended not to be used so that any
inadvertent future use will be caught at compile time rather than
directed to the wrong implementation.
---
ldso/dynlink.c | 5 +++++
src/aio/aio.c | 5 +++++
src/exit/atexit.c | 5 +++++
src/ldso/dlerror.c | 5 +++++
src/locale/dcngettext.c | 5 +++++
src/locale/locale_map.c | 6 ++++++
src/thread/sem_open.c | 5 +++++
src/time/__tz.c | 5 +++++
8 files changed, 41 insertions(+)
diff --git a/ldso/dynlink.c b/ldso/dynlink.c
index 502e52c5..61714f40 100644
--- a/ldso/dynlink.c
+++ b/ldso/dynlink.c
@@ -23,6 +23,11 @@
#include "libc.h"
#include "dynlink.h"
+#define malloc __libc_malloc
+#define calloc __libc_calloc
+#define realloc __libc_realloc
+#define free __libc_free
+
static void error(const char *, ...);
#define MAXP2(a,b) (-(-(a)&-(b)))
diff --git a/src/aio/aio.c b/src/aio/aio.c
index b488e3d6..e004f98b 100644
--- a/src/aio/aio.c
+++ b/src/aio/aio.c
@@ -11,6 +11,11 @@
#include "pthread_impl.h"
#include "aio_impl.h"
+#define malloc __libc_malloc
+#define calloc __libc_calloc
+#define realloc __libc_realloc
+#define free __libc_free
+
/* The following is a threads-based implementation of AIO with minimal
* dependence on implementation details. Most synchronization is
* performed with pthread primitives, but atomics and futex operations
diff --git a/src/exit/atexit.c b/src/exit/atexit.c
index 160d277a..fcd940fa 100644
--- a/src/exit/atexit.c
+++ b/src/exit/atexit.c
@@ -3,6 +3,11 @@
#include "libc.h"
#include "lock.h"
+#define malloc __libc_malloc
+#define calloc __libc_calloc
+#define realloc undef
+#define free undef
+
/* Ensure that at least 32 atexit handlers can be registered without malloc */
#define COUNT 32
diff --git a/src/ldso/dlerror.c b/src/ldso/dlerror.c
index d8bbfc03..c782ca6c 100644
--- a/src/ldso/dlerror.c
+++ b/src/ldso/dlerror.c
@@ -5,6 +5,11 @@
#include "dynlink.h"
#include "lock.h"
+#define malloc __libc_malloc
+#define calloc __libc_calloc
+#define realloc __libc_realloc
+#define free __libc_free
+
char *dlerror()
{
pthread_t self = __pthread_self();
diff --git a/src/locale/dcngettext.c b/src/locale/dcngettext.c
index 4c304393..39a98e83 100644
--- a/src/locale/dcngettext.c
+++ b/src/locale/dcngettext.c
@@ -11,6 +11,11 @@
#include "pleval.h"
#include "lock.h"
+#define malloc __libc_malloc
+#define calloc __libc_calloc
+#define realloc undef
+#define free undef
+
struct binding {
struct binding *next;
int dirlen;
diff --git a/src/locale/locale_map.c b/src/locale/locale_map.c
index e7eede62..94f1b04e 100644
--- a/src/locale/locale_map.c
+++ b/src/locale/locale_map.c
@@ -1,10 +1,16 @@
#include <locale.h>
#include <string.h>
#include <sys/mman.h>
+#include <stdlib.h>
#include "locale_impl.h"
#include "libc.h"
#include "lock.h"
+#define malloc __libc_malloc
+#define calloc undef
+#define realloc undef
+#define free undef
+
const char *__lctrans_impl(const char *msg, const struct __locale_map *lm)
{
const char *trans = 0;
diff --git a/src/thread/sem_open.c b/src/thread/sem_open.c
index 6fb0c5b2..dad8f177 100644
--- a/src/thread/sem_open.c
+++ b/src/thread/sem_open.c
@@ -13,6 +13,11 @@
#include <pthread.h>
#include "lock.h"
+#define malloc __libc_malloc
+#define calloc __libc_calloc
+#define realloc undef
+#define free undef
+
static struct {
ino_t ino;
sem_t *sem;
diff --git a/src/time/__tz.c b/src/time/__tz.c
index 49a7371e..3044d206 100644
--- a/src/time/__tz.c
+++ b/src/time/__tz.c
@@ -7,6 +7,11 @@
#include "libc.h"
#include "lock.h"
+#define malloc __libc_malloc
+#define calloc undef
+#define realloc undef
+#define free undef
+
long __timezone = 0;
int __daylight = 0;
char *__tzname[2] = { 0, 0 };
--
2.21.0
From 167390f05564e0a4d3fcb4329377fd7743267560 Mon Sep 17 00:00:00 2001
From: Rich Felker <dalias@aerifal.cx>
Date: Wed, 11 Nov 2020 13:37:33 -0500
Subject: [PATCH] lift child restrictions after multi-threaded fork
as the outcome of Austin Group tracker issue #62, future editions of
POSIX have dropped the requirement that fork be AS-safe. this allows
but does not require implementations to synchronize fork with internal
locks and give forked children of multithreaded parents a partly or
fully unrestricted execution environment where they can continue to
use the standard library (per POSIX, they can only portably use
AS-safe functions).
up until recently, taking this allowance did not seem desirable.
however, commit 8ed2bd8bfcb4ea6448afb55a941f4b5b2b0398c0 exposed the
extent to which applications and libraries are depending on the
ability to use malloc and other non-AS-safe interfaces in MT-forked
children, by converting latent very-low-probability catastrophic state
corruption into predictable deadlock. dealing with the fallout has
been a huge burden for users/distros.
while it looks like most of the non-portable usage in applications
could be fixed given sufficient effort, at least some of it seems to
occur in language runtimes which are exposing the ability to run
unrestricted code in the child as part of the contract with the
programmer. any attempt at fixing such contracts is not just a
technical problem but a social one, and is probably not tractable.
this patch extends the fork function to take locks for all libc
singletons in the parent, and release or reset those locks in the
child, so that when the underlying fork operation takes place, the
state protected by these locks is consistent and ready for the child
to use. locking is skipped in the case where the parent is
single-threaded so as not to interfere with legacy AS-safety property
of fork in single-threaded programs. lock order is mostly arbitrary,
but the malloc locks (including bump allocator in case it's used) must
be taken after the locks on any subsystems that might use malloc, and
non-AS-safe locks cannot be taken while the thread list lock is held,
imposing a requirement that it be taken last.
---
ldso/dynlink.c | 19 ++++++++++
src/exit/at_quick_exit.c | 2 +
src/exit/atexit.c | 2 +
src/internal/fork_impl.h | 19 ++++++++++
src/ldso/dlerror.c | 2 +
src/locale/dcngettext.c | 5 ++-
src/locale/locale_map.c | 5 ++-
src/malloc/lite_malloc.c | 5 ++-
src/malloc/mallocng/glue.h | 14 ++++++-
src/malloc/oldmalloc/malloc.c | 19 ++++++++++
src/misc/syslog.c | 2 +
src/prng/random.c | 2 +
src/process/fork.c | 70 +++++++++++++++++++++++++++++++++++
src/stdio/ofl.c | 2 +
src/thread/sem_open.c | 2 +
src/thread/vmlock.c | 2 +
src/time/__tz.c | 2 +
17 files changed, 170 insertions(+), 4 deletions(-)
create mode 100644 src/internal/fork_impl.h
diff --git a/ldso/dynlink.c b/ldso/dynlink.c
index 61714f40..6b868c84 100644
--- a/ldso/dynlink.c
+++ b/ldso/dynlink.c
@@ -20,6 +20,7 @@
#include <semaphore.h>
#include <sys/membarrier.h>
#include "pthread_impl.h"
+#include "fork_impl.h"
#include "libc.h"
#include "dynlink.h"
@@ -1426,6 +1427,17 @@ void __libc_exit_fini()
}
}
+void __ldso_atfork(int who)
+{
+ if (who<0) {
+ pthread_rwlock_wrlock(&lock);
+ pthread_mutex_lock(&init_fini_lock);
+ } else {
+ pthread_mutex_unlock(&init_fini_lock);
+ pthread_rwlock_unlock(&lock);
+ }
+}
+
static struct dso **queue_ctors(struct dso *dso)
{
size_t cnt, qpos, spos, i;
@@ -1484,6 +1496,13 @@ static struct dso **queue_ctors(struct dso *dso)
}
queue[qpos] = 0;
for (i=0; i<qpos; i++) queue[i]->mark = 0;
+ for (i=0; i<qpos; i++)
+ if (queue[i]->ctor_visitor && queue[i]->ctor_visitor->tid < 0) {
+ error("State of %s is inconsistent due to multithreaded fork\n",
+ queue[i]->name);
+ free(queue);
+ if (runtime) longjmp(*rtld_fail, 1);
+ }
return queue;
}
diff --git a/src/exit/at_quick_exit.c b/src/exit/at_quick_exit.c
index d3ce6522..e4b5d78d 100644
--- a/src/exit/at_quick_exit.c
+++ b/src/exit/at_quick_exit.c
@@ -1,12 +1,14 @@
#include <stdlib.h>
#include "libc.h"
#include "lock.h"
+#include "fork_impl.h"
#define COUNT 32