From: "Mike Frysinger" <vapier@gentoo.org>
To: gentoo-commits@lists.gentoo.org
Subject: [gentoo-commits] proj/sandbox:master commit in: libsandbox/wrapper-funcs/, tests/, libsandbox/
Date: Tue, 29 Mar 2016 12:24:34 +0000 (UTC) [thread overview]
Message-ID: <1455668633.55087abd8dc9802cf68cade776fe612a3f19f6a1.vapier@gentoo> (raw)
commit: 55087abd8dc9802cf68cade776fe612a3f19f6a1
Author: Mike Frysinger <vapier <AT> gentoo <DOT> org>
AuthorDate: Wed Feb 17 00:23:53 2016 +0000
Commit: Mike Frysinger <vapier <AT> gentoo <DOT> org>
CommitDate: Wed Feb 17 00:23:53 2016 +0000
URL: https://gitweb.gentoo.org/proj/sandbox.git/commit/?id=55087abd
libsandbox: use ptrace on apps that interpose their own allocator
If an app installs its own memory allocator by overriding the internal
glibc symbols, then we can easily hit a loop that cannot be broken: the
dlsym functions can attempt to allocate memory, and sandbox relies on
them to find the "real" functions. So when someone calls a symbol that
the sandbox protects, we call dlsym, and that calls malloc, which calls
back into the app, and their allocator might use another symbol such as
open ... which is protected by the sandbox. So we hit the loop like:
-> open -> libsandbox:open -> dlsym -> malloc -> open ->
libsandbox:open -> dlsym -> malloc -> ...
Change the exec checking logic to scan the ELF instead. If it exports
these glibc symbols, then we have to assume it can trigger a loop, so
scrub the sandbox environment to prevent us from being loaded. Then we
use the out-of-process tracer (i.e. ptrace). This should generally be
as robust anyways ... if it's not, that's a bug we want to fix as this
is the same code used for static apps.
URL: http://crbug.com/586444
Reported-by: Ryo Hashimoto <hashimoto <AT> chromium.org>
Signed-off-by: Mike Frysinger <vapier <AT> gentoo.org>
libsandbox/libsandbox.c | 72 ++++++++++-------
libsandbox/libsandbox.h | 2 +-
libsandbox/wrapper-funcs/__wrapper_exec.c | 126 ++++++++++++++++++++++++++----
tests/Makefile.am | 3 +
tests/malloc-2.sh | 4 +
tests/malloc.at | 1 +
tests/malloc_hooked_tst.c | 44 +++++++++++
7 files changed, 210 insertions(+), 42 deletions(-)
diff --git a/libsandbox/libsandbox.c b/libsandbox/libsandbox.c
index 2bcff95..7555862 100644
--- a/libsandbox/libsandbox.c
+++ b/libsandbox/libsandbox.c
@@ -1137,7 +1137,7 @@ typedef struct {
* XXX: Might be much nicer if we could serialize these vars behind the back of
* the program. Might be hard to handle LD_PRELOAD though ...
*/
-char **sb_check_envp(char **envp, size_t *mod_cnt)
+char **sb_check_envp(char **envp, size_t *mod_cnt, bool insert)
{
char **my_env;
char *entry;
@@ -1194,7 +1194,8 @@ char **sb_check_envp(char **envp, size_t *mod_cnt)
}
/* If we found everything, there's nothing to do! */
- if (num_vars == found_var_cnt)
+ if ((insert && num_vars == found_var_cnt) ||
+ (!insert && found_var_cnt == 0))
/* Use the user's envp */
return envp;
@@ -1217,33 +1218,50 @@ char **sb_check_envp(char **envp, size_t *mod_cnt)
}
my_env = NULL;
- if (mod_cnt) {
- /* Count directly due to variability with LD_PRELOAD and the value
- * logic below. Getting out of sync can mean memory corruption. */
- *mod_cnt = 0;
- if (unlikely(merge_ld_preload)) {
- str_list_add_item(my_env, ld_preload, error);
- (*mod_cnt)++;
- }
- for (i = 0; i < num_vars; ++i) {
- if (found_vars[i] || !vars[i].value)
- continue;
- str_list_add_item_env(my_env, vars[i].name, vars[i].value, error);
- (*mod_cnt)++;
- }
-
- str_list_for_each_item(envp, entry, count) {
- if (unlikely(merge_ld_preload && is_env_var(entry, vars[0].name, vars[0].len)))
- continue;
- str_list_add_item(my_env, entry, error);
+ if (!insert) {
+ if (mod_cnt) {
+ str_list_for_each_item(envp, entry, count) {
+ for (i = 0; i < num_vars; ++i)
+ if (is_env_var(entry, vars[i].name, vars[i].len)) {
+ (*mod_cnt)++;
+ goto skip;
+ }
+ str_list_add_item(my_env, entry, error);
+ skip: ;
+ }
+ } else {
+ for (i = 0; i < num_vars; ++i)
+ unsetenv(vars[i].name);
}
} else {
- if (unlikely(merge_ld_preload))
- putenv(ld_preload);
- for (i = 0; i < num_vars; ++i) {
- if (found_vars[i] || !vars[i].value)
- continue;
- setenv(vars[i].name, vars[i].value, 1);
+ if (mod_cnt) {
+ /* Count directly due to variability with LD_PRELOAD and the value
+ * logic below. Getting out of sync can mean memory corruption. */
+ *mod_cnt = 0;
+ if (unlikely(merge_ld_preload)) {
+ str_list_add_item(my_env, ld_preload, error);
+ (*mod_cnt)++;
+ }
+ for (i = 0; i < num_vars; ++i) {
+ if (found_vars[i] || !vars[i].value)
+ continue;
+ str_list_add_item_env(my_env, vars[i].name, vars[i].value, error);
+ (*mod_cnt)++;
+ }
+
+ str_list_for_each_item(envp, entry, count) {
+ if (unlikely(merge_ld_preload && is_env_var(entry, vars[0].name, vars[0].len)))
+ continue;
+ str_list_add_item(my_env, entry, error);
+ }
+ } else {
+ if (unlikely(merge_ld_preload))
+ putenv(ld_preload);
+ for (i = 0; i < num_vars; ++i) {
+ if (found_vars[i] || !vars[i].value)
+ continue;
+ setenv(vars[i].name, vars[i].value, 1);
+ }
}
}
diff --git a/libsandbox/libsandbox.h b/libsandbox/libsandbox.h
index 596084d..63882e7 100644
--- a/libsandbox/libsandbox.h
+++ b/libsandbox/libsandbox.h
@@ -56,7 +56,7 @@ void *get_dlsym(const char *symname, const char *symver);
extern char sandbox_lib[SB_PATH_MAX];
extern bool sandbox_on;
-char **sb_check_envp(char **envp, size_t *mod_cnt);
+char **sb_check_envp(char **envp, size_t *mod_cnt, bool insert);
void sb_cleanup_envp(char **envp, size_t mod_cnt);
extern pid_t trace_pid;
diff --git a/libsandbox/wrapper-funcs/__wrapper_exec.c b/libsandbox/wrapper-funcs/__wrapper_exec.c
index 7107c21..f7f51ab 100644
--- a/libsandbox/wrapper-funcs/__wrapper_exec.c
+++ b/libsandbox/wrapper-funcs/__wrapper_exec.c
@@ -20,17 +20,20 @@ static WRAPPER_RET_TYPE (*WRAPPER_TRUE_NAME)(WRAPPER_ARGS_PROTO) = NULL;
#ifndef SB_EXEC_COMMON
#define SB_EXEC_COMMON
-/* Check to see if this a static ELF and if so, protect using trace mechanisms */
-static void sb_check_exec(const char *filename, char *const argv[])
+/* Check to see if we can run this program in-process. If not, try to fall back
+ * tracing it out-of-process via some trace mechanisms (e.g. ptrace).
+ */
+static bool sb_check_exec(const char *filename, char *const argv[])
{
int fd;
unsigned char *elf;
struct stat st;
bool do_trace = false;
+ bool run_in_process = true;
- fd = open(filename, O_RDONLY|O_CLOEXEC);
+ fd = sb_unwrapped_open_DEFAULT(filename, O_RDONLY|O_CLOEXEC, 0);
if (fd == -1)
- return;
+ return true;
if (fstat(fd, &st))
goto out_fd;
if (st.st_size < sizeof(Elf64_Ehdr))
@@ -57,19 +60,110 @@ static void sb_check_exec(const char *filename, char *const argv[])
* gains root just to preload libsandbox.so. That unfortunately
* could easily open up people to root vulns.
*/
- if (getuid() == 0 || !(st.st_mode & (S_ISUID | S_ISGID))) {
+ if (st.st_mode & (S_ISUID | S_ISGID))
+ if (getuid() != 0)
+ run_in_process = false;
+
+ /* We also need to ptrace programs that interpose their own allocator.
+ * http://crbug.com/586444
+ */
+ if (run_in_process) {
+ static const char * const libc_alloc_syms[] = {
+ "__libc_calloc",
+ "__libc_free",
+ "__libc_malloc",
+ "__libc_realloc",
+ "__malloc_hook",
+ "__realloc_hook",
+ "__free_hook",
+ "__memalign_hook",
+ "__malloc_initialize_hook",
+ };
#define PARSE_ELF(n) \
({ \
Elf##n##_Ehdr *ehdr = (void *)elf; \
Elf##n##_Phdr *phdr = (void *)(elf + ehdr->e_phoff); \
- uint16_t p; \
- if (st.st_size < sizeof(*ehdr)) \
- goto out_mmap; \
+ Elf##n##_Addr vaddr, filesz, vsym = 0, vstr = 0; \
+ Elf##n##_Off offset, symoff = 0, stroff = 0; \
+ Elf##n##_Dyn *dyn; \
+ Elf##n##_Sym *sym; \
+ uint##n##_t ent_size = 0, str_size = 0; \
+ bool dynamic = false; \
+ size_t i; \
+ \
if (st.st_size < ehdr->e_phoff + ehdr->e_phentsize * ehdr->e_phnum) \
goto out_mmap; \
- for (p = 0; p < ehdr->e_phnum; ++p) \
- if (phdr[p].p_type == PT_INTERP) \
- goto done; \
+ \
+ /* First gather the tags we care about. */ \
+ for (i = 0; i < ehdr->e_phnum; ++i) { \
+ switch (phdr[i].p_type) { \
+ case PT_INTERP: dynamic = true; break; \
+ case PT_DYNAMIC: \
+ dyn = (void *)(elf + phdr[i].p_offset); \
+ while (dyn->d_tag != DT_NULL) { \
+ switch (dyn->d_tag) { \
+ case DT_SYMTAB: vsym = dyn->d_un.d_val; break; \
+ case DT_SYMENT: ent_size = dyn->d_un.d_val; break; \
+ case DT_STRTAB: vstr = dyn->d_un.d_val; break; \
+ case DT_STRSZ: str_size = dyn->d_un.d_val; break; \
+ } \
+ ++dyn; \
+ } \
+ break; \
+ } \
+ } \
+ \
+ if (dynamic && vsym && ent_size && vstr && str_size) { \
+ /* Figure out where in the file these tables live. */ \
+ for (i = 0; i < ehdr->e_phnum; ++i) { \
+ vaddr = phdr[i].p_vaddr; \
+ filesz = phdr[i].p_filesz; \
+ offset = phdr[i].p_offset; \
+ if (vsym >= vaddr && vsym < vaddr + filesz) \
+ symoff = offset + (vsym - vaddr); \
+ if (vstr >= vaddr && vstr < vaddr + filesz) \
+ stroff = offset + (vstr - vaddr); \
+ } \
+ \
+ /* Finally walk the symbol table. This should generally be fast as \
+ * we only look at exported symbols, and the vast majority of exes \
+ * out there do not export any symbols at all. \
+ */ \
+ if (symoff && stroff) { \
+ sym = (void *)(elf + symoff); \
+ /* Nowhere is the # of symbols recorded, or the size of the symbol \
+ * table. Instead, we do what glibc does: assume that the string \
+ * table always follows the symbol table. This seems like a poor \
+ * assumption to make, but glibc has gotten by this long. We could \
+ * rely on DT_HASH and walking all the buckets to find the largest \
+ * symbol index, but that's also a bit hacky. \
+ * \
+ * We don't sanity check the ranges here as you aren't executing \
+ * corrupt programs in the sandbox. \
+ */ \
+ for (i = 0; i < (vstr - vsym) / ent_size; ++i) { \
+ char *symname = (void *)(elf + stroff + sym->st_name); \
+ if (ELF##n##_ST_VISIBILITY(sym->st_other) == STV_DEFAULT && \
+ sym->st_shndx != SHN_UNDEF && sym->st_shndx < SHN_LORESERVE && \
+ sym->st_name && \
+ /* Minor optimization to avoid strcmp. */ \
+ symname[0] == '_' && symname[1] == '_') { \
+ /* Blacklist internal C library symbols. */ \
+ size_t j; \
+ for (j = 0; j < ARRAY_SIZE(libc_alloc_syms); ++j) \
+ if (!strcmp(symname, libc_alloc_syms[j])) { \
+ run_in_process = false; \
+ goto use_trace; \
+ } \
+ } \
+ ++sym; \
+ } \
+ } \
+ \
+ } \
+ \
+ if (dynamic) \
+ goto done; \
})
if (elf[EI_CLASS] == ELFCLASS32)
PARSE_ELF(32);
@@ -78,6 +172,7 @@ static void sb_check_exec(const char *filename, char *const argv[])
#undef PARSE_ELF
}
+ use_trace:
do_trace = trace_possible(filename, argv, elf);
/* Now that we're done with stuff, clean up before forking */
@@ -90,6 +185,8 @@ static void sb_check_exec(const char *filename, char *const argv[])
if (do_trace)
trace_main(filename, argv);
+
+ return run_in_process;
}
#endif
@@ -107,6 +204,7 @@ WRAPPER_RET_TYPE SB_HIDDEN_FUNC(WRAPPER_NAME)(WRAPPER_ARGS_PROTO_FULL)
WRAPPER_RET_TYPE WRAPPER_NAME(WRAPPER_ARGS_PROTO)
{
WRAPPER_RET_TYPE result = WRAPPER_RET_DEFAULT;
+ bool run_in_process = true;
/* The C library may implement some exec funcs by calling other
* exec funcs. So we might get a little sandbox recursion going
@@ -151,15 +249,15 @@ WRAPPER_RET_TYPE WRAPPER_NAME(WRAPPER_ARGS_PROTO)
if (!SB_SAFE(check_path))
goto done;
- sb_check_exec(check_path, argv);
+ run_in_process = sb_check_exec(check_path, argv);
}
#endif
#ifdef EXEC_MY_ENV
size_t mod_cnt;
- char **my_env = sb_check_envp((char **)envp, &mod_cnt);
+ char **my_env = sb_check_envp((char **)envp, &mod_cnt, run_in_process);
#else
- sb_check_envp(environ, NULL);
+ sb_check_envp(environ, NULL, run_in_process);
#endif
restore_errno();
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 943ce3b..d98898f 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -73,6 +73,7 @@ check_PROGRAMS = \
\
getcwd-gnulib_tst \
libsigsegv_tst \
+ malloc_hooked_tst \
malloc_mmap_tst \
pipe-fork_tst \
pipe-fork_static_tst \
@@ -92,6 +93,8 @@ AM_LDFLAGS = `expr $@ : .*_static >/dev/null && echo -all-static`
sb_printf_tst_CFLAGS = -I$(top_srcdir)/libsbutil -I$(top_srcdir)/libsbutil/include
sb_printf_tst_LDADD = $(top_builddir)/libsbutil/libsbutil.la
+malloc_hooked_tst_LDFLAGS = $(AM_LDFLAGS) -pthread
+
if HAVE_LIBSIGSEGV
libsigsegv_tst_LDADD = -lsigsegv
endif
diff --git a/tests/malloc-2.sh b/tests/malloc-2.sh
new file mode 100755
index 0000000..e1cf297
--- /dev/null
+++ b/tests/malloc-2.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+# Since the malloc binary is in the target ABI, make sure the exec is
+# launched from the same ABI so the same libsandbox.so is used.
+timeout -s KILL 10 execvp-0 malloc_hooked_tst malloc_hooked_tst
diff --git a/tests/malloc.at b/tests/malloc.at
index 081d7d2..d364b4b 100644
--- a/tests/malloc.at
+++ b/tests/malloc.at
@@ -1 +1,2 @@
SB_CHECK(1)
+SB_CHECK(2)
diff --git a/tests/malloc_hooked_tst.c b/tests/malloc_hooked_tst.c
new file mode 100644
index 0000000..18737fe
--- /dev/null
+++ b/tests/malloc_hooked_tst.c
@@ -0,0 +1,44 @@
+/* Make sure programs that override malloc don't mess us up:
+ *
+ * libsandbox's __attribute__((constructor)) libsb_init ->
+ * libsandbox's malloc() ->
+ * dlsym("mmap") ->
+ * glibc's libdl calls malloc ->
+ * tcmalloc's internal code calls open ->
+ * libsandbox's open wrapper is hit ->
+ * libsandbox tries to initialize itself (since it never finished originally) ->
+ * libsandbox's malloc() ->
+ * dlsym() -> deadlock
+ * http://crbug.com/586444
+ */
+
+#include "headers.h"
+
+static void *malloc_hook(size_t size, const void *caller)
+{
+ int urandom_fd = open("/dev/urandom", O_RDONLY);
+ close(urandom_fd);
+ return NULL;
+}
+
+void *(*__malloc_hook)(size_t, const void *) = &malloc_hook;
+
+static void *thread_start(void *arg)
+{
+ return arg;
+}
+
+int main(int argc, char *argv[])
+{
+ /* Make sure we reference some pthread symbols, although we don't
+ * really want to execute it -- our malloc is limited. */
+ if (argc < 0) {
+ pthread_t tid;
+ pthread_create(&tid, NULL, thread_start, NULL);
+ }
+
+ /* Trigger malloc! */
+ if (malloc(100)) {}
+
+ return 0;
+}
next reply other threads:[~2016-03-29 12:24 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-03-29 12:24 Mike Frysinger [this message]
-- strict thread matches above, loose matches on Subject: below --
2015-09-11 7:53 [gentoo-commits] proj/sandbox:master commit in: libsandbox/wrapper-funcs/, tests/, libsandbox/ Mike Frysinger
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1455668633.55087abd8dc9802cf68cade776fe612a3f19f6a1.vapier@gentoo \
--to=vapier@gentoo.org \
--cc=gentoo-commits@lists.gentoo.org \
--cc=gentoo-dev@lists.gentoo.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox