From: "Sergei Trofimovich" <slyfox@gentoo.org>
To: gentoo-commits@lists.gentoo.org
Subject: [gentoo-commits] proj/sandbox:master commit in: libsandbox/wrapper-funcs/
Date: Sat, 9 Mar 2019 19:21:40 +0000 (UTC) [thread overview]
Message-ID: <1551555431.c8146cfbcd36f9be4a447bf057811fe2f6c543b2.slyfox@gentoo> (raw)
commit: c8146cfbcd36f9be4a447bf057811fe2f6c543b2
Author: Sergei Trofimovich <slyfox <AT> gentoo <DOT> org>
AuthorDate: Sat Mar 2 19:15:14 2019 +0000
Commit: Sergei Trofimovich <slyfox <AT> gentoo <DOT> org>
CommitDate: Sat Mar 2 19:37:11 2019 +0000
URL: https://gitweb.gentoo.org/proj/sandbox.git/commit/?id=c8146cfb
exec wrapper: add support for GNU_HASH parser
In bug #672918 exec's ELF parser was going out of symbol table
range. It happened to trigger on a binary linked by LLVM's lld.
sandbox has no precise way to determine symbol table size and
uses the heuristic of detecting SYMTAB dynamic section size:
STRTAB (or GNU_LIBLIST for prelinked binaries).
This is a weak assumption and does not hold for lld-linked
binaries where the ordering is SYMTAB->GNU_HASH->STRTAB.
As a result sandbox SIGSEGVs when parses GNU_HASH as SYMTAB
entries.
The change adds GNU_HASH parser to iterate exactly through
exported symbols. That way we avoid heuristic code completely.
While at it repobe GNU_LIBLIST hack as we should not rely
on size heuristics anymore.
This has a nice side-effect of speeding up ELF parsing as
GHU_HASH only iterates through exported symbols. HASH
comparison has all dynamic symbols: both exported and
imported.
Reported-by: Dennis Schridde
Bug: https://bugs.gentoo.org/672918
Signed-off-by: Sergei Trofimovich <slyfox <AT> gentoo.org>
libsandbox/wrapper-funcs/__wrapper_exec.c | 108 ++++++++++++++++++++----------
1 file changed, 71 insertions(+), 37 deletions(-)
diff --git a/libsandbox/wrapper-funcs/__wrapper_exec.c b/libsandbox/wrapper-funcs/__wrapper_exec.c
index 974156e..766245a 100644
--- a/libsandbox/wrapper-funcs/__wrapper_exec.c
+++ b/libsandbox/wrapper-funcs/__wrapper_exec.c
@@ -83,8 +83,8 @@ static bool sb_check_exec(const char *filename, char *const argv[])
({ \
Elf##n##_Ehdr *ehdr = (void *)elf; \
Elf##n##_Phdr *phdr = (void *)(elf + ehdr->e_phoff); \
- Elf##n##_Addr vaddr, filesz, vsym = 0, vstr = 0, vhash = 0, vliblist = 0; \
- Elf##n##_Off offset, symoff = 0, stroff = 0, hashoff = 0, liblistoff = 0; \
+ Elf##n##_Addr vaddr, filesz, vsym = 0, vstr = 0, vhash = 0, vgnuhash = 0; \
+ Elf##n##_Off offset, symoff = 0, stroff = 0, hashoff = 0, gnuhashoff = 0; \
Elf##n##_Dyn *dyn; \
Elf##n##_Sym *sym, *symend; \
uint##n##_t ent_size = 0, str_size = 0; \
@@ -107,7 +107,7 @@ static bool sb_check_exec(const char *filename, char *const argv[])
case DT_STRTAB: vstr = dyn->d_un.d_val; break; \
case DT_STRSZ: str_size = dyn->d_un.d_val; break; \
case DT_HASH: vhash = dyn->d_un.d_val; break; \
- case DT_GNU_LIBLIST: vliblist = dyn->d_un.d_val; break; \
+ case DT_GNU_HASH: vgnuhash = dyn->d_un.d_val; break; \
} \
++dyn; \
} \
@@ -127,8 +127,8 @@ static bool sb_check_exec(const char *filename, char *const argv[])
stroff = offset + (vstr - vaddr); \
if (vhash >= vaddr && vhash < vaddr + filesz) \
hashoff = offset + (vhash - vaddr); \
- if (vliblist >= vaddr && vliblist < vaddr + filesz) \
- liblistoff = offset + (vliblist - vaddr); \
+ if (vgnuhash >= vaddr && vgnuhash < vaddr + filesz) \
+ gnuhashoff = offset + (vgnuhash - vaddr); \
} \
\
/* Finally walk the symbol table. This should generally be fast as \
@@ -137,47 +137,81 @@ static bool sb_check_exec(const char *filename, char *const argv[])
*/ \
if (symoff && stroff) { \
/* Nowhere is the # of symbols recorded, or the size of the symbol \
- * table. Instead, we do what glibc does: use the sysv hash table \
- * if it exists, else assume that the string table always directly \
+ * table. Instead, we do what glibc does: use the gnu or sysv hash \
+ * table if it exists, else assume that the string table always directly \
* follows the symbol table. This seems like a poor assumption to \
* make, but glibc has gotten by this long. See determine_info in \
* glibc's elf/dl-addr.c. \
* \
- * Turns out prelink will violate that assumption. Fortunately it \
- * will insert its liblist at the same location all the time -- it \
- * replaces the string table with its liblist table. \
- * \
- * Long term, we should behave the same as glibc and walk the gnu \
- * hash table first before falling back to the raw symbol table. \
- * \
* We don't sanity check the ranges here as you aren't executing \
* corrupt programs in the sandbox. \
*/ \
sym = (void *)(elf + symoff); \
- if (vhash) { \
- /* Hash entries are always 32-bits. */ \
- uint32_t *hashes = (void *)(elf + hashoff); \
- symend = sym + hashes[1]; \
- } else if (vliblist) \
- symend = (void *)(elf + liblistoff); \
- else \
- symend = (void *)(elf + stroff); \
- \
- while (sym < symend) { \
- char *symname = (void *)(elf + stroff + sym->st_name); \
- if (ELF##n##_ST_VISIBILITY(sym->st_other) == STV_DEFAULT && \
- sym->st_shndx != SHN_UNDEF && sym->st_shndx < SHN_LORESERVE && \
- sym->st_name && \
- /* Minor optimization to avoid strcmp. */ \
- symname[0] == '_' && symname[1] == '_') { \
- /* Blacklist internal C library symbols. */ \
- for (i = 0; i < ARRAY_SIZE(libc_alloc_syms); ++i) \
- if (!strcmp(symname, libc_alloc_syms[i])) { \
- run_in_process = false; \
- goto use_trace; \
- } \
+ if (vgnuhash) { \
+ uint32_t *hash32 = (void *)(elf + gnuhashoff); \
+ /* use glibc's elf/dl-lookup.c:_dl_setup_hash() as a reference */ \
+ /* DT_GNU_HASH header: */ \
+ uint32_t nbuckets = *hash32++; \
+ uint32_t symbias = *hash32++; \
+ uint32_t bitmask_nwords = *hash32++; \
+ hash32++; /* gnu_shift */ \
+ hash32 += n / 32 * bitmask_nwords; /* gnu_bitmask */ \
+ uint32_t *gnu_buckets = hash32; \
+ hash32 += nbuckets; \
+ uint32_t *gnu_chain_zero = hash32 - symbias; \
+ \
+ uint32_t bucket; \
+ \
+ for (bucket = 0; bucket < nbuckets; bucket++) { \
+ uint32_t symndx = gnu_buckets[bucket]; \
+ if (symndx != 0) { \
+ const uint32_t *hasharr = &gnu_chain_zero[symndx]; \
+ do { \
+ Elf##n##_Sym * s = &sym[symndx]; \
+ \
+ /* keep in sync with 'vhash' case */ \
+ char *symname = (void *)(elf + stroff + s->st_name); \
+ if (ELF##n##_ST_VISIBILITY(s->st_other) == STV_DEFAULT && \
+ s->st_shndx != SHN_UNDEF && s->st_shndx < SHN_LORESERVE && \
+ s->st_name && \
+ /* Minor optimization to avoid strcmp. */ \
+ symname[0] == '_' && symname[1] == '_') { \
+ /* Blacklist internal C library symbols. */ \
+ for (i = 0; i < ARRAY_SIZE(libc_alloc_syms); ++i) \
+ if (!strcmp(symname, libc_alloc_syms[i])) { \
+ run_in_process = false; \
+ goto use_trace; \
+ } \
+ } \
+ ++symndx; \
+ } while ((*hasharr++ & 1u) == 0); \
+ } \
+ } \
+ } else { \
+ if (vhash) { \
+ /* Hash entries are always 32-bits. */ \
+ uint32_t *hashes = (void *)(elf + hashoff); \
+ symend = sym + hashes[1]; \
+ } else \
+ symend = (void *)(elf + stroff); \
+ \
+ while (sym < symend) { \
+ /* keep insync with 'vgnuhash' case */ \
+ char *symname = (void *)(elf + stroff + sym->st_name); \
+ if (ELF##n##_ST_VISIBILITY(sym->st_other) == STV_DEFAULT && \
+ sym->st_shndx != SHN_UNDEF && sym->st_shndx < SHN_LORESERVE && \
+ sym->st_name && \
+ /* Minor optimization to avoid strcmp. */ \
+ symname[0] == '_' && symname[1] == '_') { \
+ /* Blacklist internal C library symbols. */ \
+ for (i = 0; i < ARRAY_SIZE(libc_alloc_syms); ++i) \
+ if (!strcmp(symname, libc_alloc_syms[i])) { \
+ run_in_process = false; \
+ goto use_trace; \
+ } \
+ } \
+ ++sym; \
} \
- ++sym; \
} \
} \
\
next reply other threads:[~2019-03-09 19:21 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-03-09 19:21 Sergei Trofimovich [this message]
-- strict thread matches above, loose matches on Subject: below --
2025-02-22 19:49 [gentoo-commits] proj/sandbox:master commit in: libsandbox/wrapper-funcs/ Mike Gilbert
2024-12-22 3:49 [gentoo-commits] proj/sandbox:stable-2.x " Mike Gilbert
2025-01-14 4:38 ` [gentoo-commits] proj/sandbox:master " Mike Gilbert
2024-12-22 3:41 Mike Gilbert
2021-10-18 5:21 Mike Frysinger
2016-11-16 22:19 Mike Frysinger
2016-03-30 5:16 Mike Frysinger
2013-02-25 4:03 Mike Frysinger
2012-12-24 8:02 Mike Frysinger
2012-06-24 0:06 Mike Frysinger
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1551555431.c8146cfbcd36f9be4a447bf057811fe2f6c543b2.slyfox@gentoo \
--to=slyfox@gentoo.org \
--cc=gentoo-commits@lists.gentoo.org \
--cc=gentoo-dev@lists.gentoo.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox