#!/usr/bin/env bash
# SPDX-License-Identifier: GPL-2.0-only
# SPDX-FileCopyrightText: 2024 Eddie Chapman

# WARNING: this script is currently not a full replacement for xz, it just mimics
# some of the decompression functionality of xz. It is only designed at the
# moment to be called by Portage and even then it does not yet cover all cases of
# that.
#
# Usage (subset of xz's interface):
#   <this-script> (-d|--decompress|--uncompress) [-c|--stdout|--to-stdout] [FILE.xz]
#   <this-script> -dc|-cd [FILE.xz]
# With no *.xz argument the compressed stream is read from stdin and the
# decompressed data is written to stdout.
# Exit status: 0 on success, 1 on any failure (details go to syslog via logger).

# Some places in portage where xz is called:
# - /usr/lib/portage/python3.11/phase-helpers.sh
#     This is where 99% of calls to xz happen, from the line:
#         __unpack_tar "xz -T$(___makeopts_jobs) -d"
#     in the unpack phase.
#     This results in a call to xz inside __unpack_tar() where the -c arg is added (for stdout)
#     and the filename is added as an argument.
# - /usr/bin/deb2targz
#     Some packages e.g. google-chrome have deb distfiles which can contain a data.tar.xz
#     file so deb2targz launches xz -dc to decompress that.
# - /usr/bin/rpm2tar
#     Some packages e.g. libreoffice have rpm distfiles compressed with xz so rpm2tar launches
#     xz -dc to decompress them
# - /usr/portage/eclass/llvm.org.eclass
#     xz is not called directly but tar -x -J is run (and tar then runs "xz -d" with piped
#     in/out, with no file as argument)

LOGGER=$(command -v logger)
if [ ! -x "${LOGGER}" ]; then
  # Must go to stderr: stdout may be the pipe carrying the decompressed data.
  echo "(wrapper): Fatal error: logger command does not appear to exist!" >&2
  exit 1
fi

LOG_PREFIX="(wrapper):"

#######################################
# Send one message to syslog through logger.
# logger's stdout is discarded so that nothing but decompressed data can ever
# reach this script's stdout.
# Globals:   LOGGER (read)
# Arguments: $1 - syslog priority (e.g. syslog.err, syslog.info)
#            $2 - message text
# Returns:   logger's exit status
#######################################
log_msg() {
  "${LOGGER}" -p "${1}" -t "${0}" "${2}" >/dev/null
}

# /usr/bin/7za is just a wrapper that executes this
SEVEN_ZA="/usr/lib64/p7zip/7za"
DATE_CMD=$(command -v date)
MKTEMP_CMD=$(command -v mktemp)
PS_CMD=$(command -v ps)
CAT_CMD=$(command -v cat)
CHMOD=$(command -v chmod)
WHOAMI=$(command -v whoami)
GREP=$(command -v grep)
FILE_CMD=$(command -v file)
READLINK=$(command -v readlink)

# Iterate over the variable NAMES and dereference each one. Iterating over the
# unquoted values would silently drop any tool that `command -v` failed to find
# (empty value), so a missing tool would never be reported.
for EXE_VAR in SEVEN_ZA DATE_CMD MKTEMP_CMD PS_CMD CAT_CMD CHMOD \
  WHOAMI GREP FILE_CMD READLINK; do
  EXE_F=${!EXE_VAR}
  if [ ! -x "${EXE_F}" ]; then
    log_msg syslog.err "${LOG_PREFIX} Fatal Error: ${EXE_F:-${EXE_VAR} (not found in PATH)} does not exist or is not an exe!"
    exit 1
  fi
done

DECOMPRESS_REQUESTED=N
STDOUT_REQUESTED=N

# Recognise xz's decompress / stdout options when given as stand-alone
# arguments, plus the two combined short forms -dc and -cd. Anything else
# (e.g. -T4) is ignored.
for myarg in "${@}"; do
  case "${myarg}" in
    -d | --decompress | --uncompress)
      DECOMPRESS_REQUESTED=Y
      ;;
    -c | --to-stdout | --stdout)
      STDOUT_REQUESTED=Y
      ;;
    -dc | -cd)
      DECOMPRESS_REQUESTED=Y
      STDOUT_REQUESTED=Y
      ;;
  esac
done

# This script only tries to decompress. No compress functionality at all
# at this stage in its development.
if [ "${DECOMPRESS_REQUESTED}" = "N" ]; then
  log_msg syslog.err "${LOG_PREFIX} Fatal Error: no (d|decompress|uncompress) option on the command line. Sorry, this wrapper script only supports decompression."
  exit 1
fi

# DEBUG
#log_msg syslog.info "${LOG_PREFIX} (DEBUG) stdout requested? ${STDOUT_REQUESTED}"

# Only consumed by the (disabled) debug line below.
WHO_CALLED=$("${WHOAMI}")
# get the parent command, very useful for debugging
PARENT_CMD=$("${PS_CMD}" -o args= "${PPID}")

# DEBUG, avoid leaving enabled long term as potential for future security problem,
# due to unescaped attacker controlled info being passed to logger
#log_msg syslog.info "${LOG_PREFIX} (DEBUG) U: ${WHO_CALLED}, PARENT: ${PARENT_CMD}, ARGS: ${*}"

f_passed_to_script=

# Loop over args again to see if any file has been passed as an arg; the first
# argument ending in ".xz" wins.
# TO-DO, are there other possible extensions for xz. Also theoretically possible we
# could be passed one with extension in caps.
for myarg in "${@}"; do
  if [[ "${myarg}" == *.xz ]]; then
    f_passed_to_script="${myarg}"
    break
  fi
done

#######################################
# Validate a file and decompress it with 7za.
# Globals:   READLINK, FILE_CMD, GREP, SEVEN_ZA, LOG_PREFIX (read)
#            STDOUT_REQUESTED (read) - "Y": stream decompressed data to stdout
#            MSG_TO_SHOW (written)   - reason text for the 184..234 return codes
# Arguments: $1 - file name, with or without leading path
# Outputs:   decompressed data on stdout when STDOUT_REQUESTED is "Y";
#            7za's stderr (and, otherwise, its stdout) is discarded
# Returns:   184 no argument, 194 not a (findable) regular file, 204 empty file,
#            214 unreadable, 224 unsupported characters in the resolved path,
#            234 not an xz file according to file(1); otherwise 7za's exit status
#            (remember return numbers can only be btw 0 - 255)
#######################################
do_uncompress() {
  local realf rc redir_desc
  local -a seven_za_cmd
  # Matches any single character that is NOT in the "sane" set.
  local -r unsupported_re='[^A-Za-z0-9_:@%+/.-]'

  # some sanity checks follow ...
  if [[ -z "${1}" ]]; then
    MSG_TO_SHOW="function requires 1 argument; a filename with our without leading path"
    return 184
  fi

  # Resolve symlinks to the canonical absolute path; empty if it does not exist.
  realf=$("${READLINK}" -e "${1}" 2>/dev/null)

  if [[ ! -f "${realf}" ]]; then
    MSG_TO_SHOW="argument supplied either is not a file or, if it is a file, I cannot find it."
    return 194
  fi

  if [[ ! -s "${realf}" ]]; then
    MSG_TO_SHOW="file supplied is empty!"
    return 204
  fi

  if [[ ! -r "${realf}" ]]; then
    MSG_TO_SHOW="file supplied cannot be read!"
    return 214
  fi

  # Refuse any path containing a character outside our sane list. The previous
  # `grep -Ev '[sane]'` form only triggered when the path contained NO sane
  # character at all, i.e. effectively never. 7za is now run without eval, so
  # this is defence in depth rather than the only barrier against shell
  # metacharacters.
  # NOTE(review): 7-Zip may additionally treat '*' and '?' in file arguments as
  # its own wildcards, which this list also excludes - confirm.
  if [[ "${realf}" =~ ${unsupported_re} ]]; then
    MSG_TO_SHOW="found unsupported characters in the (real) file path."
    return 224
  fi

  # Make sure we have been given an xz file.
  # the -e arguments exclude tests we're not interested in, hopefully some tiny perf gain
  # but more importantly reduce attack surface.
  # Also we have it output the mime type rather than a human readable string, more reliable
  if ! "${FILE_CMD}" -e ascii -e cdf -e apptype -e csv -e elf -e json -e simh -e tar \
    --mime "${realf}" 2>/dev/null | "${GREP}" -q 'application/x-xz;'; then
    MSG_TO_SHOW="file supplied does not appear to be an xz file, according to the file command"
    return 234
  fi

  # Build the 7za command line as an array so that the path is always passed as
  # exactly one argument and nothing is ever re-parsed by the shell.
  seven_za_cmd=("${SEVEN_ZA}" e "${realf}")
  if [[ "${STDOUT_REQUESTED}" == "Y" ]]; then
    # -so: write the decompressed data to stdout.
    seven_za_cmd+=(-so -bd)
    redir_desc='2>/dev/null'
  else
    # 7za is somewhat chatty when decompressing, so silence its stdout too.
    seven_za_cmd+=(-bd)
    redir_desc='>/dev/null 2>/dev/null'
  fi

  log_msg syslog.info "${LOG_PREFIX} About to run: ${seven_za_cmd[*]} ${redir_desc}"

  # In 99% of cases we will redirect 7za decompressed output to stdout (-so).
  # So make really sure nothing gets output to stdout by this script after this point!
  # Log messages all only to logger.
  # We currently send 7za's stderr to /dev/null always.
  if [[ "${STDOUT_REQUESTED}" == "Y" ]]; then
    "${seven_za_cmd[@]}" 2>/dev/null
    rc=$?
  else
    "${seven_za_cmd[@]}" >/dev/null 2>/dev/null
    rc=$?
  fi
  return "${rc}"
}

#######################################
# Translate a do_uncompress return code into LAST_ERR_MSG.
# Globals:   MSG_TO_SHOW (read), LAST_ERR_MSG (written; untouched when $1 is 0)
# Arguments: $1 - return code of do_uncompress
#            $2 - what was being decompressed ("stdin" or "the file")
#######################################
set_last_err_msg() {
  case "${1}" in
    0) ;;
    # error message numbers inside do_uncompress(), hopefully none clash with those used by 7za
    184 | 194 | 204 | 214 | 224 | 234)
      LAST_ERR_MSG="do_uncompress(): ${MSG_TO_SHOW}"
      ;;
    *)
      LAST_ERR_MSG="7za returned non-zero code of ${1} when trying to decompress ${2}!"
      ;;
  esac
}

last_r=0
LAST_ERR_MSG=

# if no file was detected we assume we will get compressed data via stdin
if [[ -z "${f_passed_to_script}" ]]; then
  # We create a temp file to save the stdin compressed data into as the current p7zip provided
  # 7za does not work properly if fed data via stdin, though according to docs it
  # should work, so probably a bug.
  # Also note within sandbox tmp space is not the system /tmp AFAICT.
  # Unfortunately this means we need to make sure we have enough space for the uncompressed
  # data both in /tmp as well as whichever filesystem we set portage to do builds on.
  mytf=$("${MKTEMP_CMD}")

  # sanity
  if [[ -z "${mytf}" || ! -e "${mytf}" ]]; then
    log_msg syslog.err "${LOG_PREFIX} Fatal Error: something has gone very wrong, no temp file exists!"
    exit 1
  fi

  # This is our own temp file rather than one supplied to the script, so it is
  # removed when the script exits, including on the early "exit 1" below.
  trap 'rm -f -- "${mytf}"' EXIT

  "${CHMOD}" 0600 "${mytf}"

  # save stdin to our tmpfile.
  "${CAT_CMD}" >"${mytf}"
  r=$?

  # sanity
  if [[ ${r} -ne 0 ]]; then
    log_msg syslog.err "${LOG_PREFIX} Fatal Error: cat returned non-zero code of ${r} when redirecting to ${mytf}!"
    exit 1
  fi

  # Even if stdout was NOT requested using a command line argument (and thus STDOUT_REQUESTED will be set to N),
  # xz assumes that you *do* want the uncompressed stream to go to stdout if no file was given on the command line (naturally).
  # So we need to force this to Y here to make sure that happens.
  STDOUT_REQUESTED=Y

  # Having saved stdin to the tmp file above we can now have 7za decompress said tmp file.
  # Remember from here on we run 7za which will in most cases output binary data to stdout.
  # So make REALLY sure nothing else gets output after this!
  # Log messages all only to logger.
  do_uncompress "${mytf}"
  last_r=$?
  set_last_err_msg "${last_r}" "stdin"

elif [[ -e "${f_passed_to_script}" ]]; then
  # Remember from here on we run 7za which will in most cases output binary data to stdout.
  # So make REALLY sure nothing else gets output after this!
  # Log messages all only to logger.
  do_uncompress "${f_passed_to_script}"
  last_r=$?
  set_last_err_msg "${last_r}" "the file"

  # 7za does not delete the original file by default but xz does.
  # If stdout was not requested then they will be expecting us to delete it so do that,
  # but ONLY after a successful decompression: deleting the source after a
  # failure would destroy the only copy of the data.
  # TO-DO: catch --keep argument and do not delete if it is passed
  if [[ ${last_r} -eq 0 && "${STDOUT_REQUESTED}" == "N" ]]; then
    rm -f -- "${f_passed_to_script}"
  fi

else
  last_r=1
  LAST_ERR_MSG="no valid file was found in supplied arguments and stdin was not an xz stream!"
fi

if [[ ${last_r} -ne 0 ]]; then
  log_msg syslog.err "${LOG_PREFIX} Fatal Error: ${LAST_ERR_MSG}" 2>/dev/null
  exit 1
else
  exit 0
fi